xfs: use iomap_dio_rw
Straight switch over to using iomap for direct I/O - we already have the
non-COW dio path in write_begin for DAX and files with extent size hints,
so nothing to add there. The COW path is ported over from the old
get_blocks version and a bit of a mess, but I have some work in progress
to make it look more like the buffered I/O COW path.

This gets rid of xfs_get_blocks_direct and the last caller of
xfs_get_blocks with the create flag set, so all that code can be removed.

Last but not least I've removed a comment in xfs_filemap_fault that
refers to xfs_get_blocks entirely instead of updating it - while the
reference is correct, the whole DAX fault path looks different than the
non-DAX one, so it seems rather pointless.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
commit acdda3aae1
parent ff6a9292e6
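The heart of the change, condensed (a sketch assembled from the hunks below, not an applyable patch on its own): both direct I/O paths stop driving __blockdev_direct_IO() through xfs_get_blocks_direct and instead hand the kiocb straight to iomap_dio_rw(), which resolves mappings through xfs_iomap_ops and, for writes, signals completion through an end_io callback:

	/* old write path */
	data = *from;
	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
			xfs_get_blocks_direct, xfs_end_io_direct_write,
			NULL, DIO_ASYNC_EXTEND);

	/* new write path */
	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);

Page-cache write-back and invalidation, iov_iter advancement, and the sector-alignment check now all happen inside iomap_dio_rw(), which is why so much boilerplate disappears from xfs_file_dio_aio_read() and xfs_file_dio_aio_write() below.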
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -37,11 +37,6 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
-/* flags for direct write completions */
-#define XFS_DIO_FLAG_UNWRITTEN	(1 << 0)
-#define XFS_DIO_FLAG_APPEND	(1 << 1)
-#define XFS_DIO_FLAG_COW	(1 << 2)
-
 /*
  * structure owned by writepages passed to individual writepage calls
  */
@@ -1175,45 +1170,6 @@ xfs_vm_releasepage(
 	return try_to_free_buffers(page);
 }
 
-/*
- * When we map a DIO buffer, we may need to pass flags to
- * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
- *
- * Note that for DIO, an IO to the highest supported file block offset (i.e.
- * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
- * bit variable. Hence if we see this overflow, we have to assume that the IO is
- * extending the file size. We won't know for sure until IO completion is run
- * and the actual max write offset is communicated to the IO completion
- * routine.
- */
-static void
-xfs_map_direct(
-	struct inode		*inode,
-	struct buffer_head	*bh_result,
-	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset,
-	bool			is_cow)
-{
-	uintptr_t		*flags = (uintptr_t *)&bh_result->b_private;
-	xfs_off_t		size = bh_result->b_size;
-
-	trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
-		ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW :
-		XFS_IO_OVERWRITE, imap);
-
-	if (ISUNWRITTEN(imap)) {
-		*flags |= XFS_DIO_FLAG_UNWRITTEN;
-		set_buffer_defer_completion(bh_result);
-	} else if (is_cow) {
-		*flags |= XFS_DIO_FLAG_COW;
-		set_buffer_defer_completion(bh_result);
-	}
-	if (offset + size > i_size_read(inode) || offset + size < 0) {
-		*flags |= XFS_DIO_FLAG_APPEND;
-		set_buffer_defer_completion(bh_result);
-	}
-}
-
 /*
  * If this is O_DIRECT or the mpage code calling tell them how large the mapping
  * is, so that we can avoid repeated get_blocks calls.
@@ -1254,51 +1210,12 @@ xfs_map_trim_size(
 	bh_result->b_size = mapping_size;
 }
 
-/* Bounce unaligned directio writes to the page cache. */
 static int
-xfs_bounce_unaligned_dio_write(
-	struct xfs_inode	*ip,
-	xfs_fileoff_t		offset_fsb,
-	struct xfs_bmbt_irec	*imap)
-{
-	struct xfs_bmbt_irec	irec;
-	xfs_fileoff_t		delta;
-	bool			shared;
-	bool			x;
-	int			error;
-
-	irec = *imap;
-	if (offset_fsb > irec.br_startoff) {
-		delta = offset_fsb - irec.br_startoff;
-		irec.br_blockcount -= delta;
-		irec.br_startblock += delta;
-		irec.br_startoff = offset_fsb;
-	}
-	error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
-	if (error)
-		return error;
-
-	/*
-	 * We're here because we're trying to do a directio write to a
-	 * region that isn't aligned to a filesystem block. If any part
-	 * of the extent is shared, fall back to buffered mode to handle
-	 * the RMW. This is done by returning -EREMCHG ("remote addr
-	 * changed"), which is caught further up the call stack.
-	 */
-	if (shared) {
-		trace_xfs_reflink_bounce_dio_write(ip, imap);
-		return -EREMCHG;
-	}
-	return 0;
-}
-
-STATIC int
-__xfs_get_blocks(
+xfs_get_blocks(
 	struct inode		*inode,
 	sector_t		iblock,
 	struct buffer_head	*bh_result,
-	int			create,
-	bool			direct)
+	int			create)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1309,10 +1226,8 @@ __xfs_get_blocks(
 	int			nimaps = 1;
 	xfs_off_t		offset;
 	ssize_t			size;
-	int			new = 0;
-	bool			is_cow = false;
 
-	BUG_ON(create && !direct);
+	BUG_ON(create);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -1321,7 +1236,7 @@ __xfs_get_blocks(
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
 
-	if (!create && offset >= i_size_read(inode))
+	if (offset >= i_size_read(inode))
 		return 0;
 
 	/*
@@ -1336,73 +1251,12 @@ __xfs_get_blocks(
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-	if (create && direct && xfs_is_reflink_inode(ip)) {
-		is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
-		ASSERT(!is_cow || !isnullstartblock(imap.br_startblock));
-	}
-
-	if (!is_cow) {
-		error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-				&imap, &nimaps, XFS_BMAPI_ENTIRE);
-		/*
-		 * Truncate an overwrite extent if there's a pending CoW
-		 * reservation before the end of this extent. This
-		 * forces us to come back to get_blocks to take care of
-		 * the CoW.
-		 */
-		if (create && direct && nimaps &&
-		    imap.br_startblock != HOLESTARTBLOCK &&
-		    imap.br_startblock != DELAYSTARTBLOCK &&
-		    !ISUNWRITTEN(&imap))
-			xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
-					&imap);
-	}
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+			&imap, &nimaps, XFS_BMAPI_ENTIRE);
 	if (error)
 		goto out_unlock;
 
-	/*
-	 * The only time we can ever safely find delalloc blocks on direct I/O
-	 * is a dio write to post-eof speculative preallocation. All other
-	 * scenarios are indicative of a problem or misuse (such as mixing
-	 * direct and mapped I/O).
-	 *
-	 * The file may be unmapped by the time we get here so we cannot
-	 * reliably fail the I/O based on mapping. Instead, fail the I/O if this
-	 * is a read or a write within eof. Otherwise, carry on but warn as a
-	 * precuation if the file happens to be mapped.
-	 */
-	if (direct && imap.br_startblock == DELAYSTARTBLOCK) {
-		if (!create || offset < i_size_read(VFS_I(ip))) {
-			WARN_ON_ONCE(1);
-			error = -EIO;
-			goto out_unlock;
-		}
-		WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping));
-	}
-
-	/* for DAX, we convert unwritten extents directly */
-	if (create &&
-	    (!nimaps ||
-	     (imap.br_startblock == HOLESTARTBLOCK ||
-	      imap.br_startblock == DELAYSTARTBLOCK) ||
-	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-		/*
-		 * xfs_iomap_write_direct() expects the shared lock. It
-		 * is unlocked on return.
-		 */
-		if (lockmode == XFS_ILOCK_EXCL)
-			xfs_ilock_demote(ip, lockmode);
-
-		error = xfs_iomap_write_direct(ip, offset, size,
-					       &imap, nimaps);
-		if (error)
-			return error;
-		new = 1;
-
-		trace_xfs_get_blocks_alloc(ip, offset, size,
-				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
-				 : XFS_IO_DELALLOC, &imap);
-	} else if (nimaps) {
+	if (nimaps) {
 		trace_xfs_get_blocks_found(ip, offset, size,
 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
 				 : XFS_IO_OVERWRITE, &imap);
@@ -1412,12 +1266,6 @@ __xfs_get_blocks(
 		goto out_unlock;
 	}
 
-	if (IS_DAX(inode) && create) {
-		ASSERT(!ISUNWRITTEN(&imap));
-		/* zeroing is not needed at a higher layer */
-		new = 0;
-	}
-
 	/* trim mapping down to size requested */
 	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
 
@@ -1427,43 +1275,14 @@ __xfs_get_blocks(
 	 */
 	if (imap.br_startblock != HOLESTARTBLOCK &&
 	    imap.br_startblock != DELAYSTARTBLOCK &&
-	    (create || !ISUNWRITTEN(&imap))) {
-		if (create && direct && !is_cow) {
-			error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
-					&imap);
-			if (error)
-				return error;
-		}
-
+	    !ISUNWRITTEN(&imap))
 		xfs_map_buffer(inode, bh_result, &imap, offset);
-		if (ISUNWRITTEN(&imap))
-			set_buffer_unwritten(bh_result);
-		/* direct IO needs special help */
-		if (create)
-			xfs_map_direct(inode, bh_result, &imap, offset, is_cow);
-	}
 
 	/*
 	 * If this is a realtime file, data may be on a different device.
 	 * to that pointed to from the buffer_head b_bdev currently.
 	 */
 	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-
-	/*
-	 * If we previously allocated a block out beyond eof and we are now
-	 * coming back to use it then we will need to flag it as new even if it
-	 * has a disk address.
-	 *
-	 * With sub-block writes into unwritten extents we also need to mark
-	 * the buffer as new so that the unwritten parts of the buffer gets
-	 * correctly zeroed.
-	 */
-	if (create &&
-	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
-	     (offset >= i_size_read(inode)) ||
-	     (new || ISUNWRITTEN(&imap))))
-		set_buffer_new(bh_result);
-
 	return 0;
 
 out_unlock:
@@ -1471,100 +1290,6 @@ out_unlock:
 	return error;
 }
 
-int
-xfs_get_blocks(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, false);
-}
-
-int
-xfs_get_blocks_direct(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, true);
-}
-
-/*
- * Complete a direct I/O write request.
- *
- * xfs_map_direct passes us some flags in the private data to tell us what to
- * do. If no flags are set, then the write IO is an overwrite wholly within
- * the existing allocated file size and so there is nothing for us to do.
- *
- * Note that in this case the completion can be called in interrupt context,
- * whereas if we have flags set we will always be called in task context
- * (i.e. from a workqueue).
- */
-int
-xfs_end_io_direct_write(
-	struct kiocb		*iocb,
-	loff_t			offset,
-	ssize_t			size,
-	void			*private)
-{
-	struct inode		*inode = file_inode(iocb->ki_filp);
-	struct xfs_inode	*ip = XFS_I(inode);
-	uintptr_t		flags = (uintptr_t)private;
-	int			error = 0;
-
-	trace_xfs_end_io_direct_write(ip, offset, size);
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return -EIO;
-
-	if (size <= 0)
-		return size;
-
-	/*
-	 * The flags tell us whether we are doing unwritten extent conversions
-	 * or an append transaction that updates the on-disk file size. These
-	 * cases are the only cases where we should *potentially* be needing
-	 * to update the VFS inode size.
-	 */
-	if (flags == 0) {
-		ASSERT(offset + size <= i_size_read(inode));
-		return 0;
-	}
-
-	/*
-	 * We need to update the in-core inode size here so that we don't end up
-	 * with the on-disk inode size being outside the in-core inode size. We
-	 * have no other method of updating EOF for AIO, so always do it here
-	 * if necessary.
-	 *
-	 * We need to lock the test/set EOF update as we can be racing with
-	 * other IO completions here to update the EOF. Failing to serialise
-	 * here can result in EOF moving backwards and Bad Things Happen when
-	 * that occurs.
-	 */
-	spin_lock(&ip->i_flags_lock);
-	if (offset + size > i_size_read(inode))
-		i_size_write(inode, offset + size);
-	spin_unlock(&ip->i_flags_lock);
-
-	if (flags & XFS_DIO_FLAG_COW)
-		error = xfs_reflink_end_cow(ip, offset, size);
-	if (flags & XFS_DIO_FLAG_UNWRITTEN) {
-		trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
-
-		error = xfs_iomap_write_unwritten(ip, offset, size);
-	}
-	if (flags & XFS_DIO_FLAG_APPEND) {
-		trace_xfs_end_io_direct_write_append(ip, offset, size);
-
-		error = xfs_setfilesize(ip, offset, size);
-	}
-
-	return error;
-}
-
 STATIC ssize_t
 xfs_vm_direct_IO(
 	struct kiocb		*iocb,
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -55,12 +55,6 @@ struct xfs_ioend {
 
 extern const struct address_space_operations xfs_address_space_operations;
 
-int	xfs_get_blocks(struct inode *inode, sector_t offset,
-		       struct buffer_head *map_bh, int create);
-int	xfs_get_blocks_direct(struct inode *inode, sector_t offset,
-			      struct buffer_head *map_bh, int create);
-int	xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
-		ssize_t size, void *private);
 int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -210,62 +210,21 @@ xfs_file_dio_aio_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	loff_t			isize = i_size_read(inode);
+	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
 	size_t			count = iov_iter_count(to);
-	loff_t			end = iocb->ki_pos + count - 1;
-	struct iov_iter		data;
-	struct xfs_buftarg	*target;
-	ssize_t			ret = 0;
+	ssize_t			ret;
 
 	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
 
 	if (!count)
 		return 0; /* skip atime */
 
-	if (XFS_IS_REALTIME_INODE(ip))
-		target = ip->i_mount->m_rtdev_targp;
-	else
-		target = ip->i_mount->m_ddev_targp;
-
-	/* DIO must be aligned to device logical sector size */
-	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
-		if (iocb->ki_pos == isize)
-			return 0;
-		return -EINVAL;
-	}
-
 	file_accessed(iocb->ki_filp);
 
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-		if (ret)
-			goto out_unlock;
-
-		/*
-		 * Invalidate whole pages. This can return an error if we fail
-		 * to invalidate a page, but this should never happen on XFS.
-		 * Warn if it does fail.
-		 */
-		ret = invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-		ret = 0;
-	}
-
-	data = *to;
-	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-			xfs_get_blocks_direct, NULL, NULL, 0);
-	if (ret >= 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(to, ret);
-	}
-out_unlock:
+	ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	return ret;
 }
 
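The open-coded bt_logical_sectormask check removed above is not lost: iomap_dio_rw() rejects misaligned requests internally, so the user-visible O_DIRECT contract is unchanged. A small userspace illustration (hypothetical file name, and it assumes a 4096-byte logical sector size - adjust for the device under test):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(void)
	{
		void	*buf;
		int	fd = open("testfile", O_RDONLY | O_DIRECT);

		if (fd < 0 || posix_memalign(&buf, 4096, 4096))
			return 1;

		/* aligned offset and aligned buffer: the read is allowed */
		ssize_t ok = pread(fd, buf, 4096, 0);
		/* misaligned offset: rejected with EINVAL on either kernel path */
		ssize_t bad = pread(fd, buf, 4096, 1);

		printf("aligned=%zd misaligned=%zd\n", ok, bad);
		free(buf);
		close(fd);
		return 0;
	}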
@@ -465,6 +424,58 @@ restart:
 	return 0;
 }
 
+static int
+xfs_dio_write_end_io(
+	struct kiocb		*iocb,
+	ssize_t			size,
+	unsigned		flags)
+{
+	struct inode		*inode = file_inode(iocb->ki_filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	loff_t			offset = iocb->ki_pos;
+	bool			update_size = false;
+	int			error = 0;
+
+	trace_xfs_end_io_direct_write(ip, offset, size);
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+
+	if (size <= 0)
+		return size;
+
+	/*
+	 * We need to update the in-core inode size here so that we don't end up
+	 * with the on-disk inode size being outside the in-core inode size. We
+	 * have no other method of updating EOF for AIO, so always do it here
+	 * if necessary.
+	 *
+	 * We need to lock the test/set EOF update as we can be racing with
+	 * other IO completions here to update the EOF. Failing to serialise
+	 * here can result in EOF moving backwards and Bad Things Happen when
+	 * that occurs.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	if (offset + size > i_size_read(inode)) {
+		i_size_write(inode, offset + size);
+		update_size = true;
+	}
+	spin_unlock(&ip->i_flags_lock);
+
+	if (flags & IOMAP_DIO_COW) {
+		error = xfs_reflink_end_cow(ip, offset, size);
+		if (error)
+			return error;
+	}
+
+	if (flags & IOMAP_DIO_UNWRITTEN)
+		error = xfs_iomap_write_unwritten(ip, offset, size);
+	else if (update_size)
+		error = xfs_setfilesize(ip, offset, size);
+
+	return error;
+}
+
 /*
  * xfs_file_dio_aio_write - handle direct IO writes
  *
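For context on the helper added above: iomap_dio_rw() calls the end_io hook once the direct write completes, passing the byte count and flags describing what the write hit. The contract is defined on the iomap side introduced by the parent commit; the sketch below is reproduced from memory of that era's include/linux/iomap.h, so treat the exact spelling and values as an assumption:

	/* write hit unwritten extents that now need conversion */
	#define IOMAP_DIO_UNWRITTEN	(1 << 0)
	/* write went through a copy-on-write reservation */
	#define IOMAP_DIO_COW		(1 << 1)

	typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
			unsigned flags);

Note how xfs_dio_write_end_io() folds the three old XFS_DIO_FLAG_* cases into these two generic flags plus the update_size bookkeeping it now derives itself, rather than having xfs_map_direct() stash flags in b_private.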
@@ -504,9 +515,7 @@ xfs_file_dio_aio_write(
 	int			unaligned_io = 0;
 	int			iolock;
 	size_t			count = iov_iter_count(from);
-	loff_t			end;
-	struct iov_iter		data;
 	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
 					mp->m_rtdev_targp : mp->m_ddev_targp;
 
 	/* DIO must be aligned to device logical sector size */
@@ -534,23 +543,6 @@ xfs_file_dio_aio_write(
 	if (ret)
 		goto out;
 	count = iov_iter_count(from);
-	end = iocb->ki_pos + count - 1;
-
-	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-		if (ret)
-			goto out;
-
-		/*
-		 * Invalidate whole pages. This can return an error if we fail
-		 * to invalidate a page, but this should never happen on XFS.
-		 * Warn if it does fail.
-		 */
-		ret = invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-		ret = 0;
-	}
 
 	/*
 	 * If we are doing unaligned IO, wait for all other IO to drain,
@@ -573,22 +565,7 @@ xfs_file_dio_aio_write(
 		goto out;
 	}
 
-	data = *from;
-	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-			xfs_get_blocks_direct, xfs_end_io_direct_write,
-			NULL, DIO_ASYNC_EXTEND);
-
-	/* see generic_file_direct_write() for why this is necessary */
-	if (mapping->nrpages) {
-		invalidate_inode_pages2_range(mapping,
-			iocb->ki_pos >> PAGE_SHIFT,
-			end >> PAGE_SHIFT);
-	}
-
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(from, ret);
-	}
+	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
 out:
 	xfs_iunlock(ip, iolock);
 
@@ -1468,15 +1445,9 @@ xfs_filemap_fault(
 		return xfs_filemap_page_mkwrite(vma, vmf);
 
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-	if (IS_DAX(inode)) {
-		/*
-		 * we do not want to trigger unwritten extent conversion on read
-		 * faults - that is unnecessary overhead and would also require
-		 * changes to xfs_get_blocks_direct() to map unwritten extent
-		 * ioend for conversion on read-only mappings.
-		 */
+	if (IS_DAX(inode))
 		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
-	} else
+	else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -950,6 +950,19 @@ static inline bool imap_needs_alloc(struct inode *inode,
 		(IS_DAX(inode) && ISUNWRITTEN(imap));
 }
 
+static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
+{
+	/*
+	 * COW writes will allocate delalloc space, so we need to make sure
+	 * to take the lock exclusively here.
+	 */
+	if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO)))
+		return true;
+	if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE))
+		return true;
+	return false;
+}
+
 static int
 xfs_file_iomap_begin(
 	struct inode		*inode,
@@ -969,18 +982,14 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
-	    !xfs_get_extsz_hint(ip)) {
+	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
+	    !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		/* Reserve delalloc blocks for regular writeback. */
 		return xfs_file_iomap_begin_delay(inode, offset, length, flags,
 				iomap);
 	}
 
-	/*
-	 * COW writes will allocate delalloc space, so we need to make sure
-	 * to take the lock exclusively here.
-	 */
-	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+	if (need_excl_ilock(ip, flags)) {
 		lockmode = XFS_ILOCK_EXCL;
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 	} else {
@@ -993,17 +1002,41 @@ xfs_file_iomap_begin(
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	end_fsb = XFS_B_TO_FSB(mp, offset + length);
 
+	if (xfs_is_reflink_inode(ip) &&
+	    (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
+		shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
+		if (shared) {
+			xfs_iunlock(ip, lockmode);
+			goto alloc_done;
+		}
+		ASSERT(!isnullstartblock(imap.br_startblock));
+	}
+
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, 0);
 	if (error)
 		goto out_unlock;
 
-	if (flags & IOMAP_REPORT) {
+	if ((flags & IOMAP_REPORT) ||
+	    (xfs_is_reflink_inode(ip) &&
+	    (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
 		/* Trim the mapping to the nearest shared extent boundary. */
 		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
 				&trimmed);
 		if (error)
 			goto out_unlock;
+
+		/*
+		 * We're here because we're trying to do a directio write to a
+		 * region that isn't aligned to a filesystem block. If the
+		 * extent is shared, fall back to buffered mode to handle the
+		 * RMW.
+		 */
+		if (!(flags & IOMAP_REPORT) && shared) {
+			trace_xfs_reflink_bounce_dio_write(ip, &imap);
+			error = -EREMCHG;
+			goto out_unlock;
+		}
 	}
 
 	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
@@ -1038,6 +1071,7 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
+alloc_done:
 		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
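A closing note on the xfs_file_iomap_begin() changes: direct writes now take the ilock exclusively via the new need_excl_ilock() helper, because the CoW probe and the alloc_done short-circuit may end up allocating delalloc blocks. Condensed view of the resulting flow for a direct write to a reflinked file (a sketch stitched together from the hunks above, not verbatim kernel code):

	if (need_excl_ilock(ip, flags)) {	/* reflink write, zeroing, or direct write */
		lockmode = XFS_ILOCK_EXCL;
		xfs_ilock(ip, XFS_ILOCK_EXCL);
	}

	/* reuse an existing CoW fork mapping if one covers the offset... */
	shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
	if (shared) {
		xfs_iunlock(ip, lockmode);
		goto alloc_done;
	}

	/*
	 * ...otherwise a shared extent under an unaligned direct write bounces
	 * to buffered I/O with -EREMCHG, exactly as the deleted
	 * xfs_bounce_unaligned_dio_write() used to arrange.
	 */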