vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode

Introduce a new function for generic inode syncing (vfs_fsync_range) and use
it from the fsync() path. Also introduce a new helper for syncing after a sync
write (generic_write_sync) that uses the generic function.

Use these new helpers for syncing from generic VFS functions. This makes
O_SYNC writes to block devices acquire i_mutex for syncing. If we really
care about this, we can make block_fsync() drop the i_mutex and reacquire
it before it returns.

CC: Evgeniy Polyakov <zbr@ioremap.net>
CC: ocfs2-devel@oss.oracle.com
CC: Joel Becker <joel.becker@oracle.com>
CC: Felix Blyakher <felixb@sgi.com>
CC: xfs@oss.sgi.com
CC: Anton Altaparmakov <aia21@cantab.net>
CC: linux-ntfs-dev@lists.sourceforge.net
CC: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
CC: linux-ext4@vger.kernel.org
CC: tytso@mit.edu
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
This commit is contained in:
Jan Kara 2009-08-17 19:52:36 +02:00
parent eef9938067
commit 148f948ba8
4 changed files with 61 additions and 30 deletions

View File

@ -976,25 +976,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
if (ret > 0) { if (ret > 0) {
unsigned long nr_pages; unsigned long nr_pages;
*ppos += ret;
nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
/*
* If file or inode is SYNC and we actually wrote some data,
* sync it.
*/
if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err; int err;
mutex_lock(&inode->i_mutex); nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
err = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA);
mutex_unlock(&inode->i_mutex);
err = generic_write_sync(out, *ppos, ret);
if (err) if (err)
ret = err; ret = err;
} else
*ppos += ret;
balance_dirty_pages_ratelimited_nr(mapping, nr_pages); balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
} }

View File

@ -178,19 +178,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
} }
/** /**
* vfs_fsync - perform a fsync or fdatasync on a file * vfs_fsync_range - helper to sync a range of data & metadata to disk
* @file: file to sync * @file: file to sync
* @dentry: dentry of @file * @dentry: dentry of @file
* @data: only perform a fdatasync operation * @start: offset in bytes of the beginning of data range to sync
* @end: offset in bytes of the end of data range (inclusive)
* @datasync: perform only datasync
* *
* Write back data and metadata for @file to disk. If @datasync is * Write back data in range @start..@end and metadata for @file to disk. If
* set only metadata needed to access modified file data is written. * @datasync is set only metadata needed to access modified file data is
* written.
* *
* In case this function is called from nfsd @file may be %NULL and * In case this function is called from nfsd @file may be %NULL and
* only @dentry is set. This can only happen when the filesystem * only @dentry is set. This can only happen when the filesystem
* implements the export_operations API. * implements the export_operations API.
*/ */
int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
loff_t end, int datasync)
{ {
const struct file_operations *fop; const struct file_operations *fop;
struct address_space *mapping; struct address_space *mapping;
@ -214,7 +218,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
goto out; goto out;
} }
ret = filemap_fdatawrite(mapping); ret = filemap_fdatawrite_range(mapping, start, end);
/* /*
* We need to protect against concurrent writers, which could cause * We need to protect against concurrent writers, which could cause
@ -225,12 +229,32 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
if (!ret) if (!ret)
ret = err; ret = err;
mutex_unlock(&mapping->host->i_mutex); mutex_unlock(&mapping->host->i_mutex);
err = filemap_fdatawait(mapping);
err = filemap_fdatawait_range(mapping, start, end);
if (!ret) if (!ret)
ret = err; ret = err;
out: out:
return ret; return ret;
} }
EXPORT_SYMBOL(vfs_fsync_range);
/**
 * vfs_fsync - perform a fsync or fdatasync on a whole file
 * @file:	file to sync
 * @dentry:	dentry of @file
 * @datasync:	only perform a fdatasync operation
 *
 * Write back data and metadata for @file to disk. If @datasync is
 * set only metadata needed to access modified file data is written.
 *
 * This is vfs_fsync_range() applied to the byte range 0 .. LLONG_MAX;
 * the return value is whatever vfs_fsync_range() returns.
 *
 * In case this function is called from nfsd @file may be %NULL and
 * only @dentry is set. This can only happen when the filesystem
 * implements the export_operations API.
 */
int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
	int ret;

	/* Whole file: first byte through the largest offset passed as @end. */
	ret = vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync);

	return ret;
}
EXPORT_SYMBOL(vfs_fsync); EXPORT_SYMBOL(vfs_fsync);
static int do_fsync(unsigned int fd, int datasync) static int do_fsync(unsigned int fd, int datasync)
@ -256,6 +280,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
return do_fsync(fd, 1); return do_fsync(fd, 1);
} }
/**
 * generic_write_sync - sync a written range if the file or inode is sync
 * @file:	file to which the write happened
 * @pos:	offset where the write started
 * @count:	length of the write
 *
 * Thin wrapper around vfs_fsync_range(). Returns 0 without doing anything
 * unless @file has O_SYNC set in f_flags or the inode behind its mapping
 * is IS_SYNC. Otherwise the inclusive byte range
 * @pos .. @pos + @count - 1 is passed to vfs_fsync_range() with the
 * datasync argument set to 1, and that call's result is returned.
 */
int generic_write_sync(struct file *file, loff_t pos, loff_t count)
{
	int need_sync = (file->f_flags & O_SYNC) ||
			IS_SYNC(file->f_mapping->host);

	if (need_sync)
		return vfs_fsync_range(file, file->f_path.dentry, pos,
				       pos + count - 1, 1);

	return 0;
}
EXPORT_SYMBOL(generic_write_sync);
/* /*
* sys_sync_file_range() permits finely controlled syncing over a segment of * sys_sync_file_range() permits finely controlled syncing over a segment of
* a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is

View File

@ -2098,7 +2098,10 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
extern int filemap_fdatawrite_range(struct address_space *mapping, extern int filemap_fdatawrite_range(struct address_space *mapping,
loff_t start, loff_t end); loff_t start, loff_t end);
extern int vfs_fsync_range(struct file *file, struct dentry *dentry,
loff_t start, loff_t end, int datasync);
extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
extern void sync_supers(void); extern void sync_supers(void);
extern void emergency_sync(void); extern void emergency_sync(void);
extern void emergency_remount(void); extern void emergency_remount(void);

View File

@ -39,11 +39,10 @@
/* /*
* FIXME: remove all knowledge of the buffer layer from the core VM * FIXME: remove all knowledge of the buffer layer from the core VM
*/ */
#include <linux/buffer_head.h> /* for generic_osync_inode */ #include <linux/buffer_head.h> /* for try_to_free_buffers */
#include <asm/mman.h> #include <asm/mman.h>
/* /*
* Shared mappings implemented 30.11.1994. It's not fully working yet, * Shared mappings implemented 30.11.1994. It's not fully working yet,
* though. * though.
@ -2477,8 +2476,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos) unsigned long nr_segs, loff_t pos)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host;
struct inode *inode = mapping->host;
ssize_t ret; ssize_t ret;
BUG_ON(iocb->ki_pos != pos); BUG_ON(iocb->ki_pos != pos);
@ -2487,11 +2485,10 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
if ((ret > 0 || ret == -EIOCBQUEUED) && if (ret > 0 || ret == -EIOCBQUEUED) {
((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
ssize_t err; ssize_t err;
err = sync_page_range(inode, mapping, pos, ret); err = generic_write_sync(file, pos, ret);
if (err < 0 && ret > 0) if (err < 0 && ret > 0)
ret = err; ret = err;
} }