f2fs: support atomic writes
This patch introduces very limited support for atomic writes.
To support atomic writes, it adds two ioctls:
 o F2FS_IOC_START_ATOMIC_WRITE
 o F2FS_IOC_COMMIT_ATOMIC_WRITE

A database engine should follow this sequence:
1. open -> ioctl(F2FS_IOC_START_ATOMIC_WRITE);
2. writes
  : all the written data will be treated as atomic pages.
3. commit -> ioctl(F2FS_IOC_COMMIT_ATOMIC_WRITE);
  : this flushes all the data blocks to the disk, and the f2fs recovery procedure will expose them on an all-or-nothing basis.
4. go back to step 2 and repeat.

The IO patterns should be:

  ,- START_ATOMIC_WRITE                  ,- COMMIT_ATOMIC_WRITE
 CP | D D D D D D | FSYNC | D D D D | FSYNC ...
                      `- COMMIT_ATOMIC_WRITE

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
This commit is contained in:
parent
120c2cba1d
commit
88b88a6679
|
@ -1052,6 +1052,9 @@ static int f2fs_write_end(struct file *file,
|
||||||
|
|
||||||
trace_f2fs_write_end(inode, pos, len, copied);
|
trace_f2fs_write_end(inode, pos, len, copied);
|
||||||
|
|
||||||
|
if (f2fs_is_atomic_file(inode))
|
||||||
|
register_inmem_page(inode, page);
|
||||||
|
else
|
||||||
set_page_dirty(page);
|
set_page_dirty(page);
|
||||||
|
|
||||||
if (pos + copied > i_size_read(inode)) {
|
if (pos + copied > i_size_read(inode)) {
|
||||||
|
|
|
@ -195,6 +195,10 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
|
||||||
#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
|
#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
|
||||||
#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
|
#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
|
||||||
|
|
||||||
|
#define F2FS_IOCTL_MAGIC 0xf5
|
||||||
|
#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
|
||||||
|
#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
|
||||||
|
|
||||||
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
|
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
|
||||||
/*
|
/*
|
||||||
* ioctl commands in 32 bit emulation
|
* ioctl commands in 32 bit emulation
|
||||||
|
@ -263,6 +267,9 @@ struct f2fs_inode_info {
|
||||||
unsigned long long xattr_ver; /* cp version of xattr modification */
|
unsigned long long xattr_ver; /* cp version of xattr modification */
|
||||||
struct extent_info ext; /* in-memory extent cache entry */
|
struct extent_info ext; /* in-memory extent cache entry */
|
||||||
struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
|
struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
|
||||||
|
|
||||||
|
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
|
||||||
|
struct mutex inmem_lock; /* lock for inmemory pages */
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void get_extent_info(struct extent_info *ext,
|
static inline void get_extent_info(struct extent_info *ext,
|
||||||
|
@ -1051,7 +1058,8 @@ enum {
|
||||||
FI_INLINE_DATA, /* used for inline data*/
|
FI_INLINE_DATA, /* used for inline data*/
|
||||||
FI_APPEND_WRITE, /* inode has appended data */
|
FI_APPEND_WRITE, /* inode has appended data */
|
||||||
FI_UPDATE_WRITE, /* inode has in-place-update data */
|
FI_UPDATE_WRITE, /* inode has in-place-update data */
|
||||||
FI_NEED_IPU, /* used fo ipu for fdatasync */
|
FI_NEED_IPU, /* used for ipu per file */
|
||||||
|
FI_ATOMIC_FILE, /* indicate atomic file */
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
|
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
|
||||||
|
@ -1138,6 +1146,11 @@ static inline int f2fs_has_inline_data(struct inode *inode)
|
||||||
return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
|
return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Report whether the FI_ATOMIC_FILE flag is currently set on the f2fs
 * in-memory inode that backs @inode.
 */
static inline bool f2fs_is_atomic_file(struct inode *inode)
|
||||||
|
{
|
||||||
|
return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void *inline_data_addr(struct page *page)
|
static inline void *inline_data_addr(struct page *page)
|
||||||
{
|
{
|
||||||
struct f2fs_inode *ri = F2FS_INODE(page);
|
struct f2fs_inode *ri = F2FS_INODE(page);
|
||||||
|
@ -1275,6 +1288,8 @@ void destroy_node_manager_caches(void);
|
||||||
/*
|
/*
|
||||||
* segment.c
|
* segment.c
|
||||||
*/
|
*/
|
||||||
|
void register_inmem_page(struct inode *, struct page *);
|
||||||
|
void commit_inmem_pages(struct inode *, bool);
|
||||||
void f2fs_balance_fs(struct f2fs_sb_info *);
|
void f2fs_balance_fs(struct f2fs_sb_info *);
|
||||||
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
|
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
|
||||||
int f2fs_issue_flush(struct f2fs_sb_info *);
|
int f2fs_issue_flush(struct f2fs_sb_info *);
|
||||||
|
|
|
@ -862,6 +862,41 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Handler for F2FS_IOC_START_ATOMIC_WRITE.
 *
 * Marks the inode behind @filp with FI_ATOMIC_FILE and then calls
 * f2fs_convert_inline_data() on it.
 *
 * Returns -EACCES when the caller fails inode_owner_or_capable();
 * otherwise returns whatever f2fs_convert_inline_data() returns.
 */
static int f2fs_ioc_start_atomic_write(struct file *filp)
|
||||||
|
{
|
||||||
|
struct inode *inode = file_inode(filp);
|
||||||
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
||||||
|

||||||
|
/* only the owner (or a suitably capable task) may switch the file mode */
if (!inode_owner_or_capable(inode))
|
||||||
|
return -EACCES;
|
||||||
|

||||||
|
f2fs_balance_fs(sbi);
|
||||||
|

||||||
|
/*
 * NOTE(review): the flag is set before the conversion below and is not
 * cleared here if that call fails -- confirm this is intended.
 */
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
|
||||||
|

||||||
|
/*
 * presumably a size of MAX_INLINE_DATA + 1 forces any inline data out of
 * the inode -- TODO confirm against f2fs_convert_inline_data()
 */
return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Handler for F2FS_IOC_COMMIT_ATOMIC_WRITE.
 *
 * If the inode behind @filp is flagged as an atomic file, its registered
 * in-memory pages are committed (abort == false); afterwards the whole
 * file range [0, LONG_MAX] is synced through f2fs_sync_file().
 *
 * Returns -EACCES when the caller fails inode_owner_or_capable(), the
 * error from mnt_want_write_file() if write access cannot be obtained,
 * and otherwise the result of f2fs_sync_file().
 */
static int f2fs_ioc_commit_atomic_write(struct file *filp)
|
||||||
|
{
|
||||||
|
struct inode *inode = file_inode(filp);
|
||||||
|
int ret;
|
||||||
|

||||||
|
if (!inode_owner_or_capable(inode))
|
||||||
|
return -EACCES;
|
||||||
|

||||||
|
/* paired with mnt_drop_write_file() below */
ret = mnt_want_write_file(filp);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|

||||||
|
/*
 * commit_inmem_pages() returns void, so only the status of the
 * f2fs_sync_file() call below is reported to the caller.
 */
if (f2fs_is_atomic_file(inode))
|
||||||
|
commit_inmem_pages(inode, false);
|
||||||
|

||||||
|
ret = f2fs_sync_file(filp, 0, LONG_MAX, 0);
|
||||||
|
mnt_drop_write_file(filp);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
|
static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
|
||||||
{
|
{
|
||||||
struct inode *inode = file_inode(filp);
|
struct inode *inode = file_inode(filp);
|
||||||
|
@ -899,6 +934,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||||
return f2fs_ioc_getflags(filp, arg);
|
return f2fs_ioc_getflags(filp, arg);
|
||||||
case F2FS_IOC_SETFLAGS:
|
case F2FS_IOC_SETFLAGS:
|
||||||
return f2fs_ioc_setflags(filp, arg);
|
return f2fs_ioc_setflags(filp, arg);
|
||||||
|
case F2FS_IOC_START_ATOMIC_WRITE:
|
||||||
|
return f2fs_ioc_start_atomic_write(filp);
|
||||||
|
case F2FS_IOC_COMMIT_ATOMIC_WRITE:
|
||||||
|
return f2fs_ioc_commit_atomic_write(filp);
|
||||||
case FITRIM:
|
case FITRIM:
|
||||||
return f2fs_ioc_fitrim(filp, arg);
|
return f2fs_ioc_fitrim(filp, arg);
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -21,6 +21,9 @@ bool f2fs_may_inline(struct inode *inode)
|
||||||
if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
|
if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (f2fs_is_atomic_file(inode))
|
||||||
|
return false;
|
||||||
|
|
||||||
nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
|
nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
|
||||||
if (inode->i_blocks > nr_blocks)
|
if (inode->i_blocks > nr_blocks)
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -269,6 +269,10 @@ void f2fs_evict_inode(struct inode *inode)
|
||||||
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
||||||
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
|
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
|
||||||
|
|
||||||
|
/* any remaining atomic pages should be discarded */
|
||||||
|
if (f2fs_is_atomic_file(inode))
|
||||||
|
commit_inmem_pages(inode, true);
|
||||||
|
|
||||||
trace_f2fs_evict_inode(inode);
|
trace_f2fs_evict_inode(inode);
|
||||||
truncate_inode_pages_final(&inode->i_data);
|
truncate_inode_pages_final(&inode->i_data);
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
|
|
||||||
static struct kmem_cache *discard_entry_slab;
|
static struct kmem_cache *discard_entry_slab;
|
||||||
static struct kmem_cache *sit_entry_set_slab;
|
static struct kmem_cache *sit_entry_set_slab;
|
||||||
|
static struct kmem_cache *inmem_entry_slab;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
|
* __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
|
||||||
|
@ -173,6 +174,60 @@ found_middle:
|
||||||
return result + __reverse_ffz(tmp);
|
return result + __reverse_ffz(tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Queue @page on the per-inode list of in-memory (atomic) pages.
 *
 * A tracking entry is allocated from inmem_entry_slab, a reference on
 * @page is taken with get_page(), and the entry is appended to the tail
 * of fi->inmem_pages while fi->inmem_lock is held.
 */
void register_inmem_page(struct inode *inode, struct page *page)
|
||||||
|
{
|
||||||
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
||||||
|
struct inmem_pages *new;
|
||||||
|

||||||
|
/*
 * NOTE(review): the result is used without a NULL check; presumably
 * f2fs_kmem_cache_alloc() never returns NULL -- confirm.
 */
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
|
||||||
|

||||||
|
/* record the page pointer in the new, not yet linked, list entry */
|
||||||
|
new->page = page;
|
||||||
|
INIT_LIST_HEAD(&new->list);
|
||||||
|

||||||
|
/* take a page reference and link the entry, both under inmem_lock */
|
||||||
|
mutex_lock(&fi->inmem_lock);
|
||||||
|
get_page(page);
|
||||||
|
list_add_tail(&new->list, &fi->inmem_pages);
|
||||||
|
mutex_unlock(&fi->inmem_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Drain the inode's list of registered in-memory pages.
 *
 * @inode: inode whose fi->inmem_pages list is processed
 * @abort: when true, no page is written; entries are only released
 *
 * Every list entry is removed and freed. When @abort is false and the
 * page still belongs to inode->i_mapping, the page is written through
 * do_write_data_page() with a DATA / WRITE_SYNC request first. After the
 * loop the merged DATA bio is submitted if anything was written, and the
 * function waits on the whole mapping range before returning.
 *
 * The list walk runs under fi->inmem_lock; the whole operation runs
 * between f2fs_lock_op() and f2fs_unlock_op().
 */
void commit_inmem_pages(struct inode *inode, bool abort)
|
||||||
|
{
|
||||||
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
||||||
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
||||||
|
struct inmem_pages *cur, *tmp;
|
||||||
|
/* set once at least one page has been handed to do_write_data_page() */
bool submit_bio = false;
|
||||||
|
struct f2fs_io_info fio = {
|
||||||
|
.type = DATA,
|
||||||
|
.rw = WRITE_SYNC,
|
||||||
|
};
|
||||||
|

||||||
|
f2fs_balance_fs(sbi);
|
||||||
|
f2fs_lock_op(sbi);
|
||||||
|

||||||
|
mutex_lock(&fi->inmem_lock);
|
||||||
|
/* _safe variant: the current entry is deleted and freed inside the loop */
list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
|
||||||
|
lock_page(cur->page);
|
||||||
|
/* skip the write on abort, or if the page left this inode's mapping */
if (!abort && cur->page->mapping == inode->i_mapping) {
|
||||||
|
f2fs_wait_on_page_writeback(cur->page, DATA);
|
||||||
|
/* keep the inode's dirty page count in step with the page state */
if (clear_page_dirty_for_io(cur->page))
|
||||||
|
inode_dec_dirty_pages(inode);
|
||||||
|
/* NOTE(review): the return value of the write is not checked here */
do_write_data_page(cur->page, &fio);
|
||||||
|
submit_bio = true;
|
||||||
|
}
|
||||||
|
/*
 * presumably the second argument (1) also unlocks the page locked above
 * while dropping a reference -- TODO confirm against f2fs_put_page()
 */
f2fs_put_page(cur->page, 1);
|
||||||
|
list_del(&cur->list);
|
||||||
|
kmem_cache_free(inmem_entry_slab, cur);
|
||||||
|
}
|
||||||
|
if (submit_bio)
|
||||||
|
f2fs_submit_merged_bio(sbi, DATA, WRITE);
|
||||||
|
mutex_unlock(&fi->inmem_lock);
|
||||||
|

||||||
|
/* wait on the entire mapping range, even when nothing was written */
filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX);
|
||||||
|
f2fs_unlock_op(sbi);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function balances dirty node and dentry pages.
|
* This function balances dirty node and dentry pages.
|
||||||
* In addition, it controls garbage collection.
|
* In addition, it controls garbage collection.
|
||||||
|
@ -2148,8 +2203,15 @@ int __init create_segment_manager_caches(void)
|
||||||
sizeof(struct nat_entry_set));
|
sizeof(struct nat_entry_set));
|
||||||
if (!sit_entry_set_slab)
|
if (!sit_entry_set_slab)
|
||||||
goto destory_discard_entry;
|
goto destory_discard_entry;
|
||||||
|
|
||||||
|
inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
|
||||||
|
sizeof(struct inmem_pages));
|
||||||
|
if (!inmem_entry_slab)
|
||||||
|
goto destroy_sit_entry_set;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
destroy_sit_entry_set:
|
||||||
|
kmem_cache_destroy(sit_entry_set_slab);
|
||||||
destory_discard_entry:
|
destory_discard_entry:
|
||||||
kmem_cache_destroy(discard_entry_slab);
|
kmem_cache_destroy(discard_entry_slab);
|
||||||
fail:
|
fail:
|
||||||
|
@ -2160,4 +2222,5 @@ void destroy_segment_manager_caches(void)
|
||||||
{
|
{
|
||||||
kmem_cache_destroy(sit_entry_set_slab);
|
kmem_cache_destroy(sit_entry_set_slab);
|
||||||
kmem_cache_destroy(discard_entry_slab);
|
kmem_cache_destroy(discard_entry_slab);
|
||||||
|
kmem_cache_destroy(inmem_entry_slab);
|
||||||
}
|
}
|
||||||
|
|
|
@ -175,6 +175,11 @@ struct segment_allocation {
|
||||||
void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
|
void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* One tracking entry per page queued on an inode's in-memory page list. */
struct inmem_pages {
|
||||||
|
/* list linkage for the entry */
struct list_head list;
|
||||||
|
/* the page this entry tracks */
struct page *page;
|
||||||
|
};
|
||||||
|
|
||||||
struct sit_info {
|
struct sit_info {
|
||||||
const struct segment_allocation *s_ops;
|
const struct segment_allocation *s_ops;
|
||||||
|
|
||||||
|
@ -504,7 +509,7 @@ static inline bool need_inplace_update(struct inode *inode)
|
||||||
unsigned int policy = SM_I(sbi)->ipu_policy;
|
unsigned int policy = SM_I(sbi)->ipu_policy;
|
||||||
|
|
||||||
/* IPU can be done only for the user data */
|
/* IPU can be done only for the user data */
|
||||||
if (S_ISDIR(inode->i_mode))
|
if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (policy & (0x1 << F2FS_IPU_FORCE))
|
if (policy & (0x1 << F2FS_IPU_FORCE))
|
||||||
|
|
|
@ -373,6 +373,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
|
||||||
fi->i_advise = 0;
|
fi->i_advise = 0;
|
||||||
rwlock_init(&fi->ext.ext_lock);
|
rwlock_init(&fi->ext.ext_lock);
|
||||||
init_rwsem(&fi->i_sem);
|
init_rwsem(&fi->i_sem);
|
||||||
|
INIT_LIST_HEAD(&fi->inmem_pages);
|
||||||
|
mutex_init(&fi->inmem_lock);
|
||||||
|
|
||||||
set_inode_flag(fi, FI_NEW_INODE);
|
set_inode_flag(fi, FI_NEW_INODE);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue