diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 11b7f4ebea7c..a7799c2fca28 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -51,6 +51,18 @@ Description: Controls the dirty page count condition for the in-place-update policies. +What: /sys/fs/f2fs//min_hot_blocks +Date: March 2017 +Contact: "Jaegeuk Kim" +Description: + Controls the dirty page count condition for redefining hot data. + +What: /sys/fs/f2fs//min_ssr_sections +Date: October 2017 +Contact: "Chao Yu" +Description: + Controls the fee section threshold to trigger SSR allocation. + What: /sys/fs/f2fs//max_small_discards Date: November 2013 Contact: "Jaegeuk Kim" @@ -102,6 +114,12 @@ Contact: "Jaegeuk Kim" Description: Controls the idle timing. +What: /sys/fs/f2fs//iostat_enable +Date: August 2017 +Contact: "Chao Yu" +Description: + Controls to enable/disable IO stat. + What: /sys/fs/f2fs//ra_nid_pages Date: October 2015 Contact: "Chao Yu" @@ -122,6 +140,12 @@ Contact: "Shuoran Liu" Description: Shows total written kbytes issued to disk. +What: /sys/fs/f2fs//feature +Date: July 2017 +Contact: "Jaegeuk Kim" +Description: + Shows all enabled features in current device. + What: /sys/fs/f2fs//inject_rate Date: May 2016 Contact: "Sheng Yong" @@ -138,7 +162,18 @@ What: /sys/fs/f2fs//reserved_blocks Date: June 2017 Contact: "Chao Yu" Description: - Controls current reserved blocks in system. + Controls target reserved blocks in system, the threshold + is soft, it could exceed current available user space. + +What: /sys/fs/f2fs//current_reserved_blocks +Date: October 2017 +Contact: "Yunlong Song" +Contact: "Chao Yu" +Description: + Shows current reserved blocks in system, it may be temporarily + smaller than target_reserved_blocks, but will gradually + increase to target_reserved_blocks when more free blocks are + freed by user later. What: /sys/fs/f2fs//gc_urgent Date: August 2017 diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 436b3a1464d9..2bb7c9fc5144 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -250,6 +250,9 @@ static int __f2fs_set_acl(struct inode *inode, int type, int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + return __f2fs_set_acl(inode, type, acl, NULL); } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0bb8e2c022d3..dd2e73e10857 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -29,7 +29,6 @@ struct kmem_cache *inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) { set_ckpt_flags(sbi, CP_ERROR_FLAG); - sbi->sb->s_flags |= MS_RDONLY; if (!end_io) f2fs_flush_merged_writes(sbi); } @@ -398,24 +397,23 @@ const struct address_space_operations f2fs_meta_aops = { #endif }; -static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type) { struct inode_management *im = &sbi->im[type]; struct ino_entry *e, *tmp; tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS); -retry: + radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); spin_lock(&im->ino_lock); e = radix_tree_lookup(&im->ino_root, ino); if (!e) { e = tmp; - if (radix_tree_insert(&im->ino_root, ino, e)) { - spin_unlock(&im->ino_lock); - radix_tree_preload_end(); - goto retry; - } + if (unlikely(radix_tree_insert(&im->ino_root, ino, e))) + f2fs_bug_on(sbi, 1); + memset(e, 0, sizeof(struct ino_entry)); e->ino = ino; @@ -423,6 +421,10 @@ retry: if (type != ORPHAN_INO) im->ino_num++; } + + if (type == FLUSH_INO) + f2fs_set_bit(devidx, (char *)&e->dirty_device); + spin_unlock(&im->ino_lock); radix_tree_preload_end(); @@ -451,7 +453,7 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* add new dirty ino entry into list */ - __add_ino_entry(sbi, ino, type); + __add_ino_entry(sbi, ino, 0, type); } void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) @@ -477,7 +479,7 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all) struct ino_entry *e, *tmp; int i; - for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) { + for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) { struct inode_management *im = &sbi->im[i]; spin_lock(&im->ino_lock); @@ -491,6 +493,27 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all) } } +void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type) +{ + __add_ino_entry(sbi, ino, devidx, type); +} + +bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type) +{ + struct inode_management *im = &sbi->im[type]; + struct ino_entry *e; + bool is_dirty = false; + + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); + if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device)) + is_dirty = true; + spin_unlock(&im->ino_lock); + return is_dirty; +} + int acquire_orphan_inode(struct f2fs_sb_info *sbi) { struct inode_management *im = &sbi->im[ORPHAN_INO]; @@ -527,7 +550,7 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) void add_orphan_inode(struct inode *inode) { /* add new orphan ino entry into list */ - __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO); + __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO); update_inode_page(inode); } @@ -551,7 +574,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) return err; } - __add_ino_entry(sbi, ino, ORPHAN_INO); + __add_ino_entry(sbi, ino, 0, ORPHAN_INO); inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) { @@ -587,6 +610,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) block_t start_blk, orphan_blocks, i, j; unsigned int s_flags = sbi->sb->s_flags; int err = 0; +#ifdef CONFIG_QUOTA + int quota_enabled; +#endif if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; @@ -599,8 +625,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ - f2fs_enable_quota_files(sbi); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); @@ -628,7 +655,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) out: #ifdef CONFIG_QUOTA /* Turn quotas off */ - f2fs_quota_off_umount(sbi->sb); + if (quota_enabled) + f2fs_quota_off_umount(sbi->sb); #endif sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -983,7 +1011,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) update_inode_page(inode); iput(inode); } - }; + } return 0; } @@ -1143,6 +1171,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct super_block *sb = sbi->sb; struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); u64 kbytes_written; + int err; /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) { @@ -1236,6 +1265,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (unlikely(f2fs_cp_error(sbi))) return -EIO; + /* flush all device cache */ + err = f2fs_flush_device_cache(sbi); + if (err) + return err; + /* write out checkpoint buffer at block 0 */ update_meta_page(sbi, ckpt, start_blk++); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7b3ad5d8e2e9..516fa0d3ff9c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -173,7 +173,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, { struct bio *bio; - bio = f2fs_bio_alloc(npages); + bio = f2fs_bio_alloc(sbi, npages, true); f2fs_target_device(sbi, blk_addr, bio); bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; @@ -418,8 +418,8 @@ next: bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; - /* set submitted = 1 as a return value */ - fio->submitted = 1; + /* set submitted = true as a return value */ + fio->submitted = true; inc_page_count(sbi, WB_DATA_TYPE(bio_page)); @@ -473,7 +473,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, f2fs_wait_on_block_writeback(sbi, blkaddr); } - bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); + bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); if (!bio) { if (ctx) fscrypt_release_ctx(ctx); @@ -833,6 +833,13 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) struct f2fs_map_blocks map; int err = 0; + /* convert inline data for Direct I/O*/ + if (iocb->ki_flags & IOCB_DIRECT) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; @@ -845,15 +852,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_next_pgofs = NULL; - if (iocb->ki_flags & IOCB_DIRECT) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; + if (iocb->ki_flags & IOCB_DIRECT) return f2fs_map_blocks(inode, &map, 1, __force_buffered_io(inode, WRITE) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO); - } if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { err = f2fs_convert_inline_inode(inode); if (err) @@ -1334,7 +1337,7 @@ static int f2fs_read_data_pages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - struct inode *inode = file->f_mapping->host; + struct inode *inode = mapping->host; struct page *page = list_last_entry(pages, struct page, lru); trace_f2fs_readpages(inode, page, nr_pages); @@ -1495,6 +1498,7 @@ static int __write_data_page(struct page *page, bool *submitted, int err = 0; struct f2fs_io_info fio = { .sbi = sbi, + .ino = inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), @@ -1566,8 +1570,11 @@ write: err = do_write_data_page(&fio); } } + + down_write(&F2FS_I(inode)->i_sem); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; + up_write(&F2FS_I(inode)->i_sem); done: if (err && err != -ENOENT) @@ -1932,6 +1939,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); + if (f2fs_is_atomic_file(inode) && + !available_free_memory(sbi, INMEM_PAGES)) { + err = -ENOMEM; + goto fail; + } + /* * We should check this at this moment to avoid deadlock on inode page * and #0 page. The locking rule for inline_data conversion should be: @@ -1947,7 +1960,7 @@ repeat: * Do not use grab_cache_page_write_begin() to avoid deadlock due to * wait_for_stable_page. Will wait that below with our IO control. */ - page = pagecache_get_page(mapping, index, + page = f2fs_pagecache_get_page(mapping, index, FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); if (!page) { err = -ENOMEM; @@ -2009,6 +2022,8 @@ repeat: fail: f2fs_put_page(page, 1); f2fs_write_failed(mapping, pos + len); + if (f2fs_is_atomic_file(inode)) + drop_inmem_pages_all(sbi); return err; } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 87f449845f5f..ecada8425268 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -45,9 +45,18 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA); + si->ndirty_qdata = get_pages(sbi, F2FS_DIRTY_QDATA); si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; + + si->nquota_files = 0; + if (f2fs_sb_has_quota_ino(sbi->sb)) { + for (i = 0; i < MAXQUOTAS; i++) { + if (f2fs_qf_ino(sbi->sb, i)) + si->nquota_files++; + } + } si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->aw_cnt = atomic_read(&sbi->aw_cnt); @@ -61,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) atomic_read(&SM_I(sbi)->fcc_info->issued_flush); si->nr_flushing = atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + si->flush_list_empty = + llist_empty(&SM_I(sbi)->fcc_info->issue_list); } if (SM_I(sbi) && SM_I(sbi)->dcc_info) { si->nr_discarded = @@ -96,9 +107,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; si->sits = MAIN_SEGS(sbi); si->dirty_sits = SIT_I(sbi)->dirty_sentries; - si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST]; + si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID]; si->avail_nids = NM_I(sbi)->available_nids; - si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]; + si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; si->bg_gc = sbi->bg_gc; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) @@ -231,14 +242,14 @@ get_cache: } /* free nids */ - si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + - NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) * + si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] + + NM_I(sbi)->nid_cnt[PREALLOC_NID]) * sizeof(struct free_nid); si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); si->cache_mem += NM_I(sbi)->dirty_nat_cnt * sizeof(struct nat_entry_set); si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); - for (i = 0; i <= ORPHAN_INO; i++) + for (i = 0; i < MAX_INO_ENTRY; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); si->cache_mem += atomic_read(&sbi->total_ext_tree) * sizeof(struct extent_tree); @@ -262,9 +273,10 @@ static int stat_show(struct seq_file *s, void *v) list_for_each_entry(si, &f2fs_stat_list, stat_list) { update_general_status(si->sbi); - seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n", + seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n", si->sbi->sb->s_bdev, i++, - f2fs_readonly(si->sbi->sb) ? "RO": "RW"); + f2fs_readonly(si->sbi->sb) ? "RO": "RW", + f2fs_cp_error(si->sbi) ? "Error": "Good"); seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", si->sit_area_segs, si->nat_area_segs); seq_printf(s, "[SSA: %d] [MAIN: %d", @@ -349,10 +361,11 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), " + seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, + si->flush_list_empty, si->nr_discarding, si->nr_discarded, si->nr_discard_cmd, si->undiscard_blks); seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " @@ -365,6 +378,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); seq_printf(s, " - datas: %4d in files:%4d\n", si->ndirty_data, si->ndirty_files); + seq_printf(s, " - quota datas: %4d in quota files:%4d\n", + si->ndirty_qdata, si->nquota_files); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); seq_printf(s, " - imeta: %4d\n", diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c0c933ad43c8..2d98d877c09d 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -10,10 +10,12 @@ */ #include #include +#include #include "f2fs.h" #include "node.h" #include "acl.h" #include "xattr.h" +#include static unsigned long dir_blocks(struct inode *inode) { @@ -847,6 +849,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct f2fs_dentry_block *dentry_blk = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; + loff_t start_pos = ctx->pos; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); struct f2fs_dentry_ptr d; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); @@ -855,24 +858,32 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) if (f2fs_encrypted_inode(inode)) { err = fscrypt_get_encryption_info(inode); if (err && err != -ENOKEY) - return err; + goto out; err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); if (err < 0) - return err; + goto out; } if (f2fs_has_inline_dentry(inode)) { err = f2fs_read_inline_dir(file, ctx, &fstr); - goto out; + goto out_free; } - /* readahead for multi pages of dir */ - if (npages - n > 1 && !ra_has_index(ra, n)) - page_cache_sync_readahead(inode->i_mapping, ra, file, n, + for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) { + + /* allow readdir() to be interrupted */ + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + goto out_free; + } + cond_resched(); + + /* readahead for multi pages of dir */ + if (npages - n > 1 && !ra_has_index(ra, n)) + page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); - for (; n < npages; n++) { dentry_page = get_lock_data_page(inode, n, false); if (IS_ERR(dentry_page)) { err = PTR_ERR(dentry_page); @@ -880,7 +891,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) err = 0; continue; } else { - goto out; + goto out_free; } } @@ -896,12 +907,13 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) break; } - ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } -out: +out_free: fscrypt_fname_free_buffer(&fstr); +out: + trace_f2fs_readdir(inode, start_pos, ctx->pos, err); return err < 0 ? err : 0; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 115204fdefcc..f4e094e816c6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -44,6 +44,8 @@ enum { FAULT_KMALLOC, FAULT_PAGE_ALLOC, + FAULT_PAGE_GET, + FAULT_ALLOC_BIO, FAULT_ALLOC_NID, FAULT_ORPHAN, FAULT_BLOCK, @@ -91,6 +93,7 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_GRPQUOTA 0x00100000 #define F2FS_MOUNT_PRJQUOTA 0x00200000 #define F2FS_MOUNT_QUOTA 0x00400000 +#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) @@ -116,6 +119,8 @@ struct f2fs_mount_info { #define F2FS_FEATURE_EXTRA_ATTR 0x0008 #define F2FS_FEATURE_PRJQUOTA 0x0010 #define F2FS_FEATURE_INODE_CHKSUM 0x0020 +#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040 +#define F2FS_FEATURE_QUOTA_INO 0x0080 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -145,7 +150,7 @@ enum { #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) -#define DISCARD_ISSUE_RATE 8 +#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ #define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ #define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ #define DEF_CP_INTERVAL 60 /* 60 secs */ @@ -156,7 +161,6 @@ struct cp_control { __u64 trim_start; __u64 trim_end; __u64 trim_minlen; - __u64 trimmed; }; /* @@ -175,12 +179,14 @@ enum { ORPHAN_INO, /* for orphan ino list */ APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ + FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; struct ino_entry { - struct list_head list; /* list head */ - nid_t ino; /* inode number */ + struct list_head list; /* list head */ + nid_t ino; /* inode number */ + unsigned int dirty_device; /* dirty device bitmap */ }; /* for the list of inodes to be GCed */ @@ -204,10 +210,6 @@ struct discard_entry { #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ (MAX_PLIST_NUM - 1) : (blk_num - 1)) -#define P_ACTIVE 0x01 -#define P_TRIM 0x02 -#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM)) - enum { D_PREP, D_SUBMIT, @@ -239,12 +241,32 @@ struct discard_cmd { int error; /* bio error */ }; +enum { + DPOLICY_BG, + DPOLICY_FORCE, + DPOLICY_FSTRIM, + DPOLICY_UMOUNT, + MAX_DPOLICY, +}; + +struct discard_policy { + int type; /* type of discard */ + unsigned int min_interval; /* used for candidates exist */ + unsigned int max_interval; /* used for candidates not exist */ + unsigned int max_requests; /* # of discards issued per round */ + unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */ + bool io_aware; /* issue discard in idle time */ + bool sync; /* submit discard with REQ_SYNC flag */ + unsigned int granularity; /* discard granularity */ +}; + struct discard_cmd_control { struct task_struct *f2fs_issue_discard; /* discard thread */ struct list_head entry_list; /* 4KB discard entry list */ struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */ struct list_head wait_list; /* store on-flushing entries */ + struct list_head fstrim_list; /* in-flight discard from fstrim */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ unsigned int discard_wake; /* to wake up discard thread */ struct mutex cmd_lock; @@ -377,11 +399,14 @@ struct f2fs_flush_device { /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 +#define DEF_MIN_INLINE_SIZE 1 static inline int get_extra_isize(struct inode *inode); -#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ - (CUR_ADDRS_PER_INODE(inode) - \ - DEF_INLINE_RESERVED_SIZE - \ - F2FS_INLINE_XATTR_ADDRS)) +static inline int get_inline_xattr_addrs(struct inode *inode); +#define F2FS_INLINE_XATTR_ADDRS(inode) get_inline_xattr_addrs(inode) +#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ + (CUR_ADDRS_PER_INODE(inode) - \ + F2FS_INLINE_XATTR_ADDRS(inode) - \ + DEF_INLINE_RESERVED_SIZE)) /* for inline dir */ #define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ @@ -581,6 +606,7 @@ struct f2fs_inode_info { #endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ + struct list_head inmem_ilist; /* list for inmem inodes */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ @@ -591,6 +617,7 @@ struct f2fs_inode_info { int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ + int i_inline_xattr_size; /* inline xattr size */ }; static inline void get_extent_info(struct extent_info *ext, @@ -664,10 +691,13 @@ static inline void __try_update_largest_extent(struct inode *inode, } } -enum nid_list { - FREE_NID_LIST, - ALLOC_NID_LIST, - MAX_NID_LIST, +/* + * For free nid management + */ +enum nid_state { + FREE_NID, /* newly added to free nid list */ + PREALLOC_NID, /* it is preallocated */ + MAX_NID_STATE, }; struct f2fs_nm_info { @@ -690,8 +720,8 @@ struct f2fs_nm_info { /* free node ids management */ struct radix_tree_root free_nid_root;/* root of the free_nid cache */ - struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */ - unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */ + struct list_head free_nid_list; /* list for free nids excluding preallocated nids */ + unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */ spinlock_t nid_list_lock; /* protect nid lists ops */ struct mutex build_lock; /* lock for build free nids */ unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; @@ -769,6 +799,7 @@ enum { struct flush_cmd { struct completion wait; struct llist_node llnode; + nid_t ino; int ret; }; @@ -787,6 +818,8 @@ struct f2fs_sm_info { struct dirty_seglist_info *dirty_info; /* dirty segment information */ struct curseg_info *curseg_array; /* active segment information */ + struct rw_semaphore curseg_lock; /* for preventing curseg change */ + block_t seg0_blkaddr; /* block address of 0'th segment */ block_t main_blkaddr; /* start block address of main area */ block_t ssa_blkaddr; /* start block address of SSA area */ @@ -808,6 +841,7 @@ struct f2fs_sm_info { unsigned int min_ipu_util; /* in-place-update threshold */ unsigned int min_fsync_blocks; /* threshold for fsync */ unsigned int min_hot_blocks; /* threshold for hot block allocation */ + unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */ /* for flush command control */ struct flush_cmd_control *fcc_info; @@ -829,6 +863,7 @@ struct f2fs_sm_info { enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, + F2FS_DIRTY_QDATA, F2FS_DIRTY_NODES, F2FS_DIRTY_META, F2FS_INMEM_PAGES, @@ -877,6 +912,18 @@ enum need_lock_type { LOCK_RETRY, }; +enum cp_reason_type { + CP_NO_NEEDED, + CP_NON_REGULAR, + CP_HARDLINK, + CP_SB_NEED_CP, + CP_WRONG_PINO, + CP_NO_SPC_ROLL, + CP_NODE_NEED_CP, + CP_FASTBOOT_MODE, + CP_SPEC_LOG_NUM, +}; + enum iostat_type { APP_DIRECT_IO, /* app direct IOs */ APP_BUFFERED_IO, /* app buffered IOs */ @@ -896,6 +943,7 @@ enum iostat_type { struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ + nid_t ino; /* inode number */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ enum temp_type temp; /* contains HOT/WARM/COLD */ int op; /* contains REQ_OP_ */ @@ -940,6 +988,7 @@ enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ DIRTY_META, /* for all dirtied inode metadata */ + ATOMIC_FILE, /* for all atomic files */ NR_INODE_TYPE, }; @@ -1042,12 +1091,15 @@ struct f2fs_sb_info { loff_t max_file_blocks; /* max block index of file */ int active_logs; /* # of active logs */ int dir_level; /* directory level */ + int inline_xattr_size; /* inline xattr size */ + unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ block_t discard_blks; /* discard command candidats */ block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks; /* configurable reserved blocks */ + block_t current_reserved_blocks; /* current reserved blocks */ u32 s_next_generation; /* for NFS support */ @@ -1113,6 +1165,8 @@ struct f2fs_sb_info { struct list_head s_list; int s_ndevs; /* number of devices */ struct f2fs_dev_info *devs; /* for device list */ + unsigned int dirty_device; /* for checkpoint data flush */ + spinlock_t dev_lock; /* protect dirty_device */ struct mutex umount_mutex; unsigned int shrinker_run_no; @@ -1176,8 +1230,7 @@ static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) { - struct timespec ts = {sbi->interval_time[type], 0}; - unsigned long interval = timespec_to_jiffies(&ts); + unsigned long interval = sbi->interval_time[type] * HZ; return time_after(jiffies, sbi->last_time[type] + interval); } @@ -1344,6 +1397,13 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) return le64_to_cpu(cp->checkpoint_ver); } +static inline unsigned long f2fs_qf_ino(struct super_block *sb, int type) +{ + if (type < F2FS_MAX_QUOTAS) + return le32_to_cpu(F2FS_SB(sb)->raw_super->qf_ino[type]); + return 0; +} + static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp) { size_t crc_offset = le32_to_cpu(cp->checksum_offset); @@ -1522,7 +1582,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); sbi->total_valid_block_count += (block_t)(*count); - avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks; + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; *count -= diff; @@ -1556,6 +1617,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; + if (sbi->reserved_blocks && + sbi->current_reserved_blocks < sbi->reserved_blocks) + sbi->current_reserved_blocks = min(sbi->reserved_blocks, + sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); f2fs_i_blocks_write(inode, count, false, true); } @@ -1576,6 +1641,8 @@ static inline void inode_inc_dirty_pages(struct inode *inode) atomic_inc(&F2FS_I(inode)->dirty_pages); inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); + if (IS_NOQUOTA(inode)) + inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA); } static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -1592,6 +1659,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode) atomic_dec(&F2FS_I(inode)->dirty_pages); dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); + if (IS_NOQUOTA(inode)) + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA); } static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) @@ -1699,10 +1768,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, return ret; } +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_BLOCK)) { + f2fs_show_injection_info(FAULT_BLOCK); + goto enospc; + } +#endif + spin_lock(&sbi->stat_lock); valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count + sbi->reserved_blocks > + if (unlikely(valid_block_count + sbi->current_reserved_blocks > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; @@ -1745,6 +1821,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, sbi->total_valid_node_count--; sbi->total_valid_block_count--; + if (sbi->reserved_blocks && + sbi->current_reserved_blocks < sbi->reserved_blocks) + sbi->current_reserved_blocks++; spin_unlock(&sbi->stat_lock); @@ -1791,6 +1870,19 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); } +static inline struct page *f2fs_pagecache_get_page( + struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp_mask) +{ +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { + f2fs_show_injection_info(FAULT_PAGE_GET); + return NULL; + } +#endif + return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); +} + static inline void f2fs_copy_page(struct page *src, struct page *dst) { char *src_kaddr = kmap(src); @@ -1840,15 +1932,25 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline struct bio *f2fs_bio_alloc(int npages) +static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, + int npages, bool no_fail) { struct bio *bio; - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; + if (no_fail) { + /* No failure on bio allocation */ + bio = bio_alloc(GFP_NOIO, npages); + if (!bio) + bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); + return bio; + } +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { + f2fs_show_injection_info(FAULT_ALLOC_BIO); + return NULL; + } +#endif + return bio_alloc(GFP_KERNEL, npages); } static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, @@ -2158,25 +2260,20 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) static inline unsigned int addrs_per_inode(struct inode *inode) { - if (f2fs_has_inline_xattr(inode)) - return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS; - return CUR_ADDRS_PER_INODE(inode); + return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS(inode); } -static inline void *inline_xattr_addr(struct page *page) +static inline void *inline_xattr_addr(struct inode *inode, struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - - F2FS_INLINE_XATTR_ADDRS]); + F2FS_INLINE_XATTR_ADDRS(inode)]); } static inline int inline_xattr_size(struct inode *inode) { - if (f2fs_has_inline_xattr(inode)) - return F2FS_INLINE_XATTR_ADDRS << 2; - else - return 0; + return get_inline_xattr_addrs(inode) * sizeof(__le32); } static inline int f2fs_has_inline_data(struct inode *inode) @@ -2257,9 +2354,10 @@ static inline void clear_file(struct inode *inode, int type) static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) { + bool ret; + if (dsync) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool ret; spin_lock(&sbi->inode_lock[DIRTY_META]); ret = list_empty(&F2FS_I(inode)->gdirty_list); @@ -2270,7 +2368,12 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) file_keep_isize(inode) || i_size_read(inode) & PAGE_MASK) return false; - return F2FS_I(inode)->last_disk_size == i_size_read(inode); + + down_read(&F2FS_I(inode)->i_sem); + ret = F2FS_I(inode)->last_disk_size == i_size_read(inode); + up_read(&F2FS_I(inode)->i_sem); + + return ret; } static inline int f2fs_readonly(struct super_block *sb) @@ -2320,6 +2423,12 @@ static inline int get_extra_isize(struct inode *inode) return F2FS_I(inode)->i_extra_isize / sizeof(__le32); } +static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb); +static inline int get_inline_xattr_addrs(struct inode *inode) +{ + return F2FS_I(inode)->i_inline_xattr_size; +} + #define get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) @@ -2448,7 +2557,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) */ int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); -void f2fs_enable_quota_files(struct f2fs_sb_info *sbi); +int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); @@ -2476,7 +2585,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni); pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); int truncate_inode_blocks(struct inode *inode, pgoff_t from); -int truncate_xattr_node(struct inode *inode, struct page *page); +int truncate_xattr_node(struct inode *inode); int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino); int remove_inode_page(struct inode *inode); struct page *new_inode_page(struct inode *inode); @@ -2511,19 +2620,22 @@ void destroy_node_manager_caches(void); */ bool need_SSR(struct f2fs_sb_info *sbi); void register_inmem_page(struct inode *inode, struct page *page); +void drop_inmem_pages_all(struct f2fs_sb_info *sbi); void drop_inmem_pages(struct inode *inode); void drop_inmem_page(struct inode *inode, struct page *page); int commit_inmem_pages(struct inode *inode); void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); -int f2fs_issue_flush(struct f2fs_sb_info *sbi); +int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); int create_flush_cmd_control(struct f2fs_sb_info *sbi); +int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); -void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); +void init_discard_policy(struct discard_policy *dpolicy, int discard_type, + unsigned int granularity); void stop_discard_thread(struct f2fs_sb_info *sbi); -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount); +bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); @@ -2578,6 +2690,10 @@ void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); void release_ino_entry(struct f2fs_sb_info *sbi, bool all); bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); +void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type); +bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type); int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi); int acquire_orphan_inode(struct f2fs_sb_info *sbi); void release_orphan_inode(struct f2fs_sb_info *sbi); @@ -2665,14 +2781,16 @@ struct f2fs_stat_info { unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; int ext_tree, zombie_tree, ext_node; - int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; + int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; + int ndirty_data, ndirty_qdata; int inmem_pages; - unsigned int ndirty_dirs, ndirty_files, ndirty_all; + unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits; int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; - int nr_flushing, nr_flushed, nr_discarding, nr_discarded; + int nr_flushing, nr_flushed, flush_list_empty; + int nr_discarding, nr_discarded; int nr_discard_cmd; unsigned int undiscard_blks; int inline_xattr, inline_inode, inline_dir, append, update, orphans; @@ -2981,6 +3099,16 @@ static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); } +static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR); +} + +static inline int f2fs_sb_has_quota_ino(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO); +} + #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkaddr) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f78b76ec4707..7874bbd7311d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -53,6 +53,11 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct dnode_of_data dn; int err; + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto err; + } + sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -114,6 +119,7 @@ out_sem: out: sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); +err: return block_page_mkwrite_return(err); } @@ -138,27 +144,29 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) return 1; } -static inline bool need_do_checkpoint(struct inode *inode) +static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool need_cp = false; + enum cp_reason_type cp_reason = CP_NO_NEEDED; - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) - need_cp = true; + if (!S_ISREG(inode->i_mode)) + cp_reason = CP_NON_REGULAR; + else if (inode->i_nlink != 1) + cp_reason = CP_HARDLINK; else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) - need_cp = true; + cp_reason = CP_SB_NEED_CP; else if (file_wrong_pino(inode)) - need_cp = true; + cp_reason = CP_WRONG_PINO; else if (!space_for_roll_forward(sbi)) - need_cp = true; + cp_reason = CP_NO_SPC_ROLL; else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) - need_cp = true; + cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) - need_cp = true; + cp_reason = CP_FASTBOOT_MODE; else if (sbi->active_logs == 2) - need_cp = true; + cp_reason = CP_SPEC_LOG_NUM; - return need_cp; + return cp_reason; } static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) @@ -193,7 +201,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t ino = inode->i_ino; int ret = 0; - bool need_cp = false; + enum cp_reason_type cp_reason = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, @@ -212,7 +220,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, clear_inode_flag(inode, FI_NEED_IPU); if (ret) { - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); return ret; } @@ -243,10 +251,10 @@ go_write: * sudden-power-off. */ down_read(&F2FS_I(inode)->i_sem); - need_cp = need_do_checkpoint(inode); + cp_reason = need_do_checkpoint(inode); up_read(&F2FS_I(inode)->i_sem); - if (need_cp) { + if (cp_reason) { /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); @@ -294,19 +302,24 @@ sync_nodes: remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - remove_ino_entry(sbi, ino, UPDATE_INO); - clear_inode_flag(inode, FI_UPDATE_WRITE); if (!atomic) - ret = f2fs_issue_flush(sbi); + ret = f2fs_issue_flush(sbi, inode->i_ino); + if (!ret) { + remove_ino_entry(sbi, ino, UPDATE_INO); + clear_inode_flag(inode, FI_UPDATE_WRITE); + remove_ino_entry(sbi, ino, FLUSH_INO); + } f2fs_update_time(sbi, REQ_TIME); out: - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); f2fs_trace_ios(NULL, 1); return ret; } int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) + return -EIO; return f2fs_do_sync_file(file, start, end, datasync, false); } @@ -444,6 +457,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file_inode(file); int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) @@ -630,6 +646,9 @@ int f2fs_truncate(struct inode *inode) { int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return 0; @@ -684,6 +703,12 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, STATX_ATTR_NODUMP); generic_fillattr(inode, stat); + + /* we need to show initial sectors used for inline_data/dentries */ + if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || + f2fs_has_inline_dentry(inode)) + stat->blocks += (stat->size + 511) >> 9; + return 0; } @@ -723,6 +748,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) int err; bool size_changed = false; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + err = setattr_prepare(dentry, attr); if (err) return err; @@ -775,6 +803,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) inode->i_mtime = inode->i_ctime = current_time(inode); } + down_write(&F2FS_I(inode)->i_sem); + F2FS_I(inode)->last_disk_size = i_size_read(inode); + up_write(&F2FS_I(inode)->i_sem); + size_changed = true; } @@ -845,7 +877,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); if (err) { if (err == -ENOENT) { - pg_start++; + pg_start = get_next_page_offset(&dn, pg_start); continue; } return err; @@ -1160,11 +1192,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (ret) goto out; + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + truncate_pagecache(inode, offset); ret = f2fs_do_collapse(inode, pg_start, pg_end); if (ret) - goto out; + goto out_unlock; /* write out all moved pages, if possible */ filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -1176,7 +1211,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = truncate_blocks(inode, new_size, true); if (!ret) f2fs_i_size_write(inode, new_size); - +out_unlock: + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); out: up_write(&F2FS_I(inode)->i_mmap_sem); return ret; @@ -1359,6 +1395,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (ret) goto out; + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + truncate_pagecache(inode, offset); pg_start = offset >> PAGE_SHIFT; @@ -1386,6 +1425,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (!ret) f2fs_i_size_write(inode, new_size); + + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); out: up_write(&F2FS_I(inode)->i_mmap_sem); return ret; @@ -1435,8 +1476,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; } - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else + f2fs_i_size_write(inode, new_size); + } return err; } @@ -1447,6 +1492,9 @@ static long f2fs_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); long ret = 0; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* f2fs only support ->fallocate for regular file */ if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -1480,8 +1528,6 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, false); - if (mode & FALLOC_FL_KEEP_SIZE) - file_set_keep_isize(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1883,6 +1929,9 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + if (!f2fs_sb_has_crypto(inode->i_sb)) + return -EOPNOTSUPP; + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); @@ -1890,6 +1939,8 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { + if (!f2fs_sb_has_crypto(file_inode(filp)->i_sb)) + return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -2245,9 +2296,13 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } inode_lock(src); + down_write(&F2FS_I(src)->dio_rwsem[WRITE]); if (src != dst) { - if (!inode_trylock(dst)) { - ret = -EBUSY; + ret = -EBUSY; + if (!inode_trylock(dst)) + goto out; + if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) { + inode_unlock(dst); goto out; } } @@ -2307,9 +2362,12 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } f2fs_unlock_op(sbi); out_unlock: - if (src != dst) + if (src != dst) { + up_write(&F2FS_I(dst)->dio_rwsem[WRITE]); inode_unlock(dst); + } out: + up_write(&F2FS_I(src)->dio_rwsem[WRITE]); inode_unlock(src); return ret; } @@ -2625,6 +2683,9 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) + return -EIO; + switch (cmd) { case F2FS_IOC_GETFLAGS: return f2fs_ioc_getflags(filp, arg); @@ -2682,6 +2743,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct blk_plug plug; ssize_t ret; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) { @@ -2692,6 +2756,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) err = f2fs_preallocate_blocks(iocb, from); if (err) { + clear_inode_flag(inode, FI_NO_PREALLOC); inode_unlock(inode); return err; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index bfe6a8ccc3a0..5d5bba462f26 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -267,16 +267,6 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); } -static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - unsigned int valid_blocks = - get_valid_blocks(sbi, segno, true); - - return IS_DATASEG(get_seg_entry(sbi, segno)->type) ? - valid_blocks * 2 : valid_blocks; -} - static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, struct victim_sel_policy *p) { @@ -285,7 +275,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) - return get_greedy_cost(sbi, segno); + return get_valid_blocks(sbi, segno, true); else return get_cb_cost(sbi, segno); } @@ -466,10 +456,10 @@ static int check_valid_map(struct f2fs_sb_info *sbi, struct seg_entry *sentry; int ret; - mutex_lock(&sit_i->sentry_lock); + down_read(&sit_i->sentry_lock); sentry = get_seg_entry(sbi, segno); ret = f2fs_test_bit(offset, sentry->cur_valid_map); - mutex_unlock(&sit_i->sentry_lock); + up_read(&sit_i->sentry_lock); return ret; } @@ -608,6 +598,7 @@ static void move_data_block(struct inode *inode, block_t bidx, { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), + .ino = inode->i_ino, .type = DATA, .temp = COLD, .op = REQ_OP_READ, @@ -659,8 +650,8 @@ static void move_data_block(struct inode *inode, block_t bidx, allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, CURSEG_COLD_DATA, NULL, false); - fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr, - FGP_LOCK | FGP_CREAT, GFP_NOFS); + fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), + newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); if (!fio.encrypted_page) { err = -ENOMEM; goto recover_block; @@ -738,6 +729,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, } else { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), + .ino = inode->i_ino, .type = DATA, .temp = COLD, .op = REQ_OP_WRITE, @@ -840,10 +832,17 @@ next_step: continue; } + if (!down_write_trylock( + &F2FS_I(inode)->dio_rwsem[WRITE])) { + iput(inode); + continue; + } + start_bidx = start_bidx_of_node(nofs, inode); data_page = get_read_data_page(inode, start_bidx + ofs_in_node, REQ_RAHEAD, true); + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); continue; @@ -901,10 +900,10 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, struct sit_info *sit_i = SIT_I(sbi); int ret; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); return ret; } @@ -952,8 +951,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, /* * this is to avoid deadlock: * - lock_page(sum_page) - f2fs_replace_block - * - check_valid_map() - mutex_lock(sentry_lock) - * - mutex_lock(sentry_lock) - change_curseg() + * - check_valid_map() - down_write(sentry_lock) + * - down_read(sentry_lock) - change_curseg() * - lock_page(sum_page) */ if (type == SUM_TYPE_NODE) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8322e4e7bb3f..90e38d8ea688 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -112,6 +112,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(dn->inode), + .ino = dn->inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 53fb08810ee9..b4c4f2b25304 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -235,6 +235,23 @@ static int do_read_inode(struct inode *inode) fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; + if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); + fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size); + } else if (f2fs_has_inline_xattr(inode) || + f2fs_has_inline_dentry(inode)) { + fi->i_inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + } else { + + /* + * Previous inline data or directory always reserved 200 bytes + * in inode layout, even if inline_xattr is disabled. In order + * to keep inline_dentry's structure for backward compatibility, + * we get the space back only from inline_data. + */ + fi->i_inline_xattr_size = 0; + } + /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); @@ -387,6 +404,10 @@ int update_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode)) { ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); + if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb)) + ri->i_inline_xattr_size = + cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_projid)) { @@ -483,6 +504,7 @@ void f2fs_evict_inode(struct inode *inode) remove_ino_entry(sbi, inode->i_ino, APPEND_INO); remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); + remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); @@ -522,8 +544,10 @@ no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); - if (!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) + if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); + else + f2fs_inode_synced(inode); /* ino == 0, if f2fs_new_inode() was failed t*/ if (inode->i_ino) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a4dab98c4b7b..28bdf8828e73 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -29,6 +29,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_t ino; struct inode *inode; bool nid_free = false; + int xattr_size = 0; int err; inode = new_inode(dir->i_sb); @@ -86,11 +87,23 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (test_opt(sbi, INLINE_XATTR)) set_inode_flag(inode, FI_INLINE_XATTR); + if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) set_inode_flag(inode, FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); + if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); + if (f2fs_has_inline_xattr(inode)) + xattr_size = sbi->inline_xattr_size; + /* Otherwise, will be 0 */ + } else if (f2fs_has_inline_xattr(inode) || + f2fs_has_inline_dentry(inode)) { + xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + } + F2FS_I(inode)->i_inline_xattr_size = xattr_size; + f2fs_init_extent_tree(inode, NULL); stat_inc_inline_xattr(inode); @@ -177,6 +190,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, nid_t ino = 0; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -221,6 +237,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if (f2fs_encrypted_inode(dir) && !fscrypt_has_permitted_context(dir, inode)) return -EPERM; @@ -331,12 +350,15 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; struct f2fs_dir_entry *de; struct page *page; - nid_t ino; + struct dentry *new; + nid_t ino = -1; int err = 0; unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); + trace_f2fs_lookup_start(dir, dentry, flags); + if (f2fs_encrypted_inode(dir)) { - int res = fscrypt_get_encryption_info(dir); + err = fscrypt_get_encryption_info(dir); /* * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is @@ -346,18 +368,22 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (fscrypt_has_encryption_key(dir)) fscrypt_set_encrypted_dentry(dentry); fscrypt_set_d_op(dentry); - if (res && res != -ENOKEY) - return ERR_PTR(res); + if (err && err != -ENOKEY) + goto out; } - if (dentry->d_name.len > F2FS_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); + if (dentry->d_name.len > F2FS_NAME_LEN) { + err = -ENAMETOOLONG; + goto out; + } de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { - if (IS_ERR(page)) - return (struct dentry *)page; - return d_splice_alias(inode, dentry); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + goto out_splice; } ino = le32_to_cpu(de->ino); @@ -365,19 +391,21 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { err = __recover_dot_dentries(dir, root_ino); if (err) - goto err_out; + goto out_iput; } if (f2fs_has_inline_dots(inode)) { err = __recover_dot_dentries(inode, dir->i_ino); if (err) - goto err_out; + goto out_iput; } if (f2fs_encrypted_inode(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && @@ -386,12 +414,18 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, "Inconsistent encryption contexts: %lu/%lu", dir->i_ino, inode->i_ino); err = -EPERM; - goto err_out; + goto out_iput; } - return d_splice_alias(inode, dentry); - -err_out: +out_splice: + new = d_splice_alias(inode, dentry); + if (IS_ERR(new)) + err = PTR_ERR(new); + trace_f2fs_lookup_end(dir, dentry, ino, err); + return new; +out_iput: iput(inode); +out: + trace_f2fs_lookup_end(dir, dentry, ino, err); return ERR_PTR(err); } @@ -405,7 +439,13 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); + if (err) + return err; + err = dquot_initialize(inode); if (err) return err; @@ -460,6 +500,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct fscrypt_symlink_data *sd = NULL; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if (f2fs_encrypted_inode(dir)) { err = fscrypt_get_encryption_info(dir); if (err) @@ -566,6 +609,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -618,6 +664,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err = 0; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -712,6 +761,9 @@ out: static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + return -EIO; + if (f2fs_encrypted_inode(dir)) { int err = fscrypt_get_encryption_info(dir); if (err) @@ -723,6 +775,9 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + return -EIO; + return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout); } @@ -742,6 +797,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, bool is_old_inline = f2fs_has_inline_dentry(old_dir); int err = -ENOENT; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if ((f2fs_encrypted_inode(old_dir) && !fscrypt_has_encryption_key(old_dir)) || (f2fs_encrypted_inode(new_dir) && @@ -767,6 +825,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out; + if (new_inode) { + err = dquot_initialize(new_inode); + if (err) + goto out; + } + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -935,6 +999,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, int old_nlink = 0, new_nlink = 0; int err = -ENOENT; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if ((f2fs_encrypted_inode(old_dir) && !fscrypt_has_encryption_key(old_dir)) || (f2fs_encrypted_inode(new_dir) && diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b33dac9592ca..d3322752426f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -46,7 +46,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) * give 25%, 25%, 50%, 50%, 50% memory for each components respectively */ if (type == FREE_NIDS) { - mem_size = (nm_i->nid_cnt[FREE_NID_LIST] * + mem_size = (nm_i->nid_cnt[FREE_NID] * sizeof(struct free_nid)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { @@ -63,7 +63,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) } else if (type == INO_ENTRIES) { int i; - for (i = 0; i <= UPDATE_INO; i++) + for (i = 0; i < MAX_INO_ENTRY; i++) mem_size += sbi->im[i].ino_num * sizeof(struct ino_entry); mem_size >>= PAGE_SHIFT; @@ -74,6 +74,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) atomic_read(&sbi->total_ext_node) * sizeof(struct extent_node)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == INMEM_PAGES) { + /* it allows 20% / total_ram for inmemory pages */ + mem_size = get_pages(sbi, F2FS_INMEM_PAGES); + res = mem_size < (val.totalram / 5); } else { if (!sbi->sb->s_bdi->wb.dirty_exceeded) return true; @@ -134,6 +138,44 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) return dst_page; } +static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail) +{ + struct nat_entry *new; + + if (no_fail) + new = f2fs_kmem_cache_alloc(nat_entry_slab, + GFP_NOFS | __GFP_ZERO); + else + new = kmem_cache_alloc(nat_entry_slab, + GFP_NOFS | __GFP_ZERO); + if (new) { + nat_set_nid(new, nid); + nat_reset_flag(new); + } + return new; +} + +static void __free_nat_entry(struct nat_entry *e) +{ + kmem_cache_free(nat_entry_slab, e); +} + +/* must be locked by nat_tree_lock */ +static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, + struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail) +{ + if (no_fail) + f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne); + else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne)) + return NULL; + + if (raw_ne) + node_info_from_raw_nat(&ne->ni, raw_ne); + list_add_tail(&ne->list, &nm_i->nat_entries); + nm_i->nat_cnt++; + return ne; +} + static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) { return radix_tree_lookup(&nm_i->nat_root, n); @@ -150,7 +192,7 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) list_del(&e->list); radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); nm_i->nat_cnt--; - kmem_cache_free(nat_entry_slab, e); + __free_nat_entry(e); } static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, @@ -246,49 +288,29 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) return need_update; } -static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, - bool no_fail) -{ - struct nat_entry *new; - - if (no_fail) { - new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); - f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); - } else { - new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS); - if (!new) - return NULL; - if (radix_tree_insert(&nm_i->nat_root, nid, new)) { - kmem_cache_free(nat_entry_slab, new); - return NULL; - } - } - - memset(new, 0, sizeof(struct nat_entry)); - nat_set_nid(new, nid); - nat_reset_flag(new); - list_add_tail(&new->list, &nm_i->nat_entries); - nm_i->nat_cnt++; - return new; -} - +/* must be locked by nat_tree_lock */ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nat_entry *ne) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct nat_entry *e; + struct nat_entry *new, *e; + new = __alloc_nat_entry(nid, false); + if (!new) + return; + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); - if (!e) { - e = grab_nat_entry(nm_i, nid, false); - if (e) - node_info_from_raw_nat(&e->ni, ne); - } else { + if (!e) + e = __init_nat_entry(nm_i, new, ne, false); + else f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || nat_get_blkaddr(e) != le32_to_cpu(ne->block_addr) || nat_get_version(e) != ne->version); - } + up_write(&nm_i->nat_tree_lock); + if (e != new) + __free_nat_entry(new); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -296,11 +318,12 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; + struct nat_entry *new = __alloc_nat_entry(ni->nid, true); down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { - e = grab_nat_entry(nm_i, ni->nid, true); + e = __init_nat_entry(nm_i, new, NULL, true); copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -312,6 +335,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); } + /* let's free early to reduce memory consumption */ + if (e != new) + __free_nat_entry(new); /* sanity check */ f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); @@ -327,10 +353,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { unsigned char version = nat_get_version(e); nat_set_version(e, inc_node_version(version)); - - /* in order to reuse the nid */ - if (nm_i->next_scan_nid > ni->nid) - nm_i->next_scan_nid = ni->nid; } /* change address */ @@ -424,9 +446,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) f2fs_put_page(page, 1); cache: /* cache nat entry */ - down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); - up_write(&nm_i->nat_tree_lock); } /* @@ -962,7 +982,8 @@ fail: return err > 0 ? 0 : err; } -int truncate_xattr_node(struct inode *inode, struct page *page) +/* caller must lock inode page */ +int truncate_xattr_node(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t nid = F2FS_I(inode)->i_xattr_nid; @@ -978,10 +999,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page) f2fs_i_xnid_write(inode, 0); - set_new_dnode(&dn, inode, page, npage, nid); - - if (page) - dn.inode_page_locked = true; + set_new_dnode(&dn, inode, NULL, npage, nid); truncate_node(&dn); return 0; } @@ -1000,7 +1018,7 @@ int remove_inode_page(struct inode *inode) if (err) return err; - err = truncate_xattr_node(inode, dn.inode_page); + err = truncate_xattr_node(inode); if (err) { f2fs_put_dnode(&dn); return err; @@ -1220,7 +1238,8 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!inode) return; - page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); + page = f2fs_pagecache_get_page(inode->i_mapping, 0, + FGP_LOCK|FGP_NOWAIT, 0); if (!page) goto iput_out; @@ -1244,37 +1263,6 @@ iput_out: iput(inode); } -void move_node_page(struct page *node_page, int gc_type) -{ - if (gc_type == FG_GC) { - struct f2fs_sb_info *sbi = F2FS_P_SB(node_page); - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, - .for_reclaim = 0, - }; - - set_page_dirty(node_page); - f2fs_wait_on_page_writeback(node_page, NODE, true); - - f2fs_bug_on(sbi, PageWriteback(node_page)); - if (!clear_page_dirty_for_io(node_page)) - goto out_page; - - if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc)) - unlock_page(node_page); - goto release_page; - } else { - /* set page dirty and write it */ - if (!PageWriteback(node_page)) - set_page_dirty(node_page); - } -out_page: - unlock_page(node_page); -release_page: - f2fs_put_page(node_page, 0); -} - static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) { pgoff_t index; @@ -1340,6 +1328,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, + .ino = ino_of_node(page), .type = NODE, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), @@ -1412,6 +1401,37 @@ redirty_out: return AOP_WRITEPAGE_ACTIVATE; } +void move_node_page(struct page *node_page, int gc_type) +{ + if (gc_type == FG_GC) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 1, + .for_reclaim = 0, + }; + + set_page_dirty(node_page); + f2fs_wait_on_page_writeback(node_page, NODE, true); + + f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page)); + if (!clear_page_dirty_for_io(node_page)) + goto out_page; + + if (__write_node_page(node_page, false, NULL, + &wbc, false, FS_GC_NODE_IO)) + unlock_page(node_page); + goto release_page; + } else { + /* set page dirty and write it */ + if (!PageWriteback(node_page)) + set_page_dirty(node_page); + } +out_page: + unlock_page(node_page); +release_page: + f2fs_put_page(node_page, 0); +} + static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { @@ -1742,35 +1762,54 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, return radix_tree_lookup(&nm_i->free_nid_root, n); } -static int __insert_nid_to_list(struct f2fs_sb_info *sbi, - struct free_nid *i, enum nid_list list, bool new) +static int __insert_free_nid(struct f2fs_sb_info *sbi, + struct free_nid *i, enum nid_state state) { struct f2fs_nm_info *nm_i = NM_I(sbi); - if (new) { - int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); - if (err) - return err; - } + int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); + if (err) + return err; - f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : - i->state != NID_ALLOC); - nm_i->nid_cnt[list]++; - list_add_tail(&i->list, &nm_i->nid_list[list]); + f2fs_bug_on(sbi, state != i->state); + nm_i->nid_cnt[state]++; + if (state == FREE_NID) + list_add_tail(&i->list, &nm_i->free_nid_list); return 0; } -static void __remove_nid_from_list(struct f2fs_sb_info *sbi, - struct free_nid *i, enum nid_list list, bool reuse) +static void __remove_free_nid(struct f2fs_sb_info *sbi, + struct free_nid *i, enum nid_state state) { struct f2fs_nm_info *nm_i = NM_I(sbi); - f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : - i->state != NID_ALLOC); - nm_i->nid_cnt[list]--; - list_del(&i->list); - if (!reuse) - radix_tree_delete(&nm_i->free_nid_root, i->nid); + f2fs_bug_on(sbi, state != i->state); + nm_i->nid_cnt[state]--; + if (state == FREE_NID) + list_del(&i->list); + radix_tree_delete(&nm_i->free_nid_root, i->nid); +} + +static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i, + enum nid_state org_state, enum nid_state dst_state) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + f2fs_bug_on(sbi, org_state != i->state); + i->state = dst_state; + nm_i->nid_cnt[org_state]--; + nm_i->nid_cnt[dst_state]++; + + switch (dst_state) { + case PREALLOC_NID: + list_del(&i->list); + break; + case FREE_NID: + list_add_tail(&i->list, &nm_i->free_nid_list); + break; + default: + BUG_ON(1); + } } /* return if the nid is recognized as free */ @@ -1788,7 +1827,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); i->nid = nid; - i->state = NID_NEW; + i->state = FREE_NID; if (radix_tree_preload(GFP_NOFS)) goto err; @@ -1801,7 +1840,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) * - f2fs_create * - f2fs_new_inode * - alloc_nid - * - __insert_nid_to_list(ALLOC_NID_LIST) + * - __insert_nid_to_list(PREALLOC_NID) * - f2fs_balance_fs_bg * - build_free_nids * - __build_free_nids @@ -1814,8 +1853,8 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) * - new_node_page * - set_node_addr * - alloc_nid_done - * - __remove_nid_from_list(ALLOC_NID_LIST) - * - __insert_nid_to_list(FREE_NID_LIST) + * - __remove_nid_from_list(PREALLOC_NID) + * - __insert_nid_to_list(FREE_NID) */ ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || @@ -1824,13 +1863,13 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) e = __lookup_free_nid_list(nm_i, nid); if (e) { - if (e->state == NID_NEW) + if (e->state == FREE_NID) ret = true; goto err_out; } } ret = true; - err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true); + err = __insert_free_nid(sbi, i, FREE_NID); err_out: spin_unlock(&nm_i->nid_list_lock); radix_tree_preload_end(); @@ -1848,8 +1887,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); - if (i && i->state == NID_NEW) { - __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + if (i && i->state == FREE_NID) { + __remove_free_nid(sbi, i, FREE_NID); need_free = true; } spin_unlock(&nm_i->nid_list_lock); @@ -1868,15 +1907,18 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) return; - if (set) + if (set) { + if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) + return; __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - else - __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - - if (set) nm_i->free_nid_count[nat_ofs]++; - else if (!build) - nm_i->free_nid_count[nat_ofs]--; + } else { + if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) + return; + __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + if (!build) + nm_i->free_nid_count[nat_ofs]--; + } } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1911,34 +1953,12 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, } } -static void scan_free_nid_bits(struct f2fs_sb_info *sbi) +static void scan_curseg_cache(struct f2fs_sb_info *sbi) { - struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_journal *journal = curseg->journal; - unsigned int i, idx; + int i; - down_read(&nm_i->nat_tree_lock); - - for (i = 0; i < nm_i->nat_blocks; i++) { - if (!test_bit_le(i, nm_i->nat_block_bitmap)) - continue; - if (!nm_i->free_nid_count[i]) - continue; - for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { - nid_t nid; - - if (!test_bit_le(idx, nm_i->free_nid_bitmap[i])) - continue; - - nid = i * NAT_ENTRY_PER_BLOCK + idx; - add_free_nid(sbi, nid, true); - - if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) - goto out; - } - } -out: down_read(&curseg->journal_rwsem); for (i = 0; i < nats_in_cursum(journal); i++) { block_t addr; @@ -1952,14 +1972,43 @@ out: remove_free_nid(sbi, nid); } up_read(&curseg->journal_rwsem); +} + +static void scan_free_nid_bits(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int i, idx; + nid_t nid; + + down_read(&nm_i->nat_tree_lock); + + for (i = 0; i < nm_i->nat_blocks; i++) { + if (!test_bit_le(i, nm_i->nat_block_bitmap)) + continue; + if (!nm_i->free_nid_count[i]) + continue; + for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { + idx = find_next_bit_le(nm_i->free_nid_bitmap[i], + NAT_ENTRY_PER_BLOCK, idx); + if (idx >= NAT_ENTRY_PER_BLOCK) + break; + + nid = i * NAT_ENTRY_PER_BLOCK + idx; + add_free_nid(sbi, nid, true); + + if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS) + goto out; + } + } +out: + scan_curseg_cache(sbi); + up_read(&nm_i->nat_tree_lock); } static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_journal *journal = curseg->journal; int i = 0; nid_t nid = nm_i->next_scan_nid; @@ -1967,7 +2016,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) nid = 0; /* Enough entries */ - if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK) + if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) return; if (!sync && !available_free_memory(sbi, FREE_NIDS)) @@ -1977,7 +2026,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) /* try to find free nids in free_nid_bitmap */ scan_free_nid_bits(sbi); - if (nm_i->nid_cnt[FREE_NID_LIST]) + if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) return; } @@ -2005,18 +2054,8 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) nm_i->next_scan_nid = nid; /* find free nids from current sum_pages */ - down_read(&curseg->journal_rwsem); - for (i = 0; i < nats_in_cursum(journal); i++) { - block_t addr; + scan_curseg_cache(sbi); - addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); - nid = le32_to_cpu(nid_in_journal(journal, i)); - if (addr == NULL_ADDR) - add_free_nid(sbi, nid, true); - else - remove_free_nid(sbi, nid); - } - up_read(&curseg->journal_rwsem); up_read(&nm_i->nat_tree_lock); ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), @@ -2054,15 +2093,13 @@ retry: } /* We should not use stale free nids created by build_free_nids */ - if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) { - f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST])); - i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], + if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) { + f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); + i = list_first_entry(&nm_i->free_nid_list, struct free_nid, list); *nid = i->nid; - __remove_nid_from_list(sbi, i, FREE_NID_LIST, true); - i->state = NID_ALLOC; - __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); + __move_free_nid(sbi, i, FREE_NID, PREALLOC_NID); nm_i->available_nids--; update_free_nid_bitmap(sbi, *nid, false, false); @@ -2088,7 +2125,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); f2fs_bug_on(sbi, !i); - __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); + __remove_free_nid(sbi, i, PREALLOC_NID); spin_unlock(&nm_i->nid_list_lock); kmem_cache_free(free_nid_slab, i); @@ -2111,12 +2148,10 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) f2fs_bug_on(sbi, !i); if (!available_free_memory(sbi, FREE_NIDS)) { - __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); + __remove_free_nid(sbi, i, PREALLOC_NID); need_free = true; } else { - __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true); - i->state = NID_NEW; - __insert_nid_to_list(sbi, i, FREE_NID_LIST, false); + __move_free_nid(sbi, i, PREALLOC_NID, FREE_NID); } nm_i->available_nids++; @@ -2135,20 +2170,19 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) struct free_nid *i, *next; int nr = nr_shrink; - if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) + if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) return 0; if (!mutex_trylock(&nm_i->build_lock)) return 0; spin_lock(&nm_i->nid_list_lock); - list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST], - list) { + list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { if (nr_shrink <= 0 || - nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) + nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) break; - __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + __remove_free_nid(sbi, i, FREE_NID); kmem_cache_free(free_nid_slab, i); nr_shrink--; } @@ -2174,8 +2208,8 @@ void recover_inline_xattr(struct inode *inode, struct page *page) goto update_inode; } - dst_addr = inline_xattr_addr(ipage); - src_addr = inline_xattr_addr(page); + dst_addr = inline_xattr_addr(inode, ipage); + src_addr = inline_xattr_addr(inode, page); inline_size = inline_xattr_size(inode); f2fs_wait_on_page_writeback(ipage, NODE, true); @@ -2264,6 +2298,12 @@ retry: dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR); if (dst->i_inline & F2FS_EXTRA_ATTR) { dst->i_extra_isize = src->i_extra_isize; + + if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_inline_xattr_size)) + dst->i_inline_xattr_size = src->i_inline_xattr_size; + if (f2fs_sb_has_project_quota(sbi->sb) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) @@ -2335,8 +2375,8 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - ne = grab_nat_entry(nm_i, nid, true); - node_info_from_raw_nat(&ne->ni, &raw_ne); + ne = __alloc_nat_entry(nid, true); + __init_nat_entry(nm_i, ne, &raw_ne, true); } /* @@ -2382,15 +2422,17 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; struct f2fs_nat_block *nat_blk = page_address(page); int valid = 0; - int i; + int i = 0; if (!enabled_nat_bits(sbi, NULL)) return; - for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) { - if (start_nid == 0 && i == 0) - valid++; - if (nat_blk->entries[i].block_addr) + if (nat_index == 0) { + valid = 1; + i = 1; + } + for (; i < NAT_ENTRY_PER_BLOCK; i++) { + if (nat_blk->entries[i].block_addr != NULL_ADDR) valid++; } if (valid == 0) { @@ -2585,7 +2627,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) __set_bit_le(i, nm_i->nat_block_bitmap); nid = i * NAT_ENTRY_PER_BLOCK; - last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; + last_nid = nid + NAT_ENTRY_PER_BLOCK; spin_lock(&NM_I(sbi)->nid_list_lock); for (; nid < last_nid; nid++) @@ -2620,16 +2662,15 @@ static int init_node_manager(struct f2fs_sb_info *sbi) /* not used nids: 0, node, meta, (and root counted as valid node) */ nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - F2FS_RESERVED_NODE_NUM; - nm_i->nid_cnt[FREE_NID_LIST] = 0; - nm_i->nid_cnt[ALLOC_NID_LIST] = 0; + nm_i->nid_cnt[FREE_NID] = 0; + nm_i->nid_cnt[PREALLOC_NID] = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); - INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]); - INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]); + INIT_LIST_HEAD(&nm_i->free_nid_list); INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); @@ -2721,16 +2762,15 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) /* destroy free nid list */ spin_lock(&nm_i->nid_list_lock); - list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST], - list) { - __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { + __remove_free_nid(sbi, i, FREE_NID); spin_unlock(&nm_i->nid_list_lock); kmem_cache_free(free_nid_slab, i); spin_lock(&nm_i->nid_list_lock); } - f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]); - f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]); - f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST])); + f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]); + f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]); + f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list)); spin_unlock(&nm_i->nid_list_lock); /* destroy nat cache */ diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index bb53e9955ff2..0ee3e5ff49a3 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -140,6 +140,7 @@ enum mem_type { DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ EXTENT_CACHE, /* indicates extent cache */ + INMEM_PAGES, /* indicates inmemory pages */ BASE_CHECK, /* check kernel status */ }; @@ -150,18 +151,10 @@ struct nat_entry_set { unsigned int entry_cnt; /* the # of nat entries in set */ }; -/* - * For free nid mangement - */ -enum nid_state { - NID_NEW, /* newly added to free nid list */ - NID_ALLOC /* it is allocated */ -}; - struct free_nid { struct list_head list; /* for free node id list */ nid_t nid; /* node id */ - int state; /* in use or not: NID_NEW or NID_ALLOC */ + int state; /* in use or not: FREE_NID or PREALLOC_NID */ }; static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) @@ -170,12 +163,11 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *fnid; spin_lock(&nm_i->nid_list_lock); - if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) { + if (nm_i->nid_cnt[FREE_NID] <= 0) { spin_unlock(&nm_i->nid_list_lock); return; } - fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], - struct free_nid, list); + fnid = list_first_entry(&nm_i->free_nid_list, struct free_nid, list); *nid = fnid->nid; spin_unlock(&nm_i->nid_list_lock); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9626758bc762..92c57ace1939 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -594,6 +594,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) int ret = 0; unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; +#ifdef CONFIG_QUOTA + int quota_enabled; +#endif if (s_flags & MS_RDONLY) { f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); @@ -604,7 +607,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; /* Turn on quotas so that they are updated correctly */ - f2fs_enable_quota_files(sbi); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", @@ -665,7 +668,8 @@ skip: out: #ifdef CONFIG_QUOTA /* Turn quotas off */ - f2fs_quota_off_umount(sbi->sb); + if (quota_enabled) + f2fs_quota_off_umount(sbi->sb); #endif sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c695ff462ee6..c117e0913f2a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -181,11 +181,12 @@ bool need_SSR(struct f2fs_sb_info *sbi) return true; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + - 2 * reserved_sections(sbi)); + SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); } void register_inmem_page(struct inode *inode, struct page *page) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; @@ -204,6 +205,10 @@ void register_inmem_page(struct inode *inode, struct page *page) mutex_lock(&fi->inmem_lock); get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(&fi->inmem_ilist)) + list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); mutex_unlock(&fi->inmem_lock); @@ -262,12 +267,41 @@ next: return err; } +void drop_inmem_pages_all(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; + struct inode *inode; + struct f2fs_inode_info *fi; +next: + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(head)) { + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + return; + } + fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + + if (inode) { + drop_inmem_pages(inode); + iput(inode); + } + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto next; +} + void drop_inmem_pages(struct inode *inode) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); @@ -313,6 +347,7 @@ static int __commit_inmem_pages(struct inode *inode, struct inmem_pages *cur, *tmp; struct f2fs_io_info fio = { .sbi = sbi, + .ino = inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, @@ -398,6 +433,10 @@ int commit_inmem_pages(struct inode *inode) /* drop all uncommitted pages */ __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); } + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_COMMIT); @@ -472,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) static int __submit_flush_wait(struct f2fs_sb_info *sbi, struct block_device *bdev) { - struct bio *bio = f2fs_bio_alloc(0); + struct bio *bio = f2fs_bio_alloc(sbi, 0, true); int ret; bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; @@ -485,15 +524,17 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi, return ret; } -static int submit_flush_wait(struct f2fs_sb_info *sbi) +static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino) { - int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); + int ret = 0; int i; - if (!sbi->s_ndevs || ret) - return ret; + if (!sbi->s_ndevs) + return __submit_flush_wait(sbi, sbi->sb->s_bdev); - for (i = 1; i < sbi->s_ndevs; i++) { + for (i = 0; i < sbi->s_ndevs; i++) { + if (!is_dirty_device(sbi, ino, i, FLUSH_INO)) + continue; ret = __submit_flush_wait(sbi, FDEV(i).bdev); if (ret) break; @@ -519,7 +560,9 @@ repeat: fcc->dispatch_list = llist_del_all(&fcc->issue_list); fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); - ret = submit_flush_wait(sbi); + cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode); + + ret = submit_flush_wait(sbi, cmd->ino); atomic_inc(&fcc->issued_flush); llist_for_each_entry_safe(cmd, next, @@ -537,7 +580,7 @@ repeat: goto repeat; } -int f2fs_issue_flush(struct f2fs_sb_info *sbi) +int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) { struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; @@ -547,19 +590,20 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return 0; if (!test_opt(sbi, FLUSH_MERGE)) { - ret = submit_flush_wait(sbi); + ret = submit_flush_wait(sbi, ino); atomic_inc(&fcc->issued_flush); return ret; } - if (atomic_inc_return(&fcc->issing_flush) == 1) { - ret = submit_flush_wait(sbi); + if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) { + ret = submit_flush_wait(sbi, ino); atomic_dec(&fcc->issing_flush); atomic_inc(&fcc->issued_flush); return ret; } + cmd.ino = ino; init_completion(&cmd.wait); llist_add(&cmd.llnode, &fcc->issue_list); @@ -583,7 +627,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) } else { struct flush_cmd *tmp, *next; - ret = submit_flush_wait(sbi); + ret = submit_flush_wait(sbi, ino); llist_for_each_entry_safe(tmp, next, list, llnode) { if (tmp == &cmd) { @@ -653,6 +697,28 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) } } +int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) +{ + int ret = 0, i; + + if (!sbi->s_ndevs) + return 0; + + for (i = 1; i < sbi->s_ndevs; i++) { + if (!f2fs_test_bit(i, (char *)&sbi->dirty_device)) + continue; + ret = __submit_flush_wait(sbi, FDEV(i).bdev); + if (ret) + break; + + spin_lock(&sbi->dev_lock); + f2fs_clear_bit(i, (char *)&sbi->dirty_device); + spin_unlock(&sbi->dev_lock); + } + + return ret; +} + static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { @@ -794,6 +860,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len); + f2fs_bug_on(sbi, dc->ref); if (dc->error == -EOPNOTSUPP) @@ -845,10 +913,14 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi, /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, - struct discard_cmd *dc) + struct discard_policy *dpolicy, + struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? + &(dcc->fstrim_list) : &(dcc->wait_list); struct bio *bio = NULL; + int flag = dpolicy->sync ? REQ_SYNC : 0; if (dc->state != D_PREP) return; @@ -867,9 +939,9 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, if (bio) { bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; - bio->bi_opf |= REQ_SYNC; + bio->bi_opf |= flag; submit_bio(bio); - list_move_tail(&dc->list, &dcc->wait_list); + list_move_tail(&dc->list, wait_list); __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); f2fs_update_iostat(sbi, FS_DISCARD, 1); @@ -886,7 +958,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, struct rb_node *insert_parent) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct rb_node **p = &dcc->root.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct discard_cmd *dc = NULL; @@ -1054,58 +1126,107 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } -static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) +static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + unsigned int start, unsigned int end) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct discard_cmd *dc; + struct blk_plug plug; + int issued; + +next: + issued = 0; + + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + NULL, start, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (!dc) + dc = next_dc; + + blk_start_plug(&plug); + + while (dc && dc->lstart <= end) { + struct rb_node *node; + + if (dc->len < dpolicy->granularity) + goto skip; + + if (dc->state != D_PREP) { + list_move_tail(&dc->list, &dcc->fstrim_list); + goto skip; + } + + __submit_discard_cmd(sbi, dpolicy, dc); + + if (++issued >= dpolicy->max_requests) { + start = dc->lstart + dc->len; + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + + schedule(); + + goto next; + } +skip: + node = rb_next(&dc->rb_node); + dc = rb_entry_safe(node, struct discard_cmd, rb_node); + + if (fatal_signal_pending(current)) + break; + } + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} + +static int __issue_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int iter = 0, issued = 0; - int i; + int i, iter = 0, issued = 0; bool io_interrupted = false; - mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); - blk_start_plug(&plug); - for (i = MAX_PLIST_NUM - 1; - i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + if (i + 1 < dpolicy->granularity) + break; pend_list = &dcc->pend_list[i]; + + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); - /* Hurry up to finish fstrim */ - if (dcc->pend_list_tag[i] & P_TRIM) { - __submit_discard_cmd(sbi, dc); - issued++; - - if (fatal_signal_pending(current)) - break; - continue; - } - - if (!issue_cond) { - __submit_discard_cmd(sbi, dc); - issued++; - continue; - } - - if (is_idle(sbi)) { - __submit_discard_cmd(sbi, dc); - issued++; - } else { + if (dpolicy->io_aware && i < dpolicy->io_aware_gran && + !is_idle(sbi)) { io_interrupted = true; + goto skip; } - if (++iter >= DISCARD_ISSUE_RATE) - goto out; + __submit_discard_cmd(sbi, dpolicy, dc); + issued++; +skip: + if (++iter >= dpolicy->max_requests) + break; } - if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) - dcc->pend_list_tag[i] &= (~P_TRIM); + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + + if (iter >= dpolicy->max_requests) + break; } -out: - blk_finish_plug(&plug); - mutex_unlock(&dcc->cmd_lock); if (!issued && io_interrupted) issued = -1; @@ -1113,12 +1234,13 @@ out: return issued; } -static void __drop_discard_cmd(struct f2fs_sb_info *sbi) +static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; int i; + bool dropped = false; mutex_lock(&dcc->cmd_lock); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { @@ -1126,39 +1248,58 @@ static void __drop_discard_cmd(struct f2fs_sb_info *sbi) list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); __remove_discard_cmd(sbi, dc); + dropped = true; } } mutex_unlock(&dcc->cmd_lock); + + return dropped; } -static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, +static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + unsigned int len = 0; wait_for_completion_io(&dc->wait); mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, dc->state != D_DONE); dc->ref--; - if (!dc->ref) + if (!dc->ref) { + if (!dc->error) + len = dc->len; __remove_discard_cmd(sbi, dc); + } mutex_unlock(&dcc->cmd_lock); + + return len; } -static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + block_t start, block_t end) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->wait_list); + struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? + &(dcc->fstrim_list) : &(dcc->wait_list); struct discard_cmd *dc, *tmp; bool need_wait; + unsigned int trimmed = 0; next: need_wait = false; mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { + if (dc->lstart + dc->len <= start || end <= dc->lstart) + continue; + if (dc->len < dpolicy->granularity) + continue; + if (dc->state == D_DONE && !dc->ref) { wait_for_completion_io(&dc->wait); + if (!dc->error) + trimmed += dc->len; __remove_discard_cmd(sbi, dc); } else { dc->ref++; @@ -1169,9 +1310,17 @@ next: mutex_unlock(&dcc->cmd_lock); if (need_wait) { - __wait_one_discard_bio(sbi, dc); + trimmed += __wait_one_discard_bio(sbi, dc); goto next; } + + return trimmed; +} + +static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) +{ + __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); } /* This should be covered by global mutex, &sit_i->sentry_lock */ @@ -1209,23 +1358,19 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } } -/* This comes from f2fs_put_super and f2fs_trim_fs */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) -{ - __issue_discard_cmd(sbi, false); - __drop_discard_cmd(sbi); - __wait_discard_cmd(sbi, !umount); -} - -static void mark_discard_range_all(struct f2fs_sb_info *sbi) +/* This comes from f2fs_put_super */ +bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - int i; + struct discard_policy dpolicy; + bool dropped; - mutex_lock(&dcc->cmd_lock); - for (i = 0; i < MAX_PLIST_NUM; i++) - dcc->pend_list_tag[i] |= P_TRIM; - mutex_unlock(&dcc->cmd_lock); + init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); + __issue_discard_cmd(sbi, &dpolicy); + dropped = __drop_discard_cmd(sbi); + __wait_all_discard_cmd(sbi, &dpolicy); + + return dropped; } static int issue_discard_thread(void *data) @@ -1233,12 +1378,16 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; + struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; set_freezable(); do { + init_discard_policy(&dpolicy, DPOLICY_BG, + dcc->discard_granularity); + wait_event_interruptible_timeout(*q, kthread_should_stop() || freezing(current) || dcc->discard_wake, @@ -1251,17 +1400,18 @@ static int issue_discard_thread(void *data) if (dcc->discard_wake) { dcc->discard_wake = 0; if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - mark_discard_range_all(sbi); + init_discard_policy(&dpolicy, + DPOLICY_FORCE, 1); } sb_start_intwrite(sbi->sb); - issued = __issue_discard_cmd(sbi, true); + issued = __issue_discard_cmd(sbi, &dpolicy); if (issued) { - __wait_discard_cmd(sbi, true); - wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + __wait_all_discard_cmd(sbi, &dpolicy); + wait_ms = dpolicy.min_interval; } else { - wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; + wait_ms = dpolicy.max_interval; } sb_end_intwrite(sbi->sb); @@ -1525,7 +1675,6 @@ find_next: f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, len); - cpc->trimmed += len; total_len += len; } else { next_pos = find_next_bit_le(entry->discard_map, @@ -1546,6 +1695,37 @@ skip: wake_up_discard_thread(sbi, false); } +void init_discard_policy(struct discard_policy *dpolicy, + int discard_type, unsigned int granularity) +{ + /* common policy */ + dpolicy->type = discard_type; + dpolicy->sync = true; + dpolicy->granularity = granularity; + + if (discard_type == DPOLICY_BG) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = true; + } else if (discard_type == DPOLICY_FORCE) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = true; + } else if (discard_type == DPOLICY_FSTRIM) { + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = false; + } else if (discard_type == DPOLICY_UMOUNT) { + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = false; + } +} + static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; @@ -1563,12 +1743,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; INIT_LIST_HEAD(&dcc->entry_list); - for (i = 0; i < MAX_PLIST_NUM; i++) { + for (i = 0; i < MAX_PLIST_NUM; i++) INIT_LIST_HEAD(&dcc->pend_list[i]); - if (i >= dcc->discard_granularity - 1) - dcc->pend_list_tag[i] |= P_ACTIVE; - } INIT_LIST_HEAD(&dcc->wait_list); + INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->issing_discard, 0); @@ -1716,16 +1894,6 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) get_sec_entry(sbi, segno)->valid_blocks += del; } -void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new) -{ - update_sit_entry(sbi, new, 1); - if (GET_SEGNO(sbi, old) != NULL_SEGNO) - update_sit_entry(sbi, old, -1); - - locate_dirty_segment(sbi, GET_SEGNO(sbi, old)); - locate_dirty_segment(sbi, GET_SEGNO(sbi, new)); -} - void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) { unsigned int segno = GET_SEGNO(sbi, addr); @@ -1736,14 +1904,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) return; /* add it into sit main buffer */ - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); update_sit_entry(sbi, addr, -1); /* add it into dirty seglist */ locate_dirty_segment(sbi, segno); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); } bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) @@ -1756,7 +1924,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) return true; - mutex_lock(&sit_i->sentry_lock); + down_read(&sit_i->sentry_lock); segno = GET_SEGNO(sbi, blkaddr); se = get_seg_entry(sbi, segno); @@ -1765,7 +1933,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) if (f2fs_test_bit(offset, se->ckpt_valid_map)) is_cp = true; - mutex_unlock(&sit_i->sentry_lock); + up_read(&sit_i->sentry_lock); return is_cp; } @@ -1823,12 +1991,8 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { struct page *page = grab_meta_page(sbi, blk_addr); - void *dst = page_address(page); - if (src) - memcpy(dst, src, PAGE_SIZE); - else - memset(dst, 0, PAGE_SIZE); + memcpy(page_address(page), src, PAGE_SIZE); set_page_dirty(page); f2fs_put_page(page, 1); } @@ -1927,7 +2091,6 @@ find_other_zone: } secno = left_start; skip_left: - hint = secno; segno = GET_SEG_FROM_SEC(sbi, secno); zoneno = GET_ZONE_FROM_SEC(sbi, secno); @@ -2162,12 +2325,16 @@ void allocate_new_segments(struct f2fs_sb_info *sbi) unsigned int old_segno; int i; + down_write(&SIT_I(sbi)->sentry_lock); + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { curseg = CURSEG_I(sbi, i); old_segno = curseg->segno; SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); locate_dirty_segment(sbi, old_segno); } + + up_write(&SIT_I(sbi)->sentry_lock); } static const struct segment_allocation default_salloc_ops = { @@ -2179,14 +2346,14 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u64 trim_start = cpc->trim_start; bool has_candidate = false; - mutex_lock(&SIT_I(sbi)->sentry_lock); + down_write(&SIT_I(sbi)->sentry_lock); for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { if (add_discard_addrs(sbi, cpc, true)) { has_candidate = true; break; } } - mutex_unlock(&SIT_I(sbi)->sentry_lock); + up_write(&SIT_I(sbi)->sentry_lock); cpc->trim_start = trim_start; return has_candidate; @@ -2196,14 +2363,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; - unsigned int start_segno, end_segno; + unsigned int start_segno, end_segno, cur_segno; + block_t start_block, end_block; struct cp_control cpc; + struct discard_policy dpolicy; + unsigned long long trimmed = 0; int err = 0; if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; - cpc.trimmed = 0; if (end <= MAIN_BLKADDR(sbi)) goto out; @@ -2217,12 +2386,14 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); + cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); /* do checkpoint to issue discard commands safely */ - for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { - cpc.trim_start = start_segno; + for (cur_segno = start_segno; cur_segno <= end_segno; + cur_segno = cpc.trim_end + 1) { + cpc.trim_start = cur_segno; if (sbi->discard_blks == 0) break; @@ -2230,7 +2401,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_end = end_segno; else cpc.trim_end = min_t(unsigned int, - rounddown(start_segno + + rounddown(cur_segno + BATCHED_TRIM_SEGMENTS(sbi), sbi->segs_per_sec) - 1, end_segno); @@ -2242,11 +2413,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) schedule(); } - /* It's time to issue all the filed discards */ - mark_discard_range_all(sbi); - f2fs_wait_discard_bios(sbi, false); + + start_block = START_BLOCK(sbi, start_segno); + end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1); + + init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); + __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); + trimmed = __wait_discard_cmd_range(sbi, &dpolicy, + start_block, end_block); out: - range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); + range->len = F2FS_BLK_TO_BYTES(trimmed); return err; } @@ -2258,6 +2434,18 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } +int rw_hint_to_seg_type(enum rw_hint hint) +{ + switch (hint) { + case WRITE_LIFE_SHORT: + return CURSEG_HOT_DATA; + case WRITE_LIFE_EXTREME: + return CURSEG_COLD_DATA; + default: + return CURSEG_WARM_DATA; + } +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -2292,7 +2480,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) return CURSEG_COLD_DATA; if (is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; - return CURSEG_WARM_DATA; + return rw_hint_to_seg_type(inode->i_write_hint); } else { if (IS_DNODE(fio->page)) return is_cold_node(fio->page) ? CURSEG_WARM_NODE : @@ -2336,8 +2524,10 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); + down_read(&SM_I(sbi)->curseg_lock); + mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); @@ -2354,15 +2544,26 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_block_count(sbi, curseg); + /* + * SIT information should be updated before segment allocation, + * since SSR needs latest valid block information. + */ + update_sit_entry(sbi, *new_blkaddr, 1); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + update_sit_entry(sbi, old_blkaddr, -1); + if (!__has_curseg_space(sbi, type)) sit_i->s_ops->allocate_segment(sbi, type, false); - /* - * SIT information should be updated after segment allocation, - * since we need to keep dirty segments precisely under SSR. - */ - refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); - mutex_unlock(&sit_i->sentry_lock); + /* + * segment dirty status should be updated after segment allocation, + * so we just need to update status only one time after previous + * segment being closed. + */ + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); + + up_write(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); @@ -2382,6 +2583,29 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } mutex_unlock(&curseg->curseg_mutex); + + up_read(&SM_I(sbi)->curseg_lock); +} + +static void update_device_state(struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = fio->sbi; + unsigned int devidx; + + if (!sbi->s_ndevs) + return; + + devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); + + /* update device state for fsync */ + set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); + + /* update device state for checkpoint */ + if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { + spin_lock(&sbi->dev_lock); + f2fs_set_bit(devidx, (char *)&sbi->dirty_device); + spin_unlock(&sbi->dev_lock); + } } static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) @@ -2398,6 +2622,8 @@ reallocate: if (err == -EAGAIN) { fio->old_blkaddr = fio->new_blkaddr; goto reallocate; + } else if (!err) { + update_device_state(fio); } } @@ -2458,12 +2684,26 @@ int rewrite_data_page(struct f2fs_io_info *fio) stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); + if (!err) + update_device_state(fio); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); return err; } +static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (CURSEG_I(sbi, i)->segno == segno) + break; + } + return i; +} + void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr) @@ -2479,6 +2719,8 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, se = get_seg_entry(sbi, segno); type = se->type; + down_write(&SM_I(sbi)->curseg_lock); + if (!recover_curseg) { /* for recovery flow */ if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { @@ -2488,14 +2730,19 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, type = CURSEG_WARM_DATA; } } else { - if (!IS_CURSEG(sbi, segno)) + if (IS_CURSEG(sbi, segno)) { + /* se->type is volatile as SSR allocation */ + type = __f2fs_get_curseg(sbi, segno); + f2fs_bug_on(sbi, type == NO_CHECK_TYPE); + } else { type = CURSEG_WARM_DATA; + } } curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); old_cursegno = curseg->segno; old_blkoff = curseg->next_blkoff; @@ -2527,8 +2774,9 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, curseg->next_blkoff = old_blkoff; } - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); + up_write(&SM_I(sbi)->curseg_lock); } void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, @@ -2982,7 +3230,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) bool to_journal = true; struct seg_entry *se; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); if (!sit_i->dirty_sentries) goto out; @@ -3076,7 +3324,7 @@ out: cpc->trim_start = trim_start; } - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); set_prefree_as_free_segments(sbi); } @@ -3169,7 +3417,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); sit_i->mounted_time = ktime_get_real_seconds(); - mutex_init(&sit_i->sentry_lock); + init_rwsem(&sit_i->sentry_lock); return 0; } @@ -3410,7 +3658,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) struct sit_info *sit_i = SIT_I(sbi); unsigned int segno; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); sit_i->min_mtime = LLONG_MAX; @@ -3427,7 +3675,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) sit_i->min_mtime = mtime; } sit_i->max_mtime = get_mtime(sbi); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); } int build_segment_manager(struct f2fs_sb_info *sbi) @@ -3460,11 +3708,14 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; + sm_info->min_ssr_sections = reserved_sections(sbi); sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; INIT_LIST_HEAD(&sm_info->sit_entry_set); + init_rwsem(&sm_info->curseg_lock); + if (!f2fs_readonly(sbi->sb)) { err = create_flush_cmd_control(sbi); if (err) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index e0a6cc23ace3..d1d394cdf61d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -231,7 +231,7 @@ struct sit_info { unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ unsigned int dirty_sentries; /* # of dirty sentries */ unsigned int sents_per_block; /* # of SIT entries per block */ - struct mutex sentry_lock; /* to protect SIT cache */ + struct rw_semaphore sentry_lock; /* to protect SIT cache */ struct seg_entry *sentries; /* SIT segment-level cache */ struct sec_entry *sec_entries; /* SIT section-level cache */ @@ -497,6 +497,33 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); } +static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi) +{ + unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) + + get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int segno, left_blocks; + int i; + + /* check current node segment */ + for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { + segno = CURSEG_I(sbi, i)->segno; + left_blocks = sbi->blocks_per_seg - + get_seg_entry(sbi, segno)->ckpt_valid_blocks; + + if (node_blocks > left_blocks) + return false; + } + + /* check current data segment */ + segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; + left_blocks = sbi->blocks_per_seg - + get_seg_entry(sbi, segno)->ckpt_valid_blocks; + if (dent_blocks > left_blocks) + return false; + return true; +} + static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed, int needed) { @@ -507,6 +534,9 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; + if (free_sections(sbi) + freed == reserved_sections(sbi) + needed && + has_curseg_enough_space(sbi)) + return false; return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + imeta_secs + reserved_sections(sbi) + needed); @@ -731,7 +761,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, unsigned int secno) { - if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >= + if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) > sbi->fggc_threshold) return true; return false; @@ -796,8 +826,9 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) goto wake_up; mutex_lock(&dcc->cmd_lock); - for (i = MAX_PLIST_NUM - 1; - i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + if (i + 1 < dcc->discard_granularity) + break; if (!list_empty(&dcc->pend_list[i])) { wakeup = true; break; diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 5c60fc28ec75..0b5664a1a6cc 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -28,7 +28,7 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) { - long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS; + long count = NM_I(sbi)->nid_cnt[FREE_NID] - MAX_FREE_NIDS; return count > 0 ? count : 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 97e03c637e90..a6c5dd450002 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -44,6 +44,8 @@ static struct kmem_cache *f2fs_inode_cachep; char *fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", [FAULT_PAGE_ALLOC] = "page alloc", + [FAULT_PAGE_GET] = "page get", + [FAULT_ALLOC_BIO] = "alloc bio", [FAULT_ALLOC_NID] = "alloc nid", [FAULT_ORPHAN] = "orphan", [FAULT_BLOCK] = "no more block", @@ -92,6 +94,7 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_noinline_xattr, + Opt_inline_xattr_size, Opt_inline_data, Opt_inline_dentry, Opt_noinline_dentry, @@ -141,6 +144,7 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_noinline_xattr, "noinline_xattr"}, + {Opt_inline_xattr_size, "inline_xattr_size=%u"}, {Opt_inline_data, "inline_data"}, {Opt_inline_dentry, "inline_dentry"}, {Opt_noinline_dentry, "noinline_dentry"}, @@ -209,6 +213,12 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quota options when quota turned on"); return -EINVAL; } + if (f2fs_sb_has_quota_ino(sb)) { + f2fs_msg(sb, KERN_INFO, + "QUOTA feature is enabled, so ignore qf_name"); + return 0; + } + qname = match_strdup(args); if (!qname) { f2fs_msg(sb, KERN_ERR, @@ -287,6 +297,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) return -1; } } + + if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) { + f2fs_msg(sbi->sb, KERN_INFO, + "QUOTA feature is enabled, so ignore jquota_fmt"); + sbi->s_jquota_fmt = 0; + } + if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) { + f2fs_msg(sbi->sb, KERN_INFO, + "Filesystem with quota feature cannot be mounted RDWR " + "without CONFIG_QUOTA"); + return -1; + } return 0; } #endif @@ -383,6 +405,12 @@ static int parse_options(struct super_block *sb, char *options) case Opt_noinline_xattr: clear_opt(sbi, INLINE_XATTR); break; + case Opt_inline_xattr_size: + if (args->from && match_int(args, &arg)) + return -EINVAL; + set_opt(sbi, INLINE_XATTR_SIZE); + sbi->inline_xattr_size = arg; + break; #else case Opt_user_xattr: f2fs_msg(sb, KERN_INFO, @@ -604,6 +632,24 @@ static int parse_options(struct super_block *sb, char *options) F2FS_IO_SIZE_KB(sbi)); return -EINVAL; } + + if (test_opt(sbi, INLINE_XATTR_SIZE)) { + if (!test_opt(sbi, INLINE_XATTR)) { + f2fs_msg(sb, KERN_ERR, + "inline_xattr_size option should be " + "set with inline_xattr option"); + return -EINVAL; + } + if (!sbi->inline_xattr_size || + sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE - + F2FS_TOTAL_EXTRA_ATTR_SIZE - + DEF_INLINE_RESERVED_SIZE - + DEF_MIN_INLINE_SIZE) { + f2fs_msg(sb, KERN_ERR, + "inline xattr size is out of range"); + return -EINVAL; + } + } return 0; } @@ -618,13 +664,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_once((void *) fi); /* Initialize f2fs-specific inode info */ - fi->vfs_inode.i_version = 1; atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; fi->i_advise = 0; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); + INIT_LIST_HEAD(&fi->inmem_ilist); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); init_rwsem(&fi->dio_rwsem[READ]); @@ -673,7 +719,6 @@ static int f2fs_drop_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); - fscrypt_put_encryption_info(inode, NULL); spin_lock(&inode->i_lock); atomic_dec(&inode->i_count); } @@ -781,6 +826,7 @@ static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); int i; + bool dropped; f2fs_quota_off_umount(sb); @@ -801,9 +847,9 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - f2fs_wait_discard_bios(sbi, true); + dropped = f2fs_wait_discard_bios(sbi); - if (f2fs_discard_en(sbi) && !sbi->discard_blks) { + if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; @@ -858,6 +904,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) struct f2fs_sb_info *sbi = F2FS_SB(sb); int err = 0; + if (unlikely(f2fs_cp_error(sbi))) + return 0; + trace_f2fs_sync_fs(sb, sync); if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) @@ -957,7 +1006,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi) - - sbi->reserved_blocks; + sbi->current_reserved_blocks; avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; @@ -1046,6 +1095,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",inline_xattr"); else seq_puts(seq, ",noinline_xattr"); + if (test_opt(sbi, INLINE_XATTR_SIZE)) + seq_printf(seq, ",inline_xattr_size=%u", + sbi->inline_xattr_size); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) @@ -1108,6 +1160,7 @@ static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ sbi->active_logs = NR_CURSEG_TYPE; + sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1136,6 +1189,9 @@ static void default_options(struct f2fs_sb_info *sbi) #endif } +#ifdef CONFIG_QUOTA +static int f2fs_enable_quotas(struct super_block *sb); +#endif static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -1202,6 +1258,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) goto skip; +#ifdef CONFIG_QUOTA if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { err = dquot_suspend(sb, -1); if (err < 0) @@ -1209,9 +1266,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } else { /* dquot_resume needs RW */ sb->s_flags &= ~MS_RDONLY; - dquot_resume(sb, -1); + if (sb_any_quota_suspended(sb)) { + dquot_resume(sb, -1); + } else if (f2fs_sb_has_quota_ino(sb)) { + err = f2fs_enable_quotas(sb); + if (err) + goto restore_opts; + } } - +#endif /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; @@ -1320,8 +1383,13 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); repeat: page = read_mapping_page(mapping, blkidx, NULL); - if (IS_ERR(page)) + if (IS_ERR(page)) { + if (PTR_ERR(page) == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto repeat; + } return PTR_ERR(page); + } lock_page(page); @@ -1364,11 +1432,16 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, while (towrite > 0) { tocopy = min_t(unsigned long, sb->s_blocksize - offset, towrite); - +retry: err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, &page, NULL); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } break; + } kaddr = kmap_atomic(page); memcpy(kaddr + offset, data, tocopy); @@ -1385,8 +1458,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, } if (len == towrite) - return 0; - inode->i_version++; + return err; inode->i_mtime = inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, false); return len - towrite; @@ -1408,19 +1480,91 @@ static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) sbi->s_jquota_fmt, type); } -void f2fs_enable_quota_files(struct f2fs_sb_info *sbi) +int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) { - int i, ret; + int enabled = 0; + int i, err; + + if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) { + err = f2fs_enable_quotas(sbi->sb); + if (err) { + f2fs_msg(sbi->sb, KERN_ERR, + "Cannot turn on quota_ino: %d", err); + return 0; + } + return 1; + } for (i = 0; i < MAXQUOTAS; i++) { if (sbi->s_qf_names[i]) { - ret = f2fs_quota_on_mount(sbi, i); - if (ret < 0) - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on journaled " - "quota: error %d", ret); + err = f2fs_quota_on_mount(sbi, i); + if (!err) { + enabled = 1; + continue; + } + f2fs_msg(sbi->sb, KERN_ERR, + "Cannot turn on quotas: %d on %d", err, i); } } + return enabled; +} + +static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, + unsigned int flags) +{ + struct inode *qf_inode; + unsigned long qf_inum; + int err; + + BUG_ON(!f2fs_sb_has_quota_ino(sb)); + + qf_inum = f2fs_qf_ino(sb, type); + if (!qf_inum) + return -EPERM; + + qf_inode = f2fs_iget(sb, qf_inum); + if (IS_ERR(qf_inode)) { + f2fs_msg(sb, KERN_ERR, + "Bad quota inode %u:%lu", type, qf_inum); + return PTR_ERR(qf_inode); + } + + /* Don't account quota for quota files to avoid recursion */ + qf_inode->i_flags |= S_NOQUOTA; + err = dquot_enable(qf_inode, type, format_id, flags); + iput(qf_inode); + return err; +} + +static int f2fs_enable_quotas(struct super_block *sb) +{ + int type, err = 0; + unsigned long qf_inum; + bool quota_mopt[MAXQUOTAS] = { + test_opt(F2FS_SB(sb), USRQUOTA), + test_opt(F2FS_SB(sb), GRPQUOTA), + test_opt(F2FS_SB(sb), PRJQUOTA), + }; + + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY; + for (type = 0; type < MAXQUOTAS; type++) { + qf_inum = f2fs_qf_ino(sb, type); + if (qf_inum) { + err = f2fs_quota_enable(sb, type, QFMT_VFS_V1, + DQUOT_USAGE_ENABLED | + (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Failed to enable quota tracking " + "(type=%d, err=%d). Please run " + "fsck to fix.", type, err); + for (type--; type >= 0; type--) + dquot_quota_off(sb, type); + return err; + } + } + } + return 0; } static int f2fs_quota_sync(struct super_block *sb, int type) @@ -1491,7 +1635,7 @@ static int f2fs_quota_off(struct super_block *sb, int type) f2fs_quota_sync(sb, type); err = dquot_quota_off(sb, type); - if (err) + if (err || f2fs_sb_has_quota_ino(sb)) goto out_put; inode_lock(inode); @@ -1651,7 +1795,7 @@ static loff_t max_file_blocks(void) /* * note: previously, result is equal to (DEF_ADDRS_PER_INODE - - * F2FS_INLINE_XATTR_ADDRS), but now f2fs try to reserve more + * DEFAULT_INLINE_XATTR_ADDRS), but now f2fs try to reserve more * space in inode.i_addr, it will be more safe to reassign * result as zero. */ @@ -1960,6 +2104,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) for (j = HOT; j < NR_TEMP_TYPE; j++) mutex_init(&sbi->wio_mutex[i][j]); spin_lock_init(&sbi->cp_lock); + + sbi->dirty_device = 0; + spin_lock_init(&sbi->dev_lock); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -2310,7 +2457,10 @@ try_onemore: #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; - sb->s_qcop = &f2fs_quotactl_ops; + if (f2fs_sb_has_quota_ino(sb)) + sb->s_qcop = &dquot_quotactl_sysfile_ops; + else + sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif @@ -2408,6 +2558,7 @@ try_onemore: le64_to_cpu(sbi->ckpt->valid_block_count); sbi->last_valid_block_count = sbi->total_valid_block_count; sbi->reserved_blocks = 0; + sbi->current_reserved_blocks = 0; for (i = 0; i < NR_INODE_TYPE; i++) { INIT_LIST_HEAD(&sbi->inode_list[i]); @@ -2482,10 +2633,24 @@ try_onemore: if (err) goto free_root_inode; +#ifdef CONFIG_QUOTA + /* + * Turn on quotas which were not enabled for read-only mounts if + * filesystem has quota feature, so that they are updated correctly. + */ + if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) { + err = f2fs_enable_quotas(sb); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Cannot turn on quotas: error %d", err); + goto free_sysfs; + } + } +#endif /* if there are nt orphan nodes free them */ err = recover_orphan_inodes(sbi); if (err) - goto free_sysfs; + goto free_meta; /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -2519,7 +2684,7 @@ try_onemore: err = -EINVAL; f2fs_msg(sb, KERN_ERR, "Need to recover fsync data"); - goto free_sysfs; + goto free_meta; } } skip_recovery: @@ -2553,6 +2718,10 @@ skip_recovery: return 0; free_meta: +#ifdef CONFIG_QUOTA + if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) + f2fs_quota_off_umount(sbi->sb); +#endif f2fs_sync_inode_meta(sbi); /* * Some dirty meta pages can be produced by recover_orphan_inodes() @@ -2561,7 +2730,9 @@ free_meta: * falls into an infinite loop in sync_meta_pages(). */ truncate_inode_pages_final(META_MAPPING(sbi)); +#ifdef CONFIG_QUOTA free_sysfs: +#endif f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e2c258f717cd..9835348b6e5d 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -30,7 +30,7 @@ enum { FAULT_INFO_RATE, /* struct f2fs_fault_info */ FAULT_INFO_TYPE, /* struct f2fs_fault_info */ #endif - RESERVED_BLOCKS, + RESERVED_BLOCKS, /* struct f2fs_sb_info */ }; struct f2fs_attr { @@ -63,6 +63,13 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) return NULL; } +static ssize_t dirty_segments_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)(dirty_segments(sbi))); +} + static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -100,10 +107,22 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_inode_chksum(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_checksum"); + if (f2fs_sb_has_flexible_inline_xattr(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "flexible_inline_xattr"); + if (f2fs_sb_has_quota_ino(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "quota_ino"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } +static ssize_t current_reserved_blocks_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks); +} + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -143,34 +162,22 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); - if ((unsigned long)sbi->total_valid_block_count + t > - (unsigned long)sbi->user_block_count) { + if (t > (unsigned long)sbi->user_block_count) { spin_unlock(&sbi->stat_lock); return -EINVAL; } *ui = t; + sbi->current_reserved_blocks = min(sbi->reserved_blocks, + sbi->user_block_count - valid_user_blocks(sbi)); spin_unlock(&sbi->stat_lock); return count; } if (!strcmp(a->attr.name, "discard_granularity")) { - struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - int i; - if (t == 0 || t > MAX_PLIST_NUM) return -EINVAL; if (t == *ui) return count; - - mutex_lock(&dcc->cmd_lock); - for (i = 0; i < MAX_PLIST_NUM; i++) { - if (i >= t - 1) - dcc->pend_list_tag[i] |= P_ACTIVE; - else - dcc->pend_list_tag[i] &= (~P_ACTIVE); - } - mutex_unlock(&dcc->cmd_lock); - *ui = t; return count; } @@ -222,6 +229,8 @@ enum feat_id { FEAT_EXTRA_ATTR, FEAT_PROJECT_QUOTA, FEAT_INODE_CHECKSUM, + FEAT_FLEXIBLE_INLINE_XATTR, + FEAT_QUOTA_INO, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -234,6 +243,8 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_EXTRA_ATTR: case FEAT_PROJECT_QUOTA: case FEAT_INODE_CHECKSUM: + case FEAT_FLEXIBLE_INLINE_XATTR: + case FEAT_QUOTA_INO: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -279,6 +290,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); @@ -291,8 +303,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif +F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); +F2FS_GENERAL_RO_ATTR(current_reserved_blocks); #ifdef CONFIG_F2FS_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -304,6 +318,8 @@ F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE); F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR); F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA); F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); +F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); +F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -321,6 +337,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), ATTR_LIST(min_hot_blocks), + ATTR_LIST(min_ssr_sections), ATTR_LIST(max_victim_search), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), @@ -333,9 +350,11 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_rate), ATTR_LIST(inject_type), #endif + ATTR_LIST(dirty_segments), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), + ATTR_LIST(current_reserved_blocks), NULL, }; @@ -350,6 +369,8 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(extra_attr), ATTR_LIST(project_quota), ATTR_LIST(inode_checksum), + ATTR_LIST(flexible_inline_xattr), + ATTR_LIST(quota_ino), NULL, }; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 7c65540148f8..ec8961ef8cac 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -217,12 +217,12 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, return entry; } -static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr, - void **last_addr, int index, - size_t len, const char *name) +static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, + void *base_addr, void **last_addr, int index, + size_t len, const char *name) { struct f2fs_xattr_entry *entry; - unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2; + unsigned int inline_size = inline_xattr_size(inode); list_for_each_xattr(entry, base_addr) { if ((void *)entry + sizeof(__u32) > base_addr + inline_size || @@ -241,12 +241,54 @@ static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr, return entry; } +static int read_inline_xattr(struct inode *inode, struct page *ipage, + void *txattr_addr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int inline_size = inline_xattr_size(inode); + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(inode, ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + inline_addr = inline_xattr_addr(inode, page); + } + memcpy(txattr_addr, inline_addr, inline_size); + f2fs_put_page(page, 1); + + return 0; +} + +static int read_xattr_block(struct inode *inode, void *txattr_addr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + nid_t xnid = F2FS_I(inode)->i_xattr_nid; + unsigned int inline_size = inline_xattr_size(inode); + struct page *xpage; + void *xattr_addr; + + /* The inode already has an extended attribute block. */ + xpage = get_node_page(sbi, xnid); + if (IS_ERR(xpage)) + return PTR_ERR(xpage); + + xattr_addr = page_address(xpage); + memcpy(txattr_addr + inline_size, xattr_addr, VALID_XATTR_BLOCK_SIZE); + f2fs_put_page(xpage, 1); + + return 0; +} + static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, void **base_addr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); void *cur_addr, *txattr_addr, *last_addr = NULL; nid_t xnid = F2FS_I(inode)->i_xattr_nid; unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0; @@ -263,23 +305,11 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, /* read from inline xattr */ if (inline_size) { - struct page *page = NULL; - void *inline_addr; + err = read_inline_xattr(inode, ipage, txattr_addr); + if (err) + goto out; - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - } else { - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out; - } - inline_addr = inline_xattr_addr(page); - } - memcpy(txattr_addr, inline_addr, inline_size); - f2fs_put_page(page, 1); - - *xe = __find_inline_xattr(txattr_addr, &last_addr, + *xe = __find_inline_xattr(inode, txattr_addr, &last_addr, index, len, name); if (*xe) goto check; @@ -287,19 +317,9 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, /* read from xattr node block */ if (xnid) { - struct page *xpage; - void *xattr_addr; - - /* The inode already has an extended attribute block. */ - xpage = get_node_page(sbi, xnid); - if (IS_ERR(xpage)) { - err = PTR_ERR(xpage); + err = read_xattr_block(inode, txattr_addr); + if (err) goto out; - } - - xattr_addr = page_address(xpage); - memcpy(txattr_addr + inline_size, xattr_addr, size); - f2fs_put_page(xpage, 1); } if (last_addr) @@ -324,7 +344,6 @@ out: static int read_all_xattrs(struct inode *inode, struct page *ipage, void **base_addr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_header *header; nid_t xnid = F2FS_I(inode)->i_xattr_nid; unsigned int size = VALID_XATTR_BLOCK_SIZE; @@ -339,38 +358,16 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, /* read from inline xattr */ if (inline_size) { - struct page *page = NULL; - void *inline_addr; - - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - } else { - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; - } - inline_addr = inline_xattr_addr(page); - } - memcpy(txattr_addr, inline_addr, inline_size); - f2fs_put_page(page, 1); + err = read_inline_xattr(inode, ipage, txattr_addr); + if (err) + goto fail; } /* read from xattr node block */ if (xnid) { - struct page *xpage; - void *xattr_addr; - - /* The inode already has an extended attribute block. */ - xpage = get_node_page(sbi, xnid); - if (IS_ERR(xpage)) { - err = PTR_ERR(xpage); + err = read_xattr_block(inode, txattr_addr); + if (err) goto fail; - } - - xattr_addr = page_address(xpage); - memcpy(txattr_addr + inline_size, xattr_addr, size); - f2fs_put_page(xpage, 1); } header = XATTR_HDR(txattr_addr); @@ -392,10 +389,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t inline_size = inline_xattr_size(inode); + struct page *in_page = NULL; void *xattr_addr; + void *inline_addr = NULL; struct page *xpage; nid_t new_nid = 0; - int err; + int err = 0; if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) if (!alloc_nid(sbi, &new_nid)) @@ -403,30 +402,30 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, /* write to inline xattr */ if (inline_size) { - struct page *page = NULL; - void *inline_addr; - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); - set_page_dirty(ipage); + inline_addr = inline_xattr_addr(inode, ipage); } else { - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { + in_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(in_page)) { alloc_nid_failed(sbi, new_nid); - return PTR_ERR(page); + return PTR_ERR(in_page); } - inline_addr = inline_xattr_addr(page); - f2fs_wait_on_page_writeback(page, NODE, true); + inline_addr = inline_xattr_addr(inode, in_page); } - memcpy(inline_addr, txattr_addr, inline_size); - f2fs_put_page(page, 1); + f2fs_wait_on_page_writeback(ipage ? ipage : in_page, + NODE, true); /* no need to use xattr node block */ if (hsize <= inline_size) { - err = truncate_xattr_node(inode, ipage); + err = truncate_xattr_node(inode); alloc_nid_failed(sbi, new_nid); - return err; + if (err) { + f2fs_put_page(in_page, 1); + return err; + } + memcpy(inline_addr, txattr_addr, inline_size); + set_page_dirty(ipage ? ipage : in_page); + goto in_page_out; } } @@ -435,7 +434,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); if (IS_ERR(xpage)) { alloc_nid_failed(sbi, new_nid); - return PTR_ERR(xpage); + goto in_page_out; } f2fs_bug_on(sbi, new_nid); f2fs_wait_on_page_writeback(xpage, NODE, true); @@ -445,17 +444,24 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, xpage = new_node_page(&dn, XATTR_NODE_OFFSET); if (IS_ERR(xpage)) { alloc_nid_failed(sbi, new_nid); - return PTR_ERR(xpage); + goto in_page_out; } alloc_nid_done(sbi, new_nid); } - xattr_addr = page_address(xpage); - memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); - set_page_dirty(xpage); - f2fs_put_page(xpage, 1); - return 0; + if (inline_size) + memcpy(inline_addr, txattr_addr, inline_size); + memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); + + if (inline_size) + set_page_dirty(ipage ? ipage : in_page); + set_page_dirty(xpage); + + f2fs_put_page(xpage, 1); +in_page_out: + f2fs_put_page(in_page, 1); + return err; } int f2fs_getxattr(struct inode *inode, int index, const char *name, @@ -681,6 +687,10 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; + err = dquot_initialize(inode); + if (err) + return err; + /* this case is only from init_inode_metadata */ if (ipage) return __f2fs_setxattr(inode, index, name, value, diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 2a0c453d7235..43e98d30d2df 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -36,6 +36,8 @@ #define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) #define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) +#define F2FS_MAX_QUOTAS 3 + #define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ #define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ @@ -108,7 +110,8 @@ struct f2fs_super_block { __u8 encryption_level; /* versioning level for encryption */ __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ struct f2fs_device devs[MAX_DEVICES]; /* device list */ - __u8 reserved[327]; /* valid reserved region */ + __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ + __u8 reserved[315]; /* valid reserved region */ } __packed; /* @@ -184,7 +187,8 @@ struct f2fs_extent { } __packed; #define F2FS_NAME_LEN 255 -#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ +/* 200 bytes for inline xattrs by default */ +#define DEFAULT_INLINE_XATTR_ADDRS 50 #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) @@ -238,7 +242,7 @@ struct f2fs_inode { union { struct { __le16 i_extra_isize; /* extra inode attribute size */ - __le16 i_padding; /* padding */ + __le16 i_inline_xattr_size; /* inline xattr size, unit: 4 bytes */ __le32 i_projid; /* project id */ __le32 i_inode_checksum;/* inode meta checksum */ __le32 i_extra_end[0]; /* for attribute size calculation */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 7ab40491485b..8f8dd42fa57b 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -137,6 +137,18 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { CP_UMOUNT, "Umount" }, \ { CP_TRIMMED, "Trimmed" }) +#define show_fsync_cpreason(type) \ + __print_symbolic(type, \ + { CP_NO_NEEDED, "no needed" }, \ + { CP_NON_REGULAR, "non regular" }, \ + { CP_HARDLINK, "hardlink" }, \ + { CP_SB_NEED_CP, "sb needs cp" }, \ + { CP_WRONG_PINO, "wrong pino" }, \ + { CP_NO_SPC_ROLL, "no space roll forward" }, \ + { CP_NODE_NEED_CP, "node needs cp" }, \ + { CP_FASTBOOT_MODE, "fastboot mode" }, \ + { CP_SPEC_LOG_NUM, "log type is 2" }) + struct victim_sel_policy; struct f2fs_map_blocks; @@ -211,14 +223,14 @@ DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, TRACE_EVENT(f2fs_sync_file_exit, - TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret), + TP_PROTO(struct inode *inode, int cp_reason, int datasync, int ret), - TP_ARGS(inode, need_cp, datasync, ret), + TP_ARGS(inode, cp_reason, datasync, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(int, need_cp) + __field(int, cp_reason) __field(int, datasync) __field(int, ret) ), @@ -226,15 +238,15 @@ TRACE_EVENT(f2fs_sync_file_exit, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->need_cp = need_cp; + __entry->cp_reason = cp_reason; __entry->datasync = datasync; __entry->ret = ret; ), - TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, " + TP_printk("dev = (%d,%d), ino = %lu, cp_reason: %s, " "datasync = %d, ret = %d", show_dev_ino(__entry), - __entry->need_cp ? "needed" : "not needed", + show_fsync_cpreason(__entry->cp_reason), __entry->datasync, __entry->ret) ); @@ -729,6 +741,91 @@ TRACE_EVENT(f2fs_get_victim, __entry->free) ); +TRACE_EVENT(f2fs_lookup_start, + + TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(const char *, name) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->name = dentry->d_name.name; + __entry->flags = flags; + ), + + TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u", + show_dev_ino(__entry), + __entry->name, + __entry->flags) +); + +TRACE_EVENT(f2fs_lookup_end, + + TP_PROTO(struct inode *dir, struct dentry *dentry, nid_t ino, + int err), + + TP_ARGS(dir, dentry, ino, err), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(const char *, name) + __field(nid_t, cino) + __field(int, err) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->name = dentry->d_name.name; + __entry->cino = ino; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d", + show_dev_ino(__entry), + __entry->name, + __entry->cino, + __entry->err) +); + +TRACE_EVENT(f2fs_readdir, + + TP_PROTO(struct inode *dir, loff_t start_pos, loff_t end_pos, int err), + + TP_ARGS(dir, start_pos, end_pos, err), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, start) + __field(loff_t, end) + __field(int, err) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->start = start_pos; + __entry->end = end_pos; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), ino = %lu, start_pos:%llu, end_pos:%llu, err:%d", + show_dev_ino(__entry), + __entry->start, + __entry->end, + __entry->err) +); + TRACE_EVENT(f2fs_fallocate, TP_PROTO(struct inode *inode, int mode, @@ -1287,6 +1384,13 @@ DEFINE_EVENT(f2fs_discard, f2fs_issue_discard, TP_ARGS(dev, blkstart, blklen) ); +DEFINE_EVENT(f2fs_discard, f2fs_remove_discard, + + TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), + + TP_ARGS(dev, blkstart, blklen) +); + TRACE_EVENT(f2fs_issue_reset_zone, TP_PROTO(struct block_device *dev, block_t blkstart),