diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index a809f6005f14..84c606fb3ca4 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -75,7 +75,7 @@ Contact: "Jaegeuk Kim" Description: Controls the memory footprint used by f2fs. -What: /sys/fs/f2fs//trim_sections +What: /sys/fs/f2fs//batched_trim_sections Date: February 2015 Contact: "Jaegeuk Kim" Description: @@ -112,3 +112,21 @@ Date: January 2016 Contact: "Shuoran Liu" Description: Shows total written kbytes issued to disk. + +What: /sys/fs/f2fs//inject_rate +Date: May 2016 +Contact: "Sheng Yong" +Description: + Controls the injection rate. + +What: /sys/fs/f2fs//inject_type +Date: May 2016 +Contact: "Sheng Yong" +Description: + Controls the injection type. + +What: /sys/fs/f2fs//reserved_blocks +Date: June 2017 +Contact: "Chao Yu" +Description: + Controls current reserved blocks in system. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 4f6531a4701b..273ccb26885e 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -155,11 +155,15 @@ noinline_data Disable the inline data feature, inline data feature is enabled by default. data_flush Enable data flushing before checkpoint in order to persist data of regular and symlink. +fault_injection=%d Enable fault injection in all supported types with + specified injection rate. mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. io_bits=%u Set the bit size of write IO requests. It should be set with "mode=lfs". +usrquota Enable plain user disk quota accounting. +grpquota Enable plain group disk quota accounting. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index ca949ea7c02f..a0dc559b1b47 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o -f2fs-y += shrinker.o extent_cache.o +f2fs-y += shrinker.o extent_cache.o sysfs.o f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 8f487692c21f..a140c5e3dc54 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -233,7 +233,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size); if (IS_ERR(value)) { clear_inode_flag(inode, FI_ACL_MODE); - return (int)PTR_ERR(value); + return PTR_ERR(value); } } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ea9c317b5916..56bbf592e487 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -31,7 +31,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) set_ckpt_flags(sbi, CP_ERROR_FLAG); sbi->sb->s_flags |= MS_RDONLY; if (!end_io) - f2fs_flush_merged_bios(sbi); + f2fs_flush_merged_writes(sbi); } /* @@ -162,6 +162,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op = REQ_OP_READ, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, + .in_list = false, }; struct blk_plug plug; @@ -207,12 +208,10 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, } fio.page = page; - fio.old_blkaddr = fio.new_blkaddr; - f2fs_submit_page_mbio(&fio); + f2fs_submit_page_bio(&fio); f2fs_put_page(page, 0); } out: - f2fs_submit_merged_bio(sbi, META, READ); blk_finish_plug(&plug); return blkno - start; } @@ -249,13 +248,13 @@ static int f2fs_write_meta_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_bio_cond(sbi, page->mapping->host, - 0, page->index, META, WRITE); + f2fs_submit_merged_write_cond(sbi, page->mapping->host, + 0, page->index, META); unlock_page(page); if (unlikely(f2fs_cp_error(sbi))) - f2fs_submit_merged_bio(sbi, META, WRITE); + f2fs_submit_merged_write(sbi, META); return 0; @@ -270,6 +269,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); long diff, written; + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + /* collect a number of dirty meta pages and write together */ if (wbc->for_kupdate || get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) @@ -358,7 +360,7 @@ continue_unlock: } stop: if (nwritten) - f2fs_submit_merged_bio(sbi, type, WRITE); + f2fs_submit_merged_write(sbi, type); blk_finish_plug(&plug); @@ -906,7 +908,7 @@ retry: * We should submit bio, since it exists several * wribacking dentry pages in the freeing inode. */ - f2fs_submit_merged_bio(sbi, DATA, WRITE); + f2fs_submit_merged_write(sbi, DATA); cond_resched(); } goto retry; @@ -1051,8 +1053,9 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) { unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long flags; - spin_lock(&sbi->cp_lock); + spin_lock_irqsave(&sbi->cp_lock, flags); if ((cpc->reason & CP_UMOUNT) && le32_to_cpu(ckpt->cp_pack_total_block_count) > @@ -1083,14 +1086,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi); - unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; + unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags; block_t start_blk; unsigned int data_sum_blocks, orphan_blocks; __u32 crc32 = 0; @@ -1132,12 +1135,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi, false); - spin_lock(&sbi->cp_lock); + spin_lock_irqsave(&sbi->cp_lock, flags); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + @@ -1295,7 +1298,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); - f2fs_flush_merged_bios(sbi); + f2fs_flush_merged_writes(sbi); /* this is the case of multiple fstrims without any changes */ if (cpc->reason & CP_DISCARD) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 36fe82012a33..87c1f4150c64 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -282,29 +282,32 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, nid_t ino, pgoff_t idx, enum page_type type) { enum page_type btype = PAGE_TYPE_OF_BIO(type); - struct f2fs_bio_info *io = &sbi->write_io[btype]; - bool ret; + enum temp_type temp; + struct f2fs_bio_info *io; + bool ret = false; - down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, ino, idx); - up_read(&io->io_rwsem); + for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + io = sbi->write_io[btype] + temp; + + down_read(&io->io_rwsem); + ret = __has_merged_page(io, inode, ino, idx); + up_read(&io->io_rwsem); + + /* TODO: use HOT temp only for meta pages now. */ + if (ret || btype == META) + break; + } return ret; } -static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, int rw) +static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) { enum page_type btype = PAGE_TYPE_OF_BIO(type); - struct f2fs_bio_info *io; - - io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; + struct f2fs_bio_info *io = sbi->write_io[btype] + temp; down_write(&io->io_rwsem); - if (!__has_merged_page(io, inode, ino, idx)) - goto out; - /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { io->fio.type = META_FLUSH; @@ -314,29 +317,45 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } __submit_merged_bio(io); -out: up_write(&io->io_rwsem); } -void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, - int rw) -{ - __f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw); -} - -void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, +static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, int rw) + enum page_type type, bool force) { - if (has_merged_page(sbi, inode, ino, idx, type)) - __f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw); + enum temp_type temp; + + if (!force && !has_merged_page(sbi, inode, ino, idx, type)) + return; + + for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + + __f2fs_submit_merged_write(sbi, type, temp); + + /* TODO: use HOT temp only for meta pages now. */ + if (type >= META) + break; + } } -void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi) +void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) { - f2fs_submit_merged_bio(sbi, DATA, WRITE); - f2fs_submit_merged_bio(sbi, NODE, WRITE); - f2fs_submit_merged_bio(sbi, META, WRITE); + __submit_merged_write_cond(sbi, NULL, 0, 0, type, true); +} + +void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type) +{ + __submit_merged_write_cond(sbi, inode, ino, idx, type, false); +} + +void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) +{ + f2fs_submit_merged_write(sbi, DATA); + f2fs_submit_merged_write(sbi, NODE); + f2fs_submit_merged_write(sbi, META); } /* @@ -368,16 +387,29 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) return 0; } -int f2fs_submit_page_mbio(struct f2fs_io_info *fio) +int f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); - struct f2fs_bio_info *io; - bool is_read = is_read_io(fio->op); + struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; int err = 0; - io = is_read ? &sbi->read_io : &sbi->write_io[btype]; + f2fs_bug_on(sbi, is_read_io(fio->op)); + + down_write(&io->io_rwsem); +next: + if (fio->in_list) { + spin_lock(&io->io_lock); + if (list_empty(&io->io_list)) { + spin_unlock(&io->io_lock); + goto out_fail; + } + fio = list_first_entry(&io->io_list, + struct f2fs_io_info, list); + list_del(&fio->list); + spin_unlock(&io->io_lock); + } if (fio->old_blkaddr != NEW_ADDR) verify_block_addr(sbi, fio->old_blkaddr); @@ -388,10 +420,7 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio) /* set submitted = 1 as a return value */ fio->submitted = 1; - if (!is_read) - inc_page_count(sbi, WB_DATA_TYPE(bio_page)); - - down_write(&io->io_rwsem); + inc_page_count(sbi, WB_DATA_TYPE(bio_page)); if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 || (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) || @@ -402,26 +431,28 @@ alloc_new: if ((fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { err = -EAGAIN; - if (!is_read) - dec_page_count(sbi, WB_DATA_TYPE(bio_page)); + dec_page_count(sbi, WB_DATA_TYPE(bio_page)); goto out_fail; } io->bio = __bio_alloc(sbi, fio->new_blkaddr, - BIO_MAX_PAGES, is_read); + BIO_MAX_PAGES, false); io->fio = *fio; } - if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < - PAGE_SIZE) { + if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) { __submit_merged_bio(io); goto alloc_new; } io->last_block_in_bio = fio->new_blkaddr; f2fs_trace_ios(fio, 0); + + trace_f2fs_submit_page_write(fio->page, fio); + + if (fio->in_list) + goto next; out_fail: up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(fio->page, fio); return err; } @@ -460,14 +491,15 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + int err; if (!count) return 0; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) - return -ENOSPC; + if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + return err; trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); @@ -718,6 +750,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct node_info ni; pgoff_t fofs; blkcnt_t count = 1; + int err; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; @@ -726,15 +759,15 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (dn->data_blkaddr == NEW_ADDR) goto alloc; - if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) - return -ENOSPC; + if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + return err; alloc: get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, - &sum, CURSEG_WARM_DATA); + &sum, CURSEG_WARM_DATA, NULL, false); set_data_blkaddr(dn); /* update i_size */ @@ -1321,7 +1354,7 @@ retry_encrypt: /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { - f2fs_flush_merged_bios(fio->sbi); + f2fs_flush_merged_writes(fio->sbi); congestion_wait(BLK_RW_ASYNC, HZ/50); gfp_flags |= __GFP_NOFAIL; goto retry_encrypt; @@ -1368,13 +1401,14 @@ int do_write_data_page(struct f2fs_io_info *fio) if (valid_ipu_blkaddr(fio)) { ipu_force = true; - fio->need_lock = false; + fio->need_lock = LOCK_DONE; goto got_it; } } - if (fio->need_lock) - f2fs_lock_op(fio->sbi); + /* Deadlock due to between page->lock and f2fs_lock_op */ + if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) + return -EAGAIN; err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) @@ -1388,19 +1422,18 @@ int do_write_data_page(struct f2fs_io_info *fio) goto out_writepage; } got_it: - err = encrypt_one_page(fio); - if (err) - goto out_writepage; - - set_page_writeback(page); - /* * If current allocation needs SSR, * it had better in-place writes for updated data. */ if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { + err = encrypt_one_page(fio); + if (err) + goto out_writepage; + + set_page_writeback(page); f2fs_put_dnode(&dn); - if (fio->need_lock) + if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); err = rewrite_data_page(fio); trace_f2fs_do_write_data_page(fio->page, IPU); @@ -1408,6 +1441,20 @@ got_it: return err; } + if (fio->need_lock == LOCK_RETRY) { + if (!f2fs_trylock_op(fio->sbi)) { + err = -EAGAIN; + goto out_writepage; + } + fio->need_lock = LOCK_REQ; + } + + err = encrypt_one_page(fio); + if (err) + goto out_writepage; + + set_page_writeback(page); + /* LFS mode write path */ write_data_page(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); @@ -1417,7 +1464,7 @@ got_it: out_writepage: f2fs_put_dnode(&dn); out: - if (fio->need_lock) + if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); return err; } @@ -1443,11 +1490,14 @@ static int __write_data_page(struct page *page, bool *submitted, .page = page, .encrypted_page = NULL, .submitted = false, - .need_lock = true, + .need_lock = LOCK_RETRY, }; trace_f2fs_writepage(page, DATA); + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto redirty_out; + if (page->index < end_index) goto write; @@ -1461,8 +1511,6 @@ static int __write_data_page(struct page *page, bool *submitted, zero_user_segment(page, offset, PAGE_SIZE); write: - if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) - goto redirty_out; if (f2fs_is_drop_cache(inode)) goto out; /* we should not write 0'th page having journal header */ @@ -1479,7 +1527,7 @@ write: /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { - fio.need_lock = false; + fio.need_lock = LOCK_DONE; err = do_write_data_page(&fio); goto done; } @@ -1498,8 +1546,13 @@ write: goto out; } - if (err == -EAGAIN) + if (err == -EAGAIN) { err = do_write_data_page(&fio); + if (err == -EAGAIN) { + fio.need_lock = LOCK_REQ; + err = do_write_data_page(&fio); + } + } if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; @@ -1513,8 +1566,7 @@ out: ClearPageUptodate(page); if (wbc->for_reclaim) { - f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index, - DATA, WRITE); + f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA); clear_inode_flag(inode, FI_HOT_DATA); remove_dirty_inode(inode); submitted = NULL; @@ -1525,7 +1577,7 @@ out: f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { - f2fs_submit_merged_bio(sbi, DATA, WRITE); + f2fs_submit_merged_write(sbi, DATA); submitted = NULL; } @@ -1618,7 +1670,7 @@ retry: } done_index = page->index; - +retry_write: lock_page(page); if (unlikely(page->mapping != mapping)) { @@ -1654,6 +1706,15 @@ continue_unlock: unlock_page(page); ret = 0; continue; + } else if (ret == -EAGAIN) { + ret = 0; + if (wbc->sync_mode == WB_SYNC_ALL) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, + HZ/50); + goto retry_write; + } + continue; } done_index = page->index + 1; done = 1; @@ -1684,8 +1745,8 @@ continue_unlock: mapping->writeback_index = done_index; if (last_idx != ULONG_MAX) - f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host, - 0, last_idx, DATA, WRITE); + f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, + 0, last_idx, DATA); return ret; } @@ -1706,6 +1767,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) return 0; + /* during POR, we don't need to trigger writepage at all. */ + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && available_free_memory(sbi, DIRTY_DENTS)) @@ -1715,10 +1780,6 @@ static int f2fs_write_data_pages(struct address_space *mapping, if (is_inode_flag_set(inode, FI_DO_DEFRAG)) goto skip_write; - /* during POR, we don't need to trigger writepage at all. */ - if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) - goto skip_write; - trace_f2fs_writepages(mapping->host, wbc, DATA); /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ @@ -1753,8 +1814,10 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) loff_t i_size = i_size_read(inode); if (to > i_size) { + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); truncate_blocks(inode, i_size, true); + up_write(&F2FS_I(inode)->i_mmap_sem); } } @@ -2152,8 +2215,12 @@ int f2fs_migrate_page(struct address_space *mapping, BUG_ON(PageWriteback(page)); /* migrating an atomic written page is safe with the inmem_lock hold */ - if (atomic_written && !mutex_trylock(&fi->inmem_lock)) - return -EAGAIN; + if (atomic_written) { + if (mode != MIGRATE_SYNC) + return -EBUSY; + if (!mutex_trylock(&fi->inmem_lock)) + return -EAGAIN; + } /* * A reference is expected if PagePrivate set when move mapping, diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 94756f55a97e..37f9c7f55605 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -415,7 +415,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, * We lost i_pino from now on. */ if (is_inode_flag_set(inode, FI_INC_LINK)) { - file_lost_pino(inode); + if (!S_ISDIR(inode->i_mode)) + file_lost_pino(inode); /* * If link the tmpfile to alias through linkat path, * we should remove this inode from orphan list. diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 2f98d7039701..ff2352a0ed15 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -320,7 +320,7 @@ static void __drop_largest_extent(struct inode *inode, } /* return true, if inode page is changed */ -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et; @@ -358,6 +358,16 @@ out: return false; } +bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +{ + bool ret = __f2fs_init_extent_tree(inode, i_ext); + + if (!F2FS_I(inode)->extent_tree) + set_inode_flag(inode, FI_NO_EXTENT); + + return ret; +} + static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fd2e651bad6d..94a88b233e98 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_F2FS_FS_ENCRYPTION #include #else @@ -88,6 +89,8 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 #define F2FS_MOUNT_ADAPTIVE 0x00020000 #define F2FS_MOUNT_LFS 0x00040000 +#define F2FS_MOUNT_USRQUOTA 0x00080000 +#define F2FS_MOUNT_GRPQUOTA 0x00100000 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) @@ -303,6 +306,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, struct f2fs_move_range) #define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ struct f2fs_flush_device) +#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ + struct f2fs_gc_range) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -327,6 +332,12 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION #endif +struct f2fs_gc_range { + u32 sync; + u64 start; + u64 len; +}; + struct f2fs_defragment { u64 start; u64 len; @@ -513,12 +524,19 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; + + /* quota space reservation, managed internally by quota code */ + qsize_t i_reserved_quota; +#endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ struct extent_tree *extent_tree; /* cached extent_tree entry */ struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ + struct rw_semaphore i_mmap_sem; }; static inline void get_extent_info(struct extent_info *ext, @@ -792,17 +810,33 @@ enum page_type { OPU, }; +enum temp_type { + HOT = 0, /* must be zero for meta bio */ + WARM, + COLD, + NR_TEMP_TYPE, +}; + +enum need_lock_type { + LOCK_REQ = 0, + LOCK_DONE, + LOCK_RETRY, +}; + struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ + enum temp_type temp; /* contains HOT/WARM/COLD */ int op; /* contains REQ_OP_ */ int op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ + struct list_head list; /* serialize IOs */ bool submitted; /* indicate IO submission */ - bool need_lock; /* indicate we need to lock cp_rwsem */ + int need_lock; /* indicate we need to lock cp_rwsem */ + bool in_list; /* indicate fio is in io_list */ }; #define is_read_io(rw) ((rw) == READ) @@ -812,6 +846,8 @@ struct f2fs_bio_info { sector_t last_block_in_bio; /* last block number */ struct f2fs_io_info fio; /* store buffered io info. */ struct rw_semaphore io_rwsem; /* blocking op for bio */ + spinlock_t io_lock; /* serialize DATA/NODE IOs */ + struct list_head io_list; /* track fios */ }; #define FDEV(i) (sbi->devs[i]) @@ -879,9 +915,9 @@ struct f2fs_sb_info { struct f2fs_sm_info *sm_info; /* segment manager */ /* for bio operations */ - struct f2fs_bio_info read_io; /* for read bios */ - struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ - struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ + struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ + struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; + /* bio ordering for NODE/DATA */ int write_io_size_bits; /* Write IO size bits */ mempool_t *write_io_dummy; /* Dummy pages */ @@ -939,6 +975,8 @@ struct f2fs_sb_info { block_t total_valid_block_count; /* # of valid blocks */ block_t discard_blks; /* discard command candidats */ block_t last_valid_block_count; /* for recovery */ + block_t reserved_blocks; /* configurable reserved blocks */ + u32 s_next_generation; /* for NFS support */ /* # of pages, see count_type */ @@ -1228,9 +1266,11 @@ static inline void __set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) { - spin_lock(&sbi->cp_lock); + unsigned long flags; + + spin_lock_irqsave(&sbi->cp_lock, flags); __set_ckpt_flags(F2FS_CKPT(sbi), f); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) @@ -1244,22 +1284,26 @@ static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) { - spin_lock(&sbi->cp_lock); + unsigned long flags; + + spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), f); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { + unsigned long flags; + set_sbi_flag(sbi, SBI_NEED_FSCK); if (lock) - spin_lock(&sbi->cp_lock); + spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); kfree(NM_I(sbi)->nat_bits); NM_I(sbi)->nat_bits = NULL; if (lock) - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, @@ -1275,6 +1319,11 @@ static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) down_read(&sbi->cp_rwsem); } +static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) +{ + return down_read_trylock(&sbi->cp_rwsem); +} + static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { up_read(&sbi->cp_rwsem); @@ -1324,17 +1373,14 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) return 0; } -#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 - /* * Check whether the inode has blocks or not */ static inline int F2FS_HAS_BLOCKS(struct inode *inode) { - if (F2FS_I(inode)->i_xattr_nid) - return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1; - else - return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; + block_t xattr_block = F2FS_I(inode)->i_xattr_nid ? 1 : 0; + + return (inode->i_blocks >> F2FS_LOG_SECTORS_PER_BLOCK) > xattr_block; } static inline bool f2fs_has_xattr_block(unsigned int ofs) @@ -1342,16 +1388,23 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } -static inline void f2fs_i_blocks_write(struct inode *, blkcnt_t, bool); -static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, +static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); +static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) { - blkcnt_t diff; + blkcnt_t diff = 0, release = 0; + block_t avail_user_block_count; + int ret; + + ret = dquot_reserve_block(inode, *count); + if (ret) + return ret; #ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_BLOCK)) { f2fs_show_injection_info(FAULT_BLOCK); - return false; + release = *count; + goto enospc; } #endif /* @@ -1362,32 +1415,42 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); sbi->total_valid_block_count += (block_t)(*count); - if (unlikely(sbi->total_valid_block_count > sbi->user_block_count)) { - diff = sbi->total_valid_block_count - sbi->user_block_count; + avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks; + if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { + diff = sbi->total_valid_block_count - avail_user_block_count; *count -= diff; - sbi->total_valid_block_count = sbi->user_block_count; + release = diff; + sbi->total_valid_block_count = avail_user_block_count; if (!*count) { spin_unlock(&sbi->stat_lock); percpu_counter_sub(&sbi->alloc_valid_block_count, diff); - return false; + goto enospc; } } spin_unlock(&sbi->stat_lock); - f2fs_i_blocks_write(inode, *count, true); - return true; + if (release) + dquot_release_reservation_block(inode, release); + f2fs_i_blocks_write(inode, *count, true, true); + return 0; + +enospc: + dquot_release_reservation_block(inode, release); + return -ENOSPC; } static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, - blkcnt_t count) + block_t count) { + blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK; + spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - f2fs_bug_on(sbi, inode->i_blocks < count); + f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); - f2fs_i_blocks_write(inode, count, false); + f2fs_i_blocks_write(inode, count, false, true); } static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -1516,51 +1579,70 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); } -static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, - struct inode *inode) +static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, bool is_inode) { block_t valid_block_count; unsigned int valid_node_count; + bool quota = inode && !is_inode; + + if (quota) { + int ret = dquot_reserve_block(inode, 1); + if (ret) + return ret; + } spin_lock(&sbi->stat_lock); valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count > sbi->user_block_count)) { + if (unlikely(valid_block_count + sbi->reserved_blocks > + sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); - return false; + goto enospc; } valid_node_count = sbi->total_valid_node_count + 1; if (unlikely(valid_node_count > sbi->total_node_count)) { spin_unlock(&sbi->stat_lock); - return false; + goto enospc; } - if (inode) - f2fs_i_blocks_write(inode, 1, true); - sbi->total_valid_node_count++; sbi->total_valid_block_count++; spin_unlock(&sbi->stat_lock); + if (inode) { + if (is_inode) + f2fs_mark_inode_dirty_sync(inode, true); + else + f2fs_i_blocks_write(inode, 1, true, true); + } + percpu_counter_inc(&sbi->alloc_valid_block_count); - return true; + return 0; + +enospc: + if (quota) + dquot_release_reservation_block(inode, 1); + return -ENOSPC; } static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, - struct inode *inode) + struct inode *inode, bool is_inode) { spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, !sbi->total_valid_block_count); f2fs_bug_on(sbi, !sbi->total_valid_node_count); - f2fs_bug_on(sbi, !inode->i_blocks); + f2fs_bug_on(sbi, !is_inode && !inode->i_blocks); - f2fs_i_blocks_write(inode, 1, false); sbi->total_valid_node_count--; sbi->total_valid_block_count--; spin_unlock(&sbi->stat_lock); + + if (!is_inode) + f2fs_i_blocks_write(inode, 1, false, true); } static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) @@ -1835,13 +1917,21 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc) } static inline void f2fs_i_blocks_write(struct inode *inode, - blkcnt_t diff, bool add) + block_t diff, bool add, bool claim) { bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER); - inode->i_blocks = add ? inode->i_blocks + diff : - inode->i_blocks - diff; + /* add = 1, claim = 1 should be dquot_reserve_block in pair */ + if (add) { + if (claim) + dquot_claim_block(inode, diff); + else + dquot_alloc_block_nofail(inode, diff); + } else { + dquot_free_block(inode, diff); + } + f2fs_mark_inode_dirty_sync(inode, true); if (clean || recover) set_inode_flag(inode, FI_AUTO_RECOVER); @@ -2236,6 +2326,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); +void stop_discard_thread(struct f2fs_sb_info *sbi); void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); @@ -2258,7 +2349,8 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, bool recover_newaddr); void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, int type); + struct f2fs_summary *sum, int type, + struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered); void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, @@ -2308,14 +2400,13 @@ void destroy_checkpoint_caches(void); /* * data.c */ -void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, - int rw); -void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, +void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); +void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, int rw); -void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi); + enum page_type type); +void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); -int f2fs_submit_page_mbio(struct f2fs_io_info *fio); +int f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio); int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); @@ -2632,6 +2723,14 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi); int __init create_extent_cache(void); void destroy_extent_cache(void); +/* + * sysfs.c + */ +int __init f2fs_register_sysfs(void); +void f2fs_unregister_sysfs(void); +int f2fs_init_sysfs(struct f2fs_sb_info *sbi); +void f2fs_exit_sysfs(struct f2fs_sb_info *sbi); + /* * crypto support */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 61af721329fa..a0e6d2c65a9e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -33,6 +33,18 @@ #include "trace.h" #include +static int f2fs_filemap_fault(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + int err; + + down_read(&F2FS_I(inode)->i_mmap_sem); + err = filemap_fault(vmf); + up_read(&F2FS_I(inode)->i_mmap_sem); + + return err; +} + static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; @@ -59,13 +71,14 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_balance_fs(sbi, dn.node_changed); file_update_time(vmf->vma->vm_file); + down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); if (unlikely(page->mapping != inode->i_mapping || page_offset(page) > i_size_read(inode) || !PageUptodate(page))) { unlock_page(page); err = -EFAULT; - goto out; + goto out_sem; } /* @@ -94,6 +107,8 @@ mapped: if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); +out_sem: + up_read(&F2FS_I(inode)->i_mmap_sem); out: sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); @@ -101,7 +116,7 @@ out: } static const struct vm_operations_struct f2fs_file_vm_ops = { - .fault = filemap_fault, + .fault = f2fs_filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = f2fs_vm_page_mkwrite, }; @@ -415,14 +430,6 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file_inode(file); int err; - if (f2fs_encrypted_inode(inode)) { - err = fscrypt_get_encryption_info(inode); - if (err) - return 0; - if (!f2fs_encrypted_inode(inode)) - return -ENOKEY; - } - /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) @@ -435,11 +442,10 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) static int f2fs_file_open(struct inode *inode, struct file *filp) { - int ret = generic_file_open(inode, filp); struct dentry *dir; - if (!ret && f2fs_encrypted_inode(inode)) { - ret = fscrypt_get_encryption_info(inode); + if (f2fs_encrypted_inode(inode)) { + int ret = fscrypt_get_encryption_info(inode); if (ret) return -EACCES; if (!fscrypt_has_encryption_key(inode)) @@ -452,7 +458,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) return -EPERM; } dput(dir); - return ret; + return dquot_file_open(inode, filp); } int truncate_data_blocks_range(struct dnode_of_data *dn, int count) @@ -527,8 +533,10 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, truncate_out: f2fs_wait_on_page_writeback(page, DATA, true); zero_user(page, offset, PAGE_SIZE - offset); - if (!cache_only || !f2fs_encrypted_inode(inode) || - !S_ISREG(inode->i_mode)) + + /* An encrypted inode should have a key and truncate the last page. */ + f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode)); + if (!cache_only) set_page_dirty(page); f2fs_put_page(page, 1); return 0; @@ -633,11 +641,31 @@ int f2fs_truncate(struct inode *inode) } int f2fs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) + u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int flags; + + flags = fi->i_flags & FS_FL_USER_VISIBLE; + if (flags & FS_APPEND_FL) + stat->attributes |= STATX_ATTR_APPEND; + if (flags & FS_COMPR_FL) + stat->attributes |= STATX_ATTR_COMPRESSED; + if (f2fs_encrypted_inode(inode)) + stat->attributes |= STATX_ATTR_ENCRYPTED; + if (flags & FS_IMMUTABLE_FL) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (flags & FS_NODUMP_FL) + stat->attributes |= STATX_ATTR_NODUMP; + + stat->attributes_mask |= (STATX_ATTR_APPEND | + STATX_ATTR_COMPRESSED | + STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP); + generic_fillattr(inode, stat); - stat->blocks <<= 3; return 0; } @@ -681,14 +709,34 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; + if (is_quota_modification(inode, attr)) { + err = dquot_initialize(inode); + if (err) + return err; + } + if ((attr->ia_valid & ATTR_UID && + !uid_eq(attr->ia_uid, inode->i_uid)) || + (attr->ia_valid & ATTR_GID && + !gid_eq(attr->ia_gid, inode->i_gid))) { + err = dquot_transfer(inode, attr); + if (err) + return err; + } + if (attr->ia_valid & ATTR_SIZE) { - if (f2fs_encrypted_inode(inode) && - fscrypt_get_encryption_info(inode)) - return -EACCES; + if (f2fs_encrypted_inode(inode)) { + err = fscrypt_get_encryption_info(inode); + if (err) + return err; + if (!fscrypt_has_encryption_key(inode)) + return -ENOKEY; + } if (attr->ia_size <= i_size_read(inode)) { + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); err = f2fs_truncate(inode); + up_write(&F2FS_I(inode)->i_mmap_sem); if (err) return err; } else { @@ -696,7 +744,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) * do not trim all blocks after i_size if target size is * larger than i_size. */ + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); + up_write(&F2FS_I(inode)->i_mmap_sem); /* should convert inline inode here */ if (!f2fs_may_inline_data(inode)) { @@ -839,12 +889,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_inode_pages_range(mapping, blk_start, blk_end - 1); f2fs_lock_op(sbi); ret = truncate_hole(inode, pg_start, pg_end); f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); } } @@ -957,9 +1009,9 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, if (do_replace[i]) { f2fs_i_blocks_write(src_inode, - 1, false); + 1, false, false); f2fs_i_blocks_write(dst_inode, - 1, true); + 1, true, false); f2fs_replace_block(sbi, &dn, dn.data_blkaddr, blkaddr[i], ni.version, true, false); @@ -1083,16 +1135,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; + down_write(&F2FS_I(inode)->i_mmap_sem); /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - return ret; + goto out; truncate_pagecache(inode, offset); ret = f2fs_do_collapse(inode, pg_start, pg_end); if (ret) - return ret; + goto out; /* write out all moved pages, if possible */ filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -1105,6 +1158,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (!ret) f2fs_i_size_write(inode, new_size); +out: + up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1169,9 +1224,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; + down_write(&F2FS_I(inode)->i_mmap_sem); ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); if (ret) - return ret; + goto out_sem; truncate_pagecache_range(inode, offset, offset + len - 1); @@ -1185,7 +1241,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start, off_start, off_end - off_start); if (ret) - return ret; + goto out_sem; new_size = max_t(loff_t, new_size, offset + len); } else { @@ -1193,7 +1249,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start++, off_start, PAGE_SIZE - off_start); if (ret) - return ret; + goto out_sem; new_size = max_t(loff_t, new_size, (loff_t)pg_start << PAGE_SHIFT); @@ -1242,6 +1298,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, out: if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) f2fs_i_size_write(inode, new_size); +out_sem: + up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1271,14 +1329,15 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); + down_write(&F2FS_I(inode)->i_mmap_sem); ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) - return ret; + goto out; /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - return ret; + goto out; truncate_pagecache(inode, offset); @@ -1307,6 +1366,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (!ret) f2fs_i_size_write(inode, new_size); +out: + up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1475,6 +1536,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) inode_lock(inode); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + inode_unlock(inode); + ret = -EPERM; + goto unlock_out; + } + flags = f2fs_mask_flags(inode->i_mode, flags); oldflags = fi->i_flags; @@ -1493,7 +1561,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); - + f2fs_mark_inode_dirty_sync(inode, false); +unlock_out: inode_unlock(inode); out: mnt_drop_write_file(filp); @@ -1862,6 +1931,50 @@ out: return ret; } +static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_gc_range range; + u64 end; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, + sizeof(range))) + return -EFAULT; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + end = range.start + range.len; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) + return -EINVAL; +do_more: + if (!range.sync) { + if (!mutex_trylock(&sbi->gc_mutex)) { + ret = -EBUSY; + goto out; + } + } else { + mutex_lock(&sbi->gc_mutex); + } + + ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); + range.start += sbi->blocks_per_seg; + if (range.start <= end) + goto do_more; +out: + mnt_drop_write_file(filp); + return ret; +} + static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -2306,6 +2419,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_get_encryption_pwsalt(filp, arg); case F2FS_IOC_GARBAGE_COLLECT: return f2fs_ioc_gc(filp, arg); + case F2FS_IOC_GARBAGE_COLLECT_RANGE: + return f2fs_ioc_gc_range(filp, arg); case F2FS_IOC_WRITE_CHECKPOINT: return f2fs_ioc_write_checkpoint(filp, arg); case F2FS_IOC_DEFRAGMENT: @@ -2326,11 +2441,6 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct blk_plug plug; ssize_t ret; - if (f2fs_encrypted_inode(inode) && - !fscrypt_has_encryption_key(inode) && - fscrypt_get_encryption_info(inode)) - return -EACCES; - inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) { @@ -2379,6 +2489,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_ENCRYPTION_PWSALT: case F2FS_IOC_GET_ENCRYPTION_POLICY: case F2FS_IOC_GARBAGE_COLLECT: + case F2FS_IOC_GARBAGE_COLLECT_RANGE: case F2FS_IOC_WRITE_CHECKPOINT: case F2FS_IOC_DEFRAGMENT: case F2FS_IOC_MOVE_RANGE: diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 026522107ca3..fa3d2e2df8e7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -32,13 +32,14 @@ static int gc_thread_func(void *data) wait_ms = gc_th->min_sleep_time; + set_freezable(); do { + wait_event_interruptible_timeout(*wq, + kthread_should_stop() || freezing(current), + msecs_to_jiffies(wait_ms)); + if (try_to_freeze()) continue; - else - wait_event_interruptible_timeout(*wq, - kthread_should_stop(), - msecs_to_jiffies(wait_ms)); if (kthread_should_stop()) break; @@ -258,11 +259,20 @@ static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi, valid_blocks * 2 : valid_blocks; } +static unsigned int get_ssr_cost(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct seg_entry *se = get_seg_entry(sbi, segno); + + return se->ckpt_valid_blocks > se->valid_blocks ? + se->ckpt_valid_blocks : se->valid_blocks; +} + static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, struct victim_sel_policy *p) { if (p->alloc_mode == SSR) - return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + return get_ssr_cost(sbi, segno); /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) @@ -586,9 +596,11 @@ static void move_encrypted_block(struct inode *inode, block_t bidx, struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, + .temp = COLD, .op = REQ_OP_READ, .op_flags = 0, .encrypted_page = NULL, + .in_list = false, }; struct dnode_of_data dn; struct f2fs_summary sum; @@ -632,7 +644,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx, fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, - &sum, CURSEG_COLD_DATA); + &sum, CURSEG_COLD_DATA, NULL, false); fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); @@ -670,7 +682,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx, fio.op = REQ_OP_WRITE; fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; - f2fs_submit_page_mbio(&fio); + f2fs_submit_page_write(&fio); f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); @@ -712,12 +724,13 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, + .temp = COLD, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC, .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, - .need_lock = true, + .need_lock = LOCK_REQ, }; bool is_dirty = PageDirty(page); int err; @@ -936,8 +949,8 @@ next: } if (gc_type == FG_GC) - f2fs_submit_merged_bio(sbi, - (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE); + f2fs_submit_merged_write(sbi, + (type == SUM_TYPE_NODE) ? NODE : DATA); blk_finish_plug(&plug); @@ -955,7 +968,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, { int gc_type = sync ? FG_GC : BG_GC; int sec_freed = 0; - int ret = -EINVAL; + int ret; struct cp_control cpc; unsigned int init_segno = segno; struct gc_inode_list gc_list = { @@ -965,8 +978,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, cpc.reason = __get_cp_reason(sbi); gc_more: - if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) + if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) { + ret = -EINVAL; goto stop; + } if (unlikely(f2fs_cp_error(sbi))) { ret = -EIO; goto stop; @@ -987,6 +1002,7 @@ gc_more: gc_type = FG_GC; } + ret = -EINVAL; /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */ if (gc_type == BG_GC && !background) goto stop; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e4c527c4e7d0..e0fd4376e6fb 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -316,12 +316,12 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, int make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage) { - struct f2fs_inline_dentry *dentry_blk; + struct f2fs_inline_dentry *inline_dentry; struct f2fs_dentry_ptr d; - dentry_blk = inline_data_addr(ipage); + inline_dentry = inline_data_addr(ipage); - make_dentry_ptr_inline(NULL, &d, dentry_blk); + make_dentry_ptr_inline(NULL, &d, inline_dentry); do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); @@ -500,7 +500,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, struct page *ipage; unsigned int bit_pos; f2fs_hash_t name_hash; - struct f2fs_inline_dentry *dentry_blk = NULL; + struct f2fs_inline_dentry *inline_dentry = NULL; struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(new_name->len); struct page *page = NULL; @@ -510,11 +510,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, if (IS_ERR(ipage)) return PTR_ERR(ipage); - dentry_blk = inline_data_addr(ipage); - bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + inline_dentry = inline_data_addr(ipage); + bit_pos = room_for_filename(&inline_dentry->dentry_bitmap, slots, NR_INLINE_DENTRY); if (bit_pos >= NR_INLINE_DENTRY) { - err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); + err = f2fs_convert_inline_dir(dir, ipage, inline_dentry); if (err) return err; err = -EAGAIN; @@ -534,7 +534,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true); name_hash = f2fs_dentry_hash(new_name, NULL); - make_dentry_ptr_inline(NULL, &d, dentry_blk); + make_dentry_ptr_inline(NULL, &d, inline_dentry); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); @@ -586,14 +586,14 @@ bool f2fs_empty_inline_dir(struct inode *dir) struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; unsigned int bit_pos = 2; - struct f2fs_inline_dentry *dentry_blk; + struct f2fs_inline_dentry *inline_dentry; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return false; - dentry_blk = inline_data_addr(ipage); - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + inline_dentry = inline_data_addr(ipage); + bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap, NR_INLINE_DENTRY, bit_pos); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 518f49643092..6cd312a17c69 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -16,6 +16,7 @@ #include "f2fs.h" #include "node.h" +#include "segment.h" #include @@ -44,7 +45,6 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); - f2fs_mark_inode_dirty_sync(inode, false); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -130,7 +130,7 @@ static int do_read_inode(struct inode *inode) i_gid_write(inode, le32_to_cpu(ri->i_gid)); set_nlink(inode, le32_to_cpu(ri->i_links)); inode->i_size = le64_to_cpu(ri->i_size); - inode->i_blocks = le64_to_cpu(ri->i_blocks); + inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1); inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); @@ -226,6 +226,7 @@ make_now: ret = -EIO; goto bad_inode; } + f2fs_set_inode_flags(inode); unlock_new_inode(inode); trace_f2fs_iget(inode); return inode; @@ -267,7 +268,7 @@ int update_inode(struct inode *inode, struct page *node_page) ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); ri->i_size = cpu_to_le64(i_size_read(inode)); - ri->i_blocks = cpu_to_le64(inode->i_blocks); + ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1); if (et) { read_lock(&et->lock); @@ -372,6 +373,8 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; + dquot_initialize(inode); + remove_ino_entry(sbi, inode->i_ino, APPEND_INO); remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); @@ -404,8 +407,11 @@ retry: if (err) update_inode_page(inode); + dquot_free_inode(inode); sb_end_intwrite(inode->i_sb); no_delete: + dquot_drop(inode); + stat_dec_inline_xattr(inode); stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); @@ -425,9 +431,10 @@ no_delete: if (is_inode_flag_set(inode, FI_FREE_NID)) { alloc_nid_failed(sbi, inode->i_ino); clear_inode_flag(inode, FI_FREE_NID); + } else { + f2fs_bug_on(sbi, err && + !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); } - f2fs_bug_on(sbi, err && - !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); out_clear: fscrypt_put_encryption_info(inode, NULL); clear_inode(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c31b40e5f9cf..760d85223c81 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -42,6 +43,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) } f2fs_unlock_op(sbi); + nid_free = true; + inode_init_owner(inode, dir, mode); inode->i_ino = ino; @@ -52,10 +55,17 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) err = insert_inode_locked(inode); if (err) { err = -EINVAL; - nid_free = true; goto fail; } + err = dquot_initialize(inode); + if (err) + goto fail_drop; + + err = dquot_alloc_inode(inode); + if (err) + goto fail_drop; + /* If the directory encrypted, then we should encrypt the inode. */ if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); @@ -85,6 +95,16 @@ fail: set_inode_flag(inode, FI_FREE_NID); iput(inode); return ERR_PTR(err); +fail_drop: + trace_f2fs_new_inode(inode, err); + dquot_drop(inode); + inode->i_flags |= S_NOQUOTA; + if (nid_free) + set_inode_flag(inode, FI_FREE_NID); + clear_nlink(inode); + unlock_new_inode(inode); + iput(inode); + return ERR_PTR(err); } static int is_multimedia_file(const unsigned char *s, const char *sub) @@ -136,6 +156,10 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, nid_t ino = 0; int err; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -180,6 +204,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, !fscrypt_has_permitted_context(dir, inode)) return -EPERM; + err = dquot_initialize(dir); + if (err) + return err; + f2fs_balance_fs(sbi, true); inode->i_ctime = current_time(inode); @@ -347,6 +375,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); + err = dquot_initialize(dir); + if (err) + return err; + de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { if (IS_ERR(page)) @@ -413,6 +445,10 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (disk_link.len > dir->i_sb->s_blocksize) return -ENAMETOOLONG; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -500,6 +536,10 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -548,6 +588,10 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err = 0; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -583,6 +627,10 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -676,6 +724,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } + err = dquot_initialize(old_dir); + if (err) + goto out; + + err = dquot_initialize(new_dir); + if (err) + goto out; + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -772,7 +828,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } down_write(&F2FS_I(old_inode)->i_sem); - file_lost_pino(old_inode); + if (!old_dir_entry || whiteout) + file_lost_pino(old_inode); + else + F2FS_I(old_inode)->i_pino = new_dir->i_ino; up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); @@ -853,6 +912,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, !fscrypt_has_permitted_context(old_dir, new_inode))) return -EPERM; + err = dquot_initialize(old_dir); + if (err) + goto out; + + err = dquot_initialize(new_dir); + if (err) + goto out; + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4547c5c5cd98..d53fe620939e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -158,9 +158,6 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; - if (get_nat_flag(ne, IS_DIRTY)) - return; - head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS); @@ -171,10 +168,18 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, head->entry_cnt = 0; f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } - list_move_tail(&ne->list, &head->entry_list); + + if (get_nat_flag(ne, IS_DIRTY)) + goto refresh_list; + nm_i->dirty_nat_cnt++; head->entry_cnt++; set_nat_flag(ne, IS_DIRTY, true); +refresh_list: + if (nat_get_blkaddr(ne) == NEW_ADDR) + list_del_init(&ne->list); + else + list_move_tail(&ne->list, &head->entry_list); } static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, @@ -673,15 +678,11 @@ static void truncate_node(struct dnode_of_data *dn) struct node_info ni; get_node_info(sbi, dn->nid, &ni); - if (dn->inode->i_blocks == 0) { - f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR); - goto invalidate; - } f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); /* Deallocate node address */ invalidate_blocks(sbi, ni.blk_addr); - dec_valid_node_count(sbi, dn->inode); + dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); set_node_addr(sbi, &ni, NULL_ADDR, false); if (dn->nid == dn->inode->i_ino) { @@ -689,7 +690,7 @@ static void truncate_node(struct dnode_of_data *dn) dec_valid_inode_count(sbi); f2fs_inode_synced(dn->inode); } -invalidate: + clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -1006,7 +1007,7 @@ int remove_inode_page(struct inode *inode) /* 0 is possible, after f2fs_new_inode() has failed */ f2fs_bug_on(F2FS_I_SB(inode), - inode->i_blocks != 0 && inode->i_blocks != 1); + inode->i_blocks != 0 && inode->i_blocks != 8); /* will put inode & node pages */ truncate_node(&dn); @@ -1039,10 +1040,9 @@ struct page *new_node_page(struct dnode_of_data *dn, if (!page) return ERR_PTR(-ENOMEM); - if (unlikely(!inc_valid_node_count(sbi, dn->inode))) { - err = -ENOSPC; + if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs)))) goto fail; - } + #ifdef CONFIG_F2FS_CHECK_FS get_node_info(sbi, dn->nid, &new_ni); f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR); @@ -1152,6 +1152,7 @@ repeat: f2fs_put_page(page, 1); return ERR_PTR(err); } else if (err == LOCKED_PAGE) { + err = 0; goto page_hit; } @@ -1165,15 +1166,22 @@ repeat: goto repeat; } - if (unlikely(!PageUptodate(page))) + if (unlikely(!PageUptodate(page))) { + err = -EIO; goto out_err; + } page_hit: if(unlikely(nid != nid_of_node(page))) { - f2fs_bug_on(sbi, 1); + f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, " + "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", + nid, nid_of_node(page), ino_of_node(page), + ofs_of_node(page), cpver_of_node(page), + next_blkaddr_of_node(page)); ClearPageUptodate(page); + err = -EINVAL; out_err: f2fs_put_page(page, 1); - return ERR_PTR(-EIO); + return ERR_PTR(err); } return page; } @@ -1373,15 +1381,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, up_read(&sbi->node_write); if (wbc->for_reclaim) { - f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0, - page->index, NODE, WRITE); + f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0, + page->index, NODE); submitted = NULL; } unlock_page(page); if (unlikely(f2fs_cp_error(sbi))) { - f2fs_submit_merged_bio(sbi, NODE, WRITE); + f2fs_submit_merged_write(sbi, NODE); submitted = NULL; } if (submitted) @@ -1518,8 +1526,7 @@ continue_unlock: } out: if (last_idx != ULONG_MAX) - f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx, - NODE, WRITE); + f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE); return ret ? -EIO: 0; } @@ -1625,7 +1632,7 @@ continue_unlock: } out: if (nwritten) - f2fs_submit_merged_bio(sbi, NODE, WRITE); + f2fs_submit_merged_write(sbi, NODE); return ret; } @@ -1675,6 +1682,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct blk_plug plug; long diff; + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + /* balancing f2fs's metadata in background */ f2fs_balance_fs_bg(sbi); @@ -2192,14 +2202,14 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) get_node_info(sbi, prev_xnid, &ni); f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); invalidate_blocks(sbi, ni.blk_addr); - dec_valid_node_count(sbi, inode); + dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); recover_xnid: /* 2: update xattr nid in inode */ remove_free_nid(sbi, new_xnid); f2fs_i_xnid_write(inode, new_xnid); - if (unlikely(!inc_valid_node_count(sbi, inode))) + if (unlikely(inc_valid_node_count(sbi, inode, false))) f2fs_bug_on(sbi, 1); update_inode_page(inode); @@ -2257,7 +2267,7 @@ retry: new_ni = old_ni; new_ni.ino = ino; - if (unlikely(!inc_valid_node_count(sbi, NULL))) + if (unlikely(inc_valid_node_count(sbi, NULL, true))) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR, false); inc_valid_inode_count(sbi); @@ -2424,8 +2434,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, nid_t nid = nat_get_nid(ne); int offset; - if (nat_get_blkaddr(ne) == NEW_ADDR) - continue; + f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR); if (to_journal) { offset = lookup_journal_in_cursum(journal, @@ -2553,7 +2562,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) return 0; } -inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) +static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int i = 0; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 558048e33cf9..bb53e9955ff2 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -224,11 +224,7 @@ static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, struct f2fs_nm_info *nm_i = NM_I(sbi); block_addr -= nm_i->nat_blkaddr; - if ((block_addr >> sbi->log_blocks_per_seg) % 2) - block_addr -= sbi->blocks_per_seg; - else - block_addr += sbi->blocks_per_seg; - + block_addr ^= 1 << sbi->log_blocks_per_seg; return block_addr + nm_i->nat_blkaddr; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ea9f455d94ba..f964b68718c1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -312,7 +313,7 @@ static int __commit_inmem_pages(struct inode *inode, fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; - fio.need_lock = false, + fio.need_lock = LOCK_DONE; err = do_write_data_page(&fio); if (err) { unlock_page(page); @@ -328,8 +329,7 @@ static int __commit_inmem_pages(struct inode *inode, } if (last_idx != ULONG_MAX) - f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx, - DATA, WRITE); + f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA); if (!err) __revoke_inmem_pages(inode, revoke_list, false, false); @@ -555,6 +555,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) if (SM_I(sbi)->fcc_info) { fcc = SM_I(sbi)->fcc_info; + if (fcc->f2fs_issue_flush) + return err; goto init_thread; } @@ -566,6 +568,9 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; + if (!test_opt(sbi, FLUSH_MERGE)) + return err; + init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); @@ -736,12 +741,15 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + f2fs_bug_on(sbi, dc->ref); + if (dc->error == -EOPNOTSUPP) dc->error = 0; if (dc->error) f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard failed, ret: %d", dc->error); + "Issue discard(%u, %u, %u) failed, ret: %d", + dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } @@ -751,10 +759,34 @@ static void f2fs_submit_discard_endio(struct bio *bio) dc->error = blk_status_to_errno(bio->bi_status); dc->state = D_DONE; - complete(&dc->wait); + complete_all(&dc->wait); bio_put(bio); } +void __check_sit_bitmap(struct f2fs_sb_info *sbi, + block_t start, block_t end) +{ +#ifdef CONFIG_F2FS_CHECK_FS + struct seg_entry *sentry; + unsigned int segno; + block_t blk = start; + unsigned long offset, size, max_blocks = sbi->blocks_per_seg; + unsigned long *map; + + while (blk < end) { + segno = GET_SEGNO(sbi, blk); + sentry = get_seg_entry(sbi, segno); + offset = GET_BLKOFF_FROM_SEG0(sbi, blk); + + size = min((unsigned long)(end - blk), max_blocks); + map = (unsigned long *)(sentry->cur_valid_map); + offset = __find_rev_next_bit(map, size, offset); + f2fs_bug_on(sbi, offset != size); + blk += size; + } +#endif +} + /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) @@ -782,6 +814,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, bio->bi_opf |= REQ_SYNC; submit_bio(bio); list_move_tail(&dc->list, &dcc->wait_list); + __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); } } else { __remove_discard_cmd(sbi, dc); @@ -838,7 +871,6 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, dc->len = blkaddr - dc->lstart; dcc->undiscard_blks += dc->len; __relocate_discard_cmd(dcc, dc); - f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); modified = true; } @@ -848,16 +880,12 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, di.start + blkaddr + 1 - di.lstart, di.lstart + di.len - 1 - blkaddr, NULL, NULL); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); } else { dc->lstart++; dc->len--; dc->start++; dcc->undiscard_blks += dc->len; __relocate_discard_cmd(dcc, dc); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); } } } @@ -918,8 +946,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, prev_dc->di.len += di.len; dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, prev_dc); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); di = prev_dc->di; tdc = prev_dc; merged = true; @@ -935,16 +961,12 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); merged = true; } if (!merged) { __insert_discard_tree(sbi, bdev, di.lstart, di.start, di.len, NULL, NULL); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); } next: prev_dc = next_dc; @@ -983,6 +1005,8 @@ static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) int i, iter = 0; mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); blk_start_plug(&plug); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { pend_list = &dcc->pend_list[i]; @@ -1000,22 +1024,47 @@ out: mutex_unlock(&dcc->cmd_lock); } +static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + wait_for_completion_io(&dc->wait); + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, dc->state != D_DONE); + dc->ref--; + if (!dc->ref) + __remove_discard_cmd(sbi, dc); + mutex_unlock(&dcc->cmd_lock); +} + static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *wait_list = &(dcc->wait_list); struct discard_cmd *dc, *tmp; + bool need_wait; + +next: + need_wait = false; mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (!wait_cond || dc->state == D_DONE) { - if (dc->ref) - continue; + if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); + } else { + dc->ref++; + need_wait = true; + break; } } mutex_unlock(&dcc->cmd_lock); + + if (need_wait) { + __wait_one_discard_bio(sbi, dc); + goto next; + } } /* This should be covered by global mutex, &sit_i->sentry_lock */ @@ -1037,14 +1086,19 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) } mutex_unlock(&dcc->cmd_lock); - if (need_wait) { - wait_for_completion_io(&dc->wait); - mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, dc->state != D_DONE); - dc->ref--; - if (!dc->ref) - __remove_discard_cmd(sbi, dc); - mutex_unlock(&dcc->cmd_lock); + if (need_wait) + __wait_one_discard_bio(sbi, dc); +} + +void stop_discard_thread(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + if (dcc && dcc->f2fs_issue_discard) { + struct task_struct *discard_thread = dcc->f2fs_issue_discard; + + dcc->f2fs_issue_discard = NULL; + kthread_stop(discard_thread); } } @@ -1060,18 +1114,24 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; -repeat: - if (kthread_should_stop()) - return 0; - __issue_discard_cmd(sbi, true); - __wait_discard_cmd(sbi, true); + set_freezable(); - congestion_wait(BLK_RW_SYNC, HZ/50); + do { + wait_event_interruptible(*q, kthread_should_stop() || + freezing(current) || + atomic_read(&dcc->discard_cmd_cnt)); + if (try_to_freeze()) + continue; + if (kthread_should_stop()) + return 0; - wait_event_interruptible(*q, kthread_should_stop() || - atomic_read(&dcc->discard_cmd_cnt)); - goto repeat; + __issue_discard_cmd(sbi, true); + __wait_discard_cmd(sbi, true); + + congestion_wait(BLK_RW_SYNC, HZ/50); + } while (!kthread_should_stop()); + return 0; } #ifdef CONFIG_BLK_DEV_ZONED @@ -1322,7 +1382,8 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (force && len < cpc->trim_minlen) + if (f2fs_sb_mounted_blkzoned(sbi->sb) || + (force && len < cpc->trim_minlen)) goto skip; f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, @@ -1398,12 +1459,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return; - if (dcc->f2fs_issue_discard) { - struct task_struct *discard_thread = dcc->f2fs_issue_discard; - - dcc->f2fs_issue_discard = NULL; - kthread_stop(discard_thread); - } + stop_discard_thread(sbi); kfree(dcc); SM_I(sbi)->dcc_info = NULL; @@ -2040,66 +2096,80 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } -static int __get_segment_type_2(struct page *page, enum page_type p_type) +static int __get_segment_type_2(struct f2fs_io_info *fio) { - if (p_type == DATA) + if (fio->type == DATA) return CURSEG_HOT_DATA; else return CURSEG_HOT_NODE; } -static int __get_segment_type_4(struct page *page, enum page_type p_type) +static int __get_segment_type_4(struct f2fs_io_info *fio) { - if (p_type == DATA) { - struct inode *inode = page->mapping->host; + if (fio->type == DATA) { + struct inode *inode = fio->page->mapping->host; if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && is_cold_node(page)) + if (IS_DNODE(fio->page) && is_cold_node(fio->page)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } } -static int __get_segment_type_6(struct page *page, enum page_type p_type) +static int __get_segment_type_6(struct f2fs_io_info *fio) { - if (p_type == DATA) { - struct inode *inode = page->mapping->host; + if (fio->type == DATA) { + struct inode *inode = fio->page->mapping->host; - if (is_cold_data(page) || file_is_cold(inode)) + if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; if (is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; return CURSEG_WARM_DATA; } else { - if (IS_DNODE(page)) - return is_cold_node(page) ? CURSEG_WARM_NODE : + if (IS_DNODE(fio->page)) + return is_cold_node(fio->page) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; return CURSEG_COLD_NODE; } } -static int __get_segment_type(struct page *page, enum page_type p_type) +static int __get_segment_type(struct f2fs_io_info *fio) { - switch (F2FS_P_SB(page)->active_logs) { + int type = 0; + + switch (fio->sbi->active_logs) { case 2: - return __get_segment_type_2(page, p_type); + type = __get_segment_type_2(fio); + break; case 4: - return __get_segment_type_4(page, p_type); + type = __get_segment_type_4(fio); + break; + case 6: + type = __get_segment_type_6(fio); + break; + default: + f2fs_bug_on(fio->sbi, true); } - /* NR_CURSEG_TYPE(6) logs by default */ - f2fs_bug_on(F2FS_P_SB(page), - F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE); - return __get_segment_type_6(page, p_type); + + if (IS_HOT(type)) + fio->temp = HOT; + else if (IS_WARM(type)) + fio->temp = WARM; + else + fio->temp = COLD; + return type; } void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, int type) + struct f2fs_summary *sum, int type, + struct f2fs_io_info *fio, bool add_list) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2135,29 +2205,35 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (page && IS_NODESEG(type)) fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + if (add_list) { + struct f2fs_bio_info *io; + + INIT_LIST_HEAD(&fio->list); + fio->in_list = true; + io = sbi->write_io[fio->type] + fio->temp; + spin_lock(&io->io_lock); + list_add_tail(&fio->list, &io->io_list); + spin_unlock(&io->io_lock); + } + mutex_unlock(&curseg->curseg_mutex); } static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - int type = __get_segment_type(fio->page, fio->type); + int type = __get_segment_type(fio); int err; - if (fio->type == NODE || fio->type == DATA) - mutex_lock(&fio->sbi->wio_mutex[fio->type]); reallocate: allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, - &fio->new_blkaddr, sum, type); + &fio->new_blkaddr, sum, type, fio, true); /* writeout dirty page into bdev */ - err = f2fs_submit_page_mbio(fio); + err = f2fs_submit_page_write(fio); if (err == -EAGAIN) { fio->old_blkaddr = fio->new_blkaddr; goto reallocate; } - - if (fio->type == NODE || fio->type == DATA) - mutex_unlock(&fio->sbi->wio_mutex[fio->type]); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) @@ -2171,13 +2247,14 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) .new_blkaddr = page->index, .page = page, .encrypted_page = NULL, + .in_list = false, }; if (unlikely(page->index >= MAIN_BLKADDR(sbi))) fio.op_flags &= ~REQ_META; set_page_writeback(page); - f2fs_submit_page_mbio(&fio); + f2fs_submit_page_write(&fio); } void write_node_page(unsigned int nid, struct f2fs_io_info *fio) @@ -2296,8 +2373,8 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_bio_cond(sbi, page->mapping->host, - 0, page->index, type, WRITE); + f2fs_submit_merged_write_cond(sbi, page->mapping->host, + 0, page->index, type); if (ordered) wait_on_page_writeback(page); else @@ -2455,6 +2532,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) static int restore_curseg_summaries(struct f2fs_sb_info *sbi) { + struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal; + struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal; int type = CURSEG_HOT_DATA; int err; @@ -2481,6 +2560,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) return err; } + /* sanity check for summary blocks */ + if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || + sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) + return -EINVAL; + return 0; } @@ -3203,7 +3287,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sm_info->sit_entry_set); - if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { + if (!f2fs_readonly(sbi->sb)) { err = create_flush_cmd_control(sbi); if (err) return err; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 010f336a7573..6b871b492fd5 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -27,6 +27,10 @@ #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA) #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE) +#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA) +#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA) +#define IS_COLD(t) ((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA) + #define IS_CURSEG(sbi, seg) \ (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0b89b0b7b9f7..32e4c025e97e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -35,9 +36,7 @@ #define CREATE_TRACE_POINTS #include -static struct proc_dir_entry *f2fs_proc_root; static struct kmem_cache *f2fs_inode_cachep; -static struct kset *f2fs_kset; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -108,6 +107,8 @@ enum { Opt_fault_injection, Opt_lazytime, Opt_nolazytime, + Opt_usrquota, + Opt_grpquota, Opt_err, }; @@ -143,212 +144,11 @@ static match_table_t f2fs_tokens = { {Opt_fault_injection, "fault_injection=%u"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, + {Opt_usrquota, "usrquota"}, + {Opt_grpquota, "grpquota"}, {Opt_err, NULL}, }; -/* Sysfs support for f2fs */ -enum { - GC_THREAD, /* struct f2fs_gc_thread */ - SM_INFO, /* struct f2fs_sm_info */ - DCC_INFO, /* struct discard_cmd_control */ - NM_INFO, /* struct f2fs_nm_info */ - F2FS_SBI, /* struct f2fs_sb_info */ -#ifdef CONFIG_F2FS_FAULT_INJECTION - FAULT_INFO_RATE, /* struct f2fs_fault_info */ - FAULT_INFO_TYPE, /* struct f2fs_fault_info */ -#endif -}; - -struct f2fs_attr { - struct attribute attr; - ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); - ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, - const char *, size_t); - int struct_type; - int offset; -}; - -static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) -{ - if (struct_type == GC_THREAD) - return (unsigned char *)sbi->gc_thread; - else if (struct_type == SM_INFO) - return (unsigned char *)SM_I(sbi); - else if (struct_type == DCC_INFO) - return (unsigned char *)SM_I(sbi)->dcc_info; - else if (struct_type == NM_INFO) - return (unsigned char *)NM_I(sbi); - else if (struct_type == F2FS_SBI) - return (unsigned char *)sbi; -#ifdef CONFIG_F2FS_FAULT_INJECTION - else if (struct_type == FAULT_INFO_RATE || - struct_type == FAULT_INFO_TYPE) - return (unsigned char *)&sbi->fault_info; -#endif - return NULL; -} - -static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) -{ - struct super_block *sb = sbi->sb; - - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); - - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(sbi->kbytes_written + - BD_PART_WRITTEN(sbi))); -} - -static ssize_t f2fs_sbi_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) -{ - unsigned char *ptr = NULL; - unsigned int *ui; - - ptr = __struct_ptr(sbi, a->struct_type); - if (!ptr) - return -EINVAL; - - ui = (unsigned int *)(ptr + a->offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t f2fs_sbi_store(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned char *ptr; - unsigned long t; - unsigned int *ui; - ssize_t ret; - - ptr = __struct_ptr(sbi, a->struct_type); - if (!ptr) - return -EINVAL; - - ui = (unsigned int *)(ptr + a->offset); - - ret = kstrtoul(skip_spaces(buf), 0, &t); - if (ret < 0) - return ret; -#ifdef CONFIG_F2FS_FAULT_INJECTION - if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) - return -EINVAL; -#endif - *ui = t; - return count; -} - -static ssize_t f2fs_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); - - return a->show ? a->show(a, sbi, buf) : 0; -} - -static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); - - return a->store ? a->store(a, sbi, buf, len) : 0; -} - -static void f2fs_sb_release(struct kobject *kobj) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - complete(&sbi->s_kobj_unregister); -} - -#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \ -static struct f2fs_attr f2fs_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .struct_type = _struct_type, \ - .offset = _offset \ -} - -#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \ - F2FS_ATTR_OFFSET(struct_type, name, 0644, \ - f2fs_sbi_show, f2fs_sbi_store, \ - offsetof(struct struct_name, elname)) - -#define F2FS_GENERAL_RO_ATTR(name) \ -static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL) - -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); -F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); -#ifdef CONFIG_F2FS_FAULT_INJECTION -F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); -F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); -#endif -F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); - -#define ATTR_LIST(name) (&f2fs_attr_##name.attr) -static struct attribute *f2fs_attrs[] = { - ATTR_LIST(gc_min_sleep_time), - ATTR_LIST(gc_max_sleep_time), - ATTR_LIST(gc_no_gc_sleep_time), - ATTR_LIST(gc_idle), - ATTR_LIST(reclaim_segments), - ATTR_LIST(max_small_discards), - ATTR_LIST(batched_trim_sections), - ATTR_LIST(ipu_policy), - ATTR_LIST(min_ipu_util), - ATTR_LIST(min_fsync_blocks), - ATTR_LIST(min_hot_blocks), - ATTR_LIST(max_victim_search), - ATTR_LIST(dir_level), - ATTR_LIST(ram_thresh), - ATTR_LIST(ra_nid_pages), - ATTR_LIST(dirty_nats_ratio), - ATTR_LIST(cp_interval), - ATTR_LIST(idle_interval), -#ifdef CONFIG_F2FS_FAULT_INJECTION - ATTR_LIST(inject_rate), - ATTR_LIST(inject_type), -#endif - ATTR_LIST(lifetime_write_kbytes), - NULL, -}; - -static const struct sysfs_ops f2fs_attr_ops = { - .show = f2fs_attr_show, - .store = f2fs_attr_store, -}; - -static struct kobj_type f2fs_ktype = { - .default_attrs = f2fs_attrs, - .sysfs_ops = &f2fs_attr_ops, - .release = f2fs_sb_release, -}; - void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) { struct va_format vaf; @@ -585,6 +385,20 @@ static int parse_options(struct super_block *sb, char *options) case Opt_nolazytime: sb->s_flags &= ~MS_LAZYTIME; break; +#ifdef CONFIG_QUOTA + case Opt_usrquota: + set_opt(sbi, USRQUOTA); + break; + case Opt_grpquota: + set_opt(sbi, GRPQUOTA); + break; +#else + case Opt_usrquota: + case Opt_grpquota: + f2fs_msg(sb, KERN_INFO, + "quota operations not supported"); + break; +#endif default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -624,7 +438,12 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) mutex_init(&fi->inmem_lock); init_rwsem(&fi->dio_rwsem[READ]); init_rwsem(&fi->dio_rwsem[WRITE]); + init_rwsem(&fi->i_mmap_sem); +#ifdef CONFIG_QUOTA + memset(&fi->i_dquot, 0, sizeof(fi->i_dquot)); + fi->i_reserved_quota = 0; +#endif /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; return &fi->vfs_inode; @@ -765,18 +584,13 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) kfree(sbi->devs); } +static void f2fs_quota_off_umount(struct super_block *sb); static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; - if (sbi->s_proc) { - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry("segment_bits", sbi->s_proc); - remove_proc_entry(sb->s_id, f2fs_proc_root); - } - kobject_del(&sbi->s_kobj); - - stop_gc_thread(sbi); + f2fs_quota_off_umount(sb); /* prevent remaining shrinker jobs */ mutex_lock(&sbi->umount_mutex); @@ -797,7 +611,7 @@ static void f2fs_put_super(struct super_block *sb) /* be sure to wait for any on-going discard commands */ f2fs_wait_discard_bios(sbi); - if (!sbi->discard_blks) { + if (f2fs_discard_en(sbi) && !sbi->discard_blks) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; @@ -817,7 +631,7 @@ static void f2fs_put_super(struct super_block *sb) mutex_unlock(&sbi->umount_mutex); /* our cp_error case, we can wait for any writeback page */ - f2fs_flush_merged_bios(sbi); + f2fs_flush_merged_writes(sbi); iput(sbi->node_inode); iput(sbi->meta_inode); @@ -827,8 +641,8 @@ static void f2fs_put_super(struct super_block *sb) destroy_segment_manager(sbi); kfree(sbi->ckpt); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); + + f2fs_exit_sysfs(sbi); sb->s_fs_info = NULL; if (sbi->s_chksum_driver) @@ -838,6 +652,8 @@ static void f2fs_put_super(struct super_block *sb) destroy_device_list(sbi); mempool_destroy(sbi->write_io_dummy); destroy_percpu_info(sbi); + for (i = 0; i < NR_PAGE_TYPE; i++) + kfree(sbi->write_io[i]); kfree(sbi); } @@ -888,6 +704,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) struct f2fs_sb_info *sbi = F2FS_SB(sb); u64 id = huge_encode_dev(sb->s_bdev->bd_dev); block_t total_count, user_block_count, start_count, ovp_count; + u64 avail_node_count; total_count = le64_to_cpu(sbi->raw_super->block_count); user_block_count = sbi->user_block_count; @@ -898,11 +715,19 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; - buf->f_bavail = user_block_count - valid_user_blocks(sbi); + buf->f_bavail = user_block_count - valid_user_blocks(sbi) - + sbi->reserved_blocks; - buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; - buf->f_ffree = min(buf->f_files - valid_node_count(sbi), - buf->f_bavail); + avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + + if (avail_node_count > user_block_count) { + buf->f_files = user_block_count; + buf->f_ffree = buf->f_bavail; + } else { + buf->f_files = avail_node_count; + buf->f_ffree = min(avail_node_count - valid_node_count(sbi), + buf->f_bavail); + } buf->f_namelen = F2FS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; @@ -980,79 +805,19 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) - seq_puts(seq, ",fault_injection"); + seq_printf(seq, ",fault_injection=%u", + sbi->fault_info.inject_rate); +#endif +#ifdef CONFIG_QUOTA + if (test_opt(sbi, USRQUOTA)) + seq_puts(seq, ",usrquota"); + if (test_opt(sbi, GRPQUOTA)) + seq_puts(seq, ",grpquota"); #endif return 0; } -static int segment_info_seq_show(struct seq_file *seq, void *offset) -{ - struct super_block *sb = seq->private; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - unsigned int total_segs = - le32_to_cpu(sbi->raw_super->segment_count_main); - int i; - - seq_puts(seq, "format: segment_type|valid_blocks\n" - "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); - - for (i = 0; i < total_segs; i++) { - struct seg_entry *se = get_seg_entry(sbi, i); - - if ((i % 10) == 0) - seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u", se->type, - get_valid_blocks(sbi, i, false)); - if ((i % 10) == 9 || i == (total_segs - 1)) - seq_putc(seq, '\n'); - else - seq_putc(seq, ' '); - } - - return 0; -} - -static int segment_bits_seq_show(struct seq_file *seq, void *offset) -{ - struct super_block *sb = seq->private; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - unsigned int total_segs = - le32_to_cpu(sbi->raw_super->segment_count_main); - int i, j; - - seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" - "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); - - for (i = 0; i < total_segs; i++) { - struct seg_entry *se = get_seg_entry(sbi, i); - - seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u|", se->type, - get_valid_blocks(sbi, i, false)); - for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) - seq_printf(seq, " %.2x", se->cur_valid_map[j]); - seq_putc(seq, '\n'); - } - return 0; -} - -#define F2FS_PROC_FILE_DEF(_name) \ -static int _name##_open_fs(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, _name##_seq_show, PDE_DATA(inode)); \ -} \ - \ -static const struct file_operations f2fs_seq_##_name##_fops = { \ - .open = _name##_open_fs, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -}; - -F2FS_PROC_FILE_DEF(segment_info); -F2FS_PROC_FILE_DEF(segment_bits); - static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ @@ -1089,6 +854,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; + unsigned long old_sb_flags; int err, active_logs; bool need_restart_gc = false; bool need_stop_gc = false; @@ -1102,6 +868,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * need to restore them. */ org_mount_opt = sbi->mount_opt; + old_sb_flags = sb->s_flags; active_logs = sbi->active_logs; /* recover superblocks we couldn't write due to previous RO mount */ @@ -1113,7 +880,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_NEED_SB_WRITE); } - sbi->mount_opt.opt = 0; default_options(sbi); /* parse mount options */ @@ -1128,6 +894,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) goto skip; + if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { + err = dquot_suspend(sb, -1); + if (err < 0) + goto restore_opts; + } else { + /* dquot_resume needs RW */ + sb->s_flags &= ~MS_RDONLY; + dquot_resume(sb, -1); + } + /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; @@ -1192,12 +968,237 @@ restore_gc: restore_opts: sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; + sb->s_flags = old_sb_flags; #ifdef CONFIG_F2FS_FAULT_INJECTION sbi->fault_info = ffi; #endif return err; } +#ifdef CONFIG_QUOTA +/* Read data from quotafile */ +static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + struct address_space *mapping = inode->i_mapping; + block_t blkidx = F2FS_BYTES_TO_BLK(off); + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t toread; + loff_t i_size = i_size_read(inode); + struct page *page; + char *kaddr; + + if (off > i_size) + return 0; + + if (off + len > i_size) + len = i_size - off; + toread = len; + while (toread > 0) { + tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); +repeat: + page = read_mapping_page(mapping, blkidx, NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + f2fs_put_page(page, 1); + goto repeat; + } + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return -EIO; + } + + kaddr = kmap_atomic(page); + memcpy(data, kaddr + offset, tocopy); + kunmap_atomic(kaddr); + f2fs_put_page(page, 1); + + offset = 0; + toread -= tocopy; + data += tocopy; + blkidx++; + } + return len; +} + +/* Write to quotafile */ +static ssize_t f2fs_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + struct address_space *mapping = inode->i_mapping; + const struct address_space_operations *a_ops = mapping->a_ops; + int offset = off & (sb->s_blocksize - 1); + size_t towrite = len; + struct page *page; + char *kaddr; + int err = 0; + int tocopy; + + while (towrite > 0) { + tocopy = min_t(unsigned long, sb->s_blocksize - offset, + towrite); + + err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, + &page, NULL); + if (unlikely(err)) + break; + + kaddr = kmap_atomic(page); + memcpy(kaddr + offset, data, tocopy); + kunmap_atomic(kaddr); + flush_dcache_page(page); + + a_ops->write_end(NULL, mapping, off, tocopy, tocopy, + page, NULL); + offset = 0; + towrite -= tocopy; + off += tocopy; + data += tocopy; + cond_resched(); + } + + if (len == towrite) + return err; + inode->i_version++; + inode->i_mtime = inode->i_ctime = current_time(inode); + f2fs_mark_inode_dirty_sync(inode, false); + return len - towrite; +} + +static struct dquot **f2fs_get_dquots(struct inode *inode) +{ + return F2FS_I(inode)->i_dquot; +} + +static qsize_t *f2fs_get_reserved_space(struct inode *inode) +{ + return &F2FS_I(inode)->i_reserved_quota; +} + +static int f2fs_quota_sync(struct super_block *sb, int type) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int cnt; + int ret; + + ret = dquot_writeback_dquots(sb, type); + if (ret) + return ret; + + /* + * Now when everything is written we can discard the pagecache so + * that userspace sees the changes. + */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && cnt != type) + continue; + if (!sb_has_quota_active(sb, cnt)) + continue; + + ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping); + if (ret) + return ret; + + inode_lock(dqopt->files[cnt]); + truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); + inode_unlock(dqopt->files[cnt]); + } + return 0; +} + +static int f2fs_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path) +{ + struct inode *inode; + int err; + + err = f2fs_quota_sync(sb, -1); + if (err) + return err; + + err = dquot_quota_on(sb, type, format_id, path); + if (err) + return err; + + inode = d_inode(path->dentry); + + inode_lock(inode); + F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + f2fs_mark_inode_dirty_sync(inode, false); + + return 0; +} + +static int f2fs_quota_off(struct super_block *sb, int type) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + int err; + + if (!inode || !igrab(inode)) + return dquot_quota_off(sb, type); + + f2fs_quota_sync(sb, -1); + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + f2fs_mark_inode_dirty_sync(inode, false); +out_put: + iput(inode); + return err; +} + +static void f2fs_quota_off_umount(struct super_block *sb) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + f2fs_quota_off(sb, type); +} + +static const struct dquot_operations f2fs_quota_operations = { + .get_reserved_space = f2fs_get_reserved_space, + .write_dquot = dquot_commit, + .acquire_dquot = dquot_acquire, + .release_dquot = dquot_release, + .mark_dirty = dquot_mark_dquot_dirty, + .write_info = dquot_commit_info, + .alloc_dquot = dquot_alloc, + .destroy_dquot = dquot_destroy, + .get_next_id = dquot_get_next_id, +}; + +static const struct quotactl_ops f2fs_quotactl_ops = { + .quota_on = f2fs_quota_on, + .quota_off = f2fs_quota_off, + .quota_sync = f2fs_quota_sync, + .get_state = dquot_get_state, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, +}; +#else +static inline void f2fs_quota_off_umount(struct super_block *sb) +{ +} +#endif + static struct super_operations f2fs_sops = { .alloc_inode = f2fs_alloc_inode, .drop_inode = f2fs_drop_inode, @@ -1205,6 +1206,11 @@ static struct super_operations f2fs_sops = { .write_inode = f2fs_write_inode, .dirty_inode = f2fs_dirty_inode, .show_options = f2fs_show_options, +#ifdef CONFIG_QUOTA + .quota_read = f2fs_quota_read, + .quota_write = f2fs_quota_write, + .get_dquots = f2fs_get_dquots, +#endif .evict_inode = f2fs_evict_inode, .put_super = f2fs_put_super, .sync_fs = f2fs_sync_fs, @@ -1521,6 +1527,8 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned int ovp_segments, reserved_segments; + unsigned int main_segs, blocks_per_seg; + int i; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); @@ -1542,6 +1550,20 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + main_segs = le32_to_cpu(raw_super->segment_count_main); + blocks_per_seg = sbi->blocks_per_seg; + + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || + le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) + return 1; + } + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { + if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || + le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) + return 1; + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; @@ -1552,7 +1574,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; - int i; + int i, j; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -1584,8 +1606,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - mutex_init(&sbi->wio_mutex[NODE]); - mutex_init(&sbi->wio_mutex[DATA]); + for (i = 0; i < NR_PAGE_TYPE - 1; i++) + for (j = HOT; j < NR_TEMP_TYPE; j++) + mutex_init(&sbi->wio_mutex[i][j]); spin_lock_init(&sbi->cp_lock); } @@ -1908,6 +1931,7 @@ try_onemore: if (f2fs_sb_mounted_blkzoned(sb)) { f2fs_msg(sb, KERN_ERR, "Zoned block device support is not enabled\n"); + err = -EOPNOTSUPP; goto free_sb_buf; } #endif @@ -1929,6 +1953,12 @@ try_onemore: sb->s_max_links = F2FS_LINK_MAX; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); +#ifdef CONFIG_QUOTA + sb->dq_op = &f2fs_quota_operations; + sb->s_qcop = &f2fs_quotactl_ops; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; +#endif + sb->s_op = &f2fs_sops; sb->s_cop = &f2fs_cryptops; sb->s_xattr = f2fs_xattr_handlers; @@ -1950,13 +1980,24 @@ try_onemore: set_sbi_flag(sbi, SBI_POR_DOING); spin_lock_init(&sbi->stat_lock); - init_rwsem(&sbi->read_io.io_rwsem); - sbi->read_io.sbi = sbi; - sbi->read_io.bio = NULL; for (i = 0; i < NR_PAGE_TYPE; i++) { - init_rwsem(&sbi->write_io[i].io_rwsem); - sbi->write_io[i].sbi = sbi; - sbi->write_io[i].bio = NULL; + int n = (i == META) ? 1: NR_TEMP_TYPE; + int j; + + sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info), + GFP_KERNEL); + if (!sbi->write_io[i]) { + err = -ENOMEM; + goto free_options; + } + + for (j = HOT; j < n; j++) { + init_rwsem(&sbi->write_io[i][j].io_rwsem); + sbi->write_io[i][j].sbi = sbi; + sbi->write_io[i][j].bio = NULL; + spin_lock_init(&sbi->write_io[i][j].io_lock); + INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); + } } init_rwsem(&sbi->cp_rwsem); @@ -1970,8 +2011,10 @@ try_onemore: if (F2FS_IO_SIZE(sbi) > 1) { sbi->write_io_dummy = mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0); - if (!sbi->write_io_dummy) + if (!sbi->write_io_dummy) { + err = -ENOMEM; goto free_options; + } } /* get an inode for meta space */ @@ -2003,6 +2046,7 @@ try_onemore: sbi->total_valid_block_count = le64_to_cpu(sbi->ckpt->valid_block_count); sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->reserved_blocks = 0; for (i = 0; i < NR_INODE_TYPE; i++) { INIT_LIST_HEAD(&sbi->inode_list[i]); @@ -2078,22 +2122,9 @@ try_onemore: goto free_root_inode; } - if (f2fs_proc_root) - sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); - - if (sbi->s_proc) { - proc_create_data("segment_info", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_info_fops, sb); - proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_bits_fops, sb); - } - - sbi->s_kobj.kset = f2fs_kset; - init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, - "%s", sb->s_id); + err = f2fs_init_sysfs(sbi); if (err) - goto free_proc; + goto free_root_inode; /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -2104,7 +2135,7 @@ try_onemore: if (bdev_read_only(sb->s_bdev) && !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { err = -EROFS; - goto free_kobj; + goto free_sysfs; } if (need_fsck) @@ -2118,7 +2149,7 @@ try_onemore: need_fsck = true; f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%d", err); - goto free_kobj; + goto free_sysfs; } } else { err = recover_fsync_data(sbi, true); @@ -2127,7 +2158,7 @@ try_onemore: err = -EINVAL; f2fs_msg(sb, KERN_ERR, "Need to recover fsync data"); - goto free_kobj; + goto free_sysfs; } } skip_recovery: @@ -2142,7 +2173,7 @@ skip_recovery: /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) - goto free_kobj; + goto free_sysfs; } kfree(options); @@ -2160,17 +2191,9 @@ skip_recovery: f2fs_update_time(sbi, REQ_TIME); return 0; -free_kobj: +free_sysfs: f2fs_sync_inode_meta(sbi); - kobject_del(&sbi->s_kobj); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); -free_proc: - if (sbi->s_proc) { - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry("segment_bits", sbi->s_proc); - remove_proc_entry(sb->s_id, f2fs_proc_root); - } + f2fs_exit_sysfs(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; @@ -2202,6 +2225,8 @@ free_meta_inode: free_io_dummy: mempool_destroy(sbi->write_io_dummy); free_options: + for (i = 0; i < NR_PAGE_TYPE; i++) + kfree(sbi->write_io[i]); destroy_percpu_info(sbi); kfree(options); free_sb_buf: @@ -2228,8 +2253,11 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, static void kill_f2fs_super(struct super_block *sb) { - if (sb->s_root) + if (sb->s_root) { set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE); + stop_gc_thread(F2FS_SB(sb)); + stop_discard_thread(F2FS_SB(sb)); + } kill_block_super(sb); } @@ -2283,30 +2311,26 @@ static int __init init_f2fs_fs(void) err = create_extent_cache(); if (err) goto free_checkpoint_caches; - f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); - if (!f2fs_kset) { - err = -ENOMEM; + err = f2fs_register_sysfs(); + if (err) goto free_extent_cache; - } err = register_shrinker(&f2fs_shrinker_info); if (err) - goto free_kset; - + goto free_sysfs; err = register_filesystem(&f2fs_fs_type); if (err) goto free_shrinker; err = f2fs_create_root_stats(); if (err) goto free_filesystem; - f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); return 0; free_filesystem: unregister_filesystem(&f2fs_fs_type); free_shrinker: unregister_shrinker(&f2fs_shrinker_info); -free_kset: - kset_unregister(f2fs_kset); +free_sysfs: + f2fs_unregister_sysfs(); free_extent_cache: destroy_extent_cache(); free_checkpoint_caches: @@ -2323,11 +2347,10 @@ fail: static void __exit exit_f2fs_fs(void) { - remove_proc_entry("fs/f2fs", NULL); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); unregister_shrinker(&f2fs_shrinker_info); - kset_unregister(f2fs_kset); + f2fs_unregister_sysfs(); destroy_extent_cache(); destroy_checkpoint_caches(); destroy_segment_manager_caches(); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c new file mode 100644 index 000000000000..9adc202fcd6f --- /dev/null +++ b/fs/f2fs/sysfs.c @@ -0,0 +1,364 @@ +/* + * f2fs sysfs interface + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Copyright (c) 2017 Chao Yu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +#include "f2fs.h" +#include "segment.h" +#include "gc.h" + +static struct proc_dir_entry *f2fs_proc_root; +static struct kset *f2fs_kset; + +/* Sysfs support for f2fs */ +enum { + GC_THREAD, /* struct f2fs_gc_thread */ + SM_INFO, /* struct f2fs_sm_info */ + DCC_INFO, /* struct discard_cmd_control */ + NM_INFO, /* struct f2fs_nm_info */ + F2FS_SBI, /* struct f2fs_sb_info */ +#ifdef CONFIG_F2FS_FAULT_INJECTION + FAULT_INFO_RATE, /* struct f2fs_fault_info */ + FAULT_INFO_TYPE, /* struct f2fs_fault_info */ +#endif + RESERVED_BLOCKS, +}; + +struct f2fs_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); + ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, + const char *, size_t); + int struct_type; + int offset; +}; + +static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) +{ + if (struct_type == GC_THREAD) + return (unsigned char *)sbi->gc_thread; + else if (struct_type == SM_INFO) + return (unsigned char *)SM_I(sbi); + else if (struct_type == DCC_INFO) + return (unsigned char *)SM_I(sbi)->dcc_info; + else if (struct_type == NM_INFO) + return (unsigned char *)NM_I(sbi); + else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS) + return (unsigned char *)sbi; +#ifdef CONFIG_F2FS_FAULT_INJECTION + else if (struct_type == FAULT_INFO_RATE || + struct_type == FAULT_INFO_TYPE) + return (unsigned char *)&sbi->fault_info; +#endif + return NULL; +} + +static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->sb; + + if (!sb->s_bdev->bd_part) + return snprintf(buf, PAGE_SIZE, "0\n"); + + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)(sbi->kbytes_written + + BD_PART_WRITTEN(sbi))); +} + +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + unsigned char *ptr = NULL; + unsigned int *ui; + + ptr = __struct_ptr(sbi, a->struct_type); + if (!ptr) + return -EINVAL; + + ui = (unsigned int *)(ptr + a->offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + unsigned char *ptr; + unsigned long t; + unsigned int *ui; + ssize_t ret; + + ptr = __struct_ptr(sbi, a->struct_type); + if (!ptr) + return -EINVAL; + + ui = (unsigned int *)(ptr + a->offset); + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret < 0) + return ret; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) + return -EINVAL; +#endif + if (a->struct_type == RESERVED_BLOCKS) { + spin_lock(&sbi->stat_lock); + if ((unsigned long)sbi->total_valid_block_count + t > + (unsigned long)sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return -EINVAL; + } + *ui = t; + spin_unlock(&sbi->stat_lock); + return count; + } + *ui = t; + return count; +} + +static ssize_t f2fs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->store ? a->store(a, sbi, buf, len) : 0; +} + +static void f2fs_sb_release(struct kobject *kobj) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .struct_type = _struct_type, \ + .offset = _offset \ +} + +#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \ + F2FS_ATTR_OFFSET(struct_type, name, 0644, \ + f2fs_sbi_show, f2fs_sbi_store, \ + offsetof(struct struct_name, elname)) + +#define F2FS_GENERAL_RO_ATTR(name) \ +static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL) + +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); +F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +#ifdef CONFIG_F2FS_FAULT_INJECTION +F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); +F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); +#endif +F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); + +#define ATTR_LIST(name) (&f2fs_attr_##name.attr) +static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_min_sleep_time), + ATTR_LIST(gc_max_sleep_time), + ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_idle), + ATTR_LIST(reclaim_segments), + ATTR_LIST(max_small_discards), + ATTR_LIST(batched_trim_sections), + ATTR_LIST(ipu_policy), + ATTR_LIST(min_ipu_util), + ATTR_LIST(min_fsync_blocks), + ATTR_LIST(min_hot_blocks), + ATTR_LIST(max_victim_search), + ATTR_LIST(dir_level), + ATTR_LIST(ram_thresh), + ATTR_LIST(ra_nid_pages), + ATTR_LIST(dirty_nats_ratio), + ATTR_LIST(cp_interval), + ATTR_LIST(idle_interval), +#ifdef CONFIG_F2FS_FAULT_INJECTION + ATTR_LIST(inject_rate), + ATTR_LIST(inject_type), +#endif + ATTR_LIST(lifetime_write_kbytes), + ATTR_LIST(reserved_blocks), + NULL, +}; + +static const struct sysfs_ops f2fs_attr_ops = { + .show = f2fs_attr_show, + .store = f2fs_attr_store, +}; + +static struct kobj_type f2fs_ktype = { + .default_attrs = f2fs_attrs, + .sysfs_ops = &f2fs_attr_ops, + .release = f2fs_sb_release, +}; + +static int segment_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = + le32_to_cpu(sbi->raw_super->segment_count_main); + int i; + + seq_puts(seq, "format: segment_type|valid_blocks\n" + "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); + + for (i = 0; i < total_segs; i++) { + struct seg_entry *se = get_seg_entry(sbi, i); + + if ((i % 10) == 0) + seq_printf(seq, "%-10d", i); + seq_printf(seq, "%d|%-3u", se->type, + get_valid_blocks(sbi, i, false)); + if ((i % 10) == 9 || i == (total_segs - 1)) + seq_putc(seq, '\n'); + else + seq_putc(seq, ' '); + } + + return 0; +} + +static int segment_bits_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = + le32_to_cpu(sbi->raw_super->segment_count_main); + int i, j; + + seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" + "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); + + for (i = 0; i < total_segs; i++) { + struct seg_entry *se = get_seg_entry(sbi, i); + + seq_printf(seq, "%-10d", i); + seq_printf(seq, "%d|%-3u|", se->type, + get_valid_blocks(sbi, i, false)); + for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) + seq_printf(seq, " %.2x", se->cur_valid_map[j]); + seq_putc(seq, '\n'); + } + return 0; +} + +#define F2FS_PROC_FILE_DEF(_name) \ +static int _name##_open_fs(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, _name##_seq_show, PDE_DATA(inode)); \ +} \ + \ +static const struct file_operations f2fs_seq_##_name##_fops = { \ + .open = _name##_open_fs, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +}; + +F2FS_PROC_FILE_DEF(segment_info); +F2FS_PROC_FILE_DEF(segment_bits); + +int __init f2fs_register_sysfs(void) +{ + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + + f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); + if (!f2fs_kset) + return -ENOMEM; + return 0; +} + +void f2fs_unregister_sysfs(void) +{ + kset_unregister(f2fs_kset); + remove_proc_entry("fs/f2fs", NULL); +} + +int f2fs_init_sysfs(struct f2fs_sb_info *sbi) +{ + struct super_block *sb = sbi->sb; + int err; + + if (f2fs_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + + if (sbi->s_proc) { + proc_create_data("segment_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_info_fops, sb); + proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_bits_fops, sb); + } + + sbi->s_kobj.kset = f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, + "%s", sb->s_id); + if (err) + goto err_out; + return 0; +err_out: + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry("segment_bits", sbi->s_proc); + remove_proc_entry(sb->s_id, f2fs_proc_root); + } + return err; +} + +void f2fs_exit_sysfs(struct f2fs_sb_info *sbi) +{ + kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry("segment_bits", sbi->s_proc); + remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); + } +} diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 15da88c5c3a4..6f77a2755abb 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -19,6 +19,9 @@ TRACE_DEFINE_ENUM(INMEM_INVALIDATE); TRACE_DEFINE_ENUM(INMEM_REVOKE); TRACE_DEFINE_ENUM(IPU); TRACE_DEFINE_ENUM(OPU); +TRACE_DEFINE_ENUM(HOT); +TRACE_DEFINE_ENUM(WARM); +TRACE_DEFINE_ENUM(COLD); TRACE_DEFINE_ENUM(CURSEG_HOT_DATA); TRACE_DEFINE_ENUM(CURSEG_WARM_DATA); TRACE_DEFINE_ENUM(CURSEG_COLD_DATA); @@ -59,6 +62,12 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { IPU, "IN-PLACE" }, \ { OPU, "OUT-OF-PLACE" }) +#define show_block_temp(temp) \ + __print_symbolic(temp, \ + { HOT, "HOT" }, \ + { WARM, "WARM" }, \ + { COLD, "COLD" }) + #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_PREFLUSH | REQ_FUA) #define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS) @@ -757,6 +766,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __field(block_t, new_blkaddr) __field(int, op) __field(int, op_flags) + __field(int, temp) __field(int, type) ), @@ -768,16 +778,18 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __entry->new_blkaddr = fio->new_blkaddr; __entry->op = fio->op; __entry->op_flags = fio->op_flags; + __entry->temp = fio->temp; __entry->type = fio->type; ), TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s", + "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s_%s", show_dev_ino(__entry), (unsigned long)__entry->index, (unsigned long long)__entry->old_blkaddr, (unsigned long long)__entry->new_blkaddr, show_bio_type(__entry->op, __entry->op_flags), + show_block_temp(__entry->temp), show_block_type(__entry->type)) ); @@ -790,7 +802,7 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, TP_CONDITION(page->mapping) ); -DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_write, TP_PROTO(struct page *page, struct f2fs_io_info *fio),