diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index f9b3e0a83526..a33d8fb1bf2a 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -243,8 +243,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, */ ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), sb->s_blocksize * 8, bh->b_data); - ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); - ext4_group_desc_csum_set(sb, block_group, gdp); return 0; } @@ -340,20 +338,25 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, /* check whether block bitmap block number is set */ blk = ext4_block_bitmap(sb, desc); offset = blk - group_first_block; - if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) + if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize || + !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) /* bad block bitmap */ return blk; /* check whether the inode bitmap block number is set */ blk = ext4_inode_bitmap(sb, desc); offset = blk - group_first_block; - if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) + if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize || + !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) /* bad block bitmap */ return blk; /* check whether the inode table block number is set */ blk = ext4_inode_table(sb, desc); offset = blk - group_first_block; + if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize || + EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= sb->s_blocksize) + return blk; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, EXT4_B2C(sbi, offset + sbi->s_itb_per_group), EXT4_B2C(sbi, offset)); @@ -419,6 +422,7 @@ struct buffer_head * ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) { struct ext4_group_desc *desc; + struct ext4_sb_info *sbi = EXT4_SB(sb); struct buffer_head *bh; ext4_fsblk_t bitmap_blk; int err; @@ -427,6 +431,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) if (!desc) return ERR_PTR(-EFSCORRUPTED); bitmap_blk = 
ext4_block_bitmap(sb, desc); + if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (bitmap_blk >= ext4_blocks_count(sbi->s_es))) { + ext4_error(sb, "Invalid block bitmap block %llu in " + "block_group %u", bitmap_blk, block_group); + return ERR_PTR(-EFSCORRUPTED); + } bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { ext4_error(sb, "Cannot get buffer for block bitmap - " @@ -448,6 +458,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); + set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); if (err) { diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index da87cf757f7d..e2902d394f1b 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -365,13 +365,15 @@ static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; int dx_dir = is_dx_dir(inode); - loff_t htree_max = ext4_get_htree_eof(file); + loff_t ret, htree_max = ext4_get_htree_eof(file); if (likely(dx_dir)) - return generic_file_llseek_size(file, offset, whence, + ret = generic_file_llseek_size(file, offset, whence, htree_max, htree_max); else - return ext4_llseek(file, offset, whence); + ret = ext4_llseek(file, offset, whence); + file->f_version = inode_peek_iversion(inode) - 1; + return ret; } /* diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3241475a1733..a42e71203e53 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1522,8 +1522,6 @@ enum { EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ EXT4_STATE_NEWENTRY, /* File just added to dir */ - EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read - nolocking */ EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */ @@ -3181,21 +3179,6 @@ static 
inline void set_bitmap_uptodate(struct buffer_head *bh) set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); } -/* - * Disable DIO read nolock optimization, so new dioreaders will be forced - * to grab i_mutex - */ -static inline void ext4_inode_block_unlocked_dio(struct inode *inode) -{ - ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); - smp_mb(); -} -static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) -{ - smp_mb(); - ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); -} - #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) /* For ioend & aio unwritten conversion wait queues */ diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 2d593201cf7a..7c70b08d104c 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -166,13 +166,6 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line, might_sleep(); if (ext4_handle_valid(handle)) { - struct super_block *sb; - - sb = handle->h_transaction->t_journal->j_private; - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) { - jbd2_journal_abort_handle(handle); - return -EIO; - } err = jbd2_journal_get_write_access(handle, bh); if (err) ext4_journal_abort_handle(where, line, __func__, bh, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 054416e9d827..0a7315961bac 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4796,7 +4796,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, flags |= EXT4_GET_BLOCKS_KEEP_SIZE; /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); /* Preallocate the range including the unaligned edges */ @@ -4807,7 +4806,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, round_down(offset, 1 << blkbits)) >> blkbits, new_size, flags); if (ret) - goto out_dio; + goto out_mutex; } @@ -4824,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, ret = ext4_update_disksize_before_punch(inode, 
offset, len); if (ret) { up_write(&EXT4_I(inode)->i_mmap_sem); - goto out_dio; + goto out_mutex; } /* Now release the pages and zero block aligned part of pages */ truncate_pagecache_range(inode, start, end - 1); @@ -4834,10 +4833,10 @@ static long ext4_zero_range(struct file *file, loff_t offset, flags); up_write(&EXT4_I(inode)->i_mmap_sem); if (ret) - goto out_dio; + goto out_mutex; } if (!partial_begin && !partial_end) - goto out_dio; + goto out_mutex; /* * In worst case we have to writeout two nonadjacent unwritten @@ -4850,7 +4849,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (IS_ERR(handle)) { ret = PTR_ERR(handle); ext4_std_error(inode->i_sb, ret); - goto out_dio; + goto out_mutex; } inode->i_mtime = inode->i_ctime = current_time(inode); @@ -4875,8 +4874,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, ext4_handle_sync(handle); ext4_journal_stop(handle); -out_dio: - ext4_inode_resume_unlocked_dio(inode); out_mutex: inode_unlock(inode); return ret; @@ -4964,11 +4961,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) } /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); - ext4_inode_resume_unlocked_dio(inode); if (ret) goto out; @@ -5485,7 +5480,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) } /* Wait for existing dio to complete */ - ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); /* @@ -5562,7 +5556,6 @@ out_stop: ext4_journal_stop(handle); out_mmap: up_write(&EXT4_I(inode)->i_mmap_sem); - ext4_inode_resume_unlocked_dio(inode); out_mutex: inode_unlock(inode); return ret; @@ -5635,7 +5628,6 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) } /* Wait for existing dio to complete */ - ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); /* @@ -5737,7 +5729,6 @@ 
out_stop: ext4_journal_stop(handle); out_mmap: up_write(&EXT4_I(inode)->i_mmap_sem); - ext4_inode_resume_unlocked_dio(inode); out_mutex: inode_unlock(inode); return ret; @@ -5751,7 +5742,7 @@ out_mutex: * @lblk1: Start block for first inode * @lblk2: Start block for second inode * @count: Number of blocks to swap - * @mark_unwritten: Mark second inode's extents as unwritten after swap + * @unwritten: Mark second inode's extents as unwritten after swap * @erp: Pointer to save error value * * This helper routine does exactly what is promise "swap extents". All other @@ -5765,7 +5756,7 @@ out_mutex: */ int ext4_swap_extents(handle_t *handle, struct inode *inode1, - struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2, + struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2, ext4_lblk_t count, int unwritten, int *erp) { struct ext4_ext_path *path1 = NULL; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 7830d28df331..df92e3ec9913 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -66,44 +66,6 @@ void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); } -/* Initializes an uninitialized inode bitmap */ -static int ext4_init_inode_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t block_group, - struct ext4_group_desc *gdp) -{ - struct ext4_group_info *grp; - struct ext4_sb_info *sbi = EXT4_SB(sb); - J_ASSERT_BH(bh, buffer_locked(bh)); - - /* If checksum is bad mark all blocks and inodes use to prevent - * allocation, essentially implementing a per-group read-only flag. 
*/ - if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - grp = ext4_get_group_info(sb, block_group); - if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) - percpu_counter_sub(&sbi->s_freeclusters_counter, - grp->bb_free); - set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); - if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { - int count; - count = ext4_free_inodes_count(sb, gdp); - percpu_counter_sub(&sbi->s_freeinodes_counter, - count); - } - set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return -EFSBADCRC; - } - - memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); - ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, - bh->b_data); - ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_INODES_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, block_group, gdp); - - return 0; -} - void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) { if (uptodate) { @@ -160,6 +122,7 @@ static struct buffer_head * ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) { struct ext4_group_desc *desc; + struct ext4_sb_info *sbi = EXT4_SB(sb); struct buffer_head *bh = NULL; ext4_fsblk_t bitmap_blk; int err; @@ -169,6 +132,12 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) return ERR_PTR(-EFSCORRUPTED); bitmap_blk = ext4_inode_bitmap(sb, desc); + if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (bitmap_blk >= ext4_blocks_count(sbi->s_es))) { + ext4_error(sb, "Invalid inode bitmap blk %llu in " + "block_group %u", bitmap_blk, block_group); + return ERR_PTR(-EFSCORRUPTED); + } bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { ext4_error(sb, "Cannot read inode bitmap - " @@ -187,17 +156,14 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - err = ext4_init_inode_bitmap(sb, bh, block_group, desc); + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) 
+ 7) / 8); + ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), + sb->s_blocksize * 8, bh->b_data); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) { - ext4_error(sb, "Failed to init inode bitmap for group " - "%u: %d", block_group, err); - goto out; - } return bh; } ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c94780075b04..18aa2ef963ad 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2694,15 +2694,6 @@ out: return err; } -static int __writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct address_space *mapping = data; - int ret = ext4_writepage(page, wbc); - mapping_set_error(mapping, ret); - return ret; -} - static int ext4_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -2740,11 +2731,7 @@ static int ext4_writepages(struct address_space *mapping, goto out_writepages; if (ext4_should_journal_data(inode)) { - struct blk_plug plug; - - blk_start_plug(&plug); - ret = write_cache_pages(mapping, wbc, __writepage, mapping); - blk_finish_plug(&plug); + ret = generic_writepages(mapping, wbc); goto out_writepages; } @@ -3524,7 +3511,7 @@ retry: iomap->flags |= IOMAP_F_DIRTY; iomap->bdev = inode->i_sb->s_bdev; iomap->dax_dev = sbi->s_daxdev; - iomap->offset = first_block << blkbits; + iomap->offset = (u64)first_block << blkbits; iomap->length = (u64)map.m_len << blkbits; if (ret == 0) { @@ -3669,7 +3656,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) int orphan = 0; handle_t *handle; - if (final_size > inode->i_size) { + if (final_size > inode->i_size || final_size > ei->i_disksize) { /* Credits for sb + inode write */ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) { @@ -3682,7 +3669,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) goto out; } orphan = 1; - ei->i_disksize = 
inode->i_size; + ext4_update_i_disksize(inode, inode->i_size); ext4_journal_stop(handle); } @@ -3789,9 +3776,10 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) ext4_orphan_del(handle, inode); if (ret > 0) { loff_t end = offset + ret; - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); + if (end > inode->i_size || end > ei->i_disksize) { + ext4_update_i_disksize(inode, end); + if (end > inode->i_size) + i_size_write(inode, end); /* * We're going to return a positive `ret' * here due to non-zero-length I/O, so there's @@ -4251,7 +4239,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); /* @@ -4324,7 +4311,6 @@ out_stop: ext4_journal_stop(handle); out_dio: up_write(&EXT4_I(inode)->i_mmap_sem); - ext4_inode_resume_unlocked_dio(inode); out_mutex: inode_unlock(inode); return ret; @@ -4746,6 +4732,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) goto bad_inode; raw_inode = ext4_raw_inode(&iloc); + if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) { + EXT4_ERROR_INODE(inode, "root inode unallocated"); + ret = -EFSCORRUPTED; + goto bad_inode; + } + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > @@ -5506,9 +5498,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) */ if (orphan) { if (!ext4_should_journal_data(inode)) { - ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); - ext4_inode_resume_unlocked_dio(inode); } else ext4_wait_for_tail_page_commit(inode); } @@ -5999,7 +5989,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return -EROFS; /* Wait for all existing dio workers */ - ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); /* @@ -6015,7 
+6004,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = filemap_write_and_wait(inode->i_mapping); if (err < 0) { up_write(&EXT4_I(inode)->i_mmap_sem); - ext4_inode_resume_unlocked_dio(inode); return err; } } @@ -6038,7 +6026,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) if (err < 0) { jbd2_journal_unlock_updates(journal); percpu_up_write(&sbi->s_journal_flag_rwsem); - ext4_inode_resume_unlocked_dio(inode); return err; } ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); @@ -6050,7 +6037,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) if (val) up_write(&EXT4_I(inode)->i_mmap_sem); - ext4_inode_resume_unlocked_dio(inode); /* Finally we can mark the inode as dirty. */ diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7e99ad02f1ba..a7074115d6f6 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -124,8 +124,6 @@ static long swap_inode_boot_loader(struct super_block *sb, truncate_inode_pages(&inode_bl->i_data, 0); /* Wait for all existing dio workers */ - ext4_inode_block_unlocked_dio(inode); - ext4_inode_block_unlocked_dio(inode_bl); inode_dio_wait(inode); inode_dio_wait(inode_bl); @@ -186,8 +184,6 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_double_up_write_data_sem(inode, inode_bl); journal_err_out: - ext4_inode_resume_unlocked_dio(inode); - ext4_inode_resume_unlocked_dio(inode_bl); unlock_two_nondirectories(inode, inode_bl); iput(inode_bl); return err; @@ -481,6 +477,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg) return 0; ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags); + trace_ext4_shutdown(sb, flags); switch (flags) { case EXT4_GOING_FLAGS_DEFAULT: @@ -492,15 +489,13 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg) set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) { (void) ext4_force_commit(sb); - jbd2_journal_abort(sbi->s_journal, 0); + 
jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN); } break; case EXT4_GOING_FLAGS_NOLOGFLUSH: set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); - if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) { - msleep(100); - jbd2_journal_abort(sbi->s_journal, 0); - } + if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) + jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN); break; default: return -EINVAL; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index b96e4bd3b3ec..8e17efdcbf11 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -601,8 +601,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, lock_two_nondirectories(orig_inode, donor_inode); /* Wait for all existing dio workers */ - ext4_inode_block_unlocked_dio(orig_inode); - ext4_inode_block_unlocked_dio(donor_inode); inode_dio_wait(orig_inode); inode_dio_wait(donor_inode); @@ -693,8 +691,6 @@ out: ext4_ext_drop_refs(path); kfree(path); ext4_double_up_write_data_sem(orig_inode, donor_inode); - ext4_inode_resume_unlocked_dio(orig_inode); - ext4_inode_resume_unlocked_dio(donor_inode); unlock_two_nondirectories(orig_inode, donor_inode); return ret; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 39bf464c35f1..185f7e61f4cf 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -101,15 +101,13 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, * i_data_sem (rw) * * truncate: - * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) -> - * i_mmap_rwsem (w) -> page lock - * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) -> - * transaction start -> i_data_sem (rw) + * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock + * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start -> + * i_data_sem (rw) * * direct IO: - * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem - * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> - * 
transaction start -> i_data_sem (rw) + * sb_start_write -> i_mutex -> mmap_sem + * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw) * * writepages: * transaction start -> page lock(s) -> i_data_sem (rw) @@ -448,6 +446,7 @@ void __ext4_error(struct super_block *sb, const char *function, if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) return; + trace_ext4_error(sb, function, line); if (ext4_error_ratelimit(sb)) { va_start(args, fmt); vaf.fmt = fmt; @@ -472,6 +471,7 @@ void __ext4_error_inode(struct inode *inode, const char *function, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return; + trace_ext4_error(inode->i_sb, function, line); es->s_last_error_ino = cpu_to_le32(inode->i_ino); es->s_last_error_block = cpu_to_le64(block); if (ext4_error_ratelimit(inode->i_sb)) { @@ -507,6 +507,7 @@ void __ext4_error_file(struct file *file, const char *function, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return; + trace_ext4_error(inode->i_sb, function, line); es = EXT4_SB(inode->i_sb)->s_es; es->s_last_error_ino = cpu_to_le32(inode->i_ino); if (ext4_error_ratelimit(inode->i_sb)) { @@ -719,6 +720,7 @@ __acquires(bitlock) if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) return; + trace_ext4_error(sb, function, line); es->s_last_error_ino = cpu_to_le32(ino); es->s_last_error_block = cpu_to_le64(block); __save_error_info(sb, function, line); @@ -2019,7 +2021,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt; + int def_errors, def_mount_opt = sbi->s_def_mount_opt; const struct mount_opts *m; char sep = nodefs ? 
'\n' : ','; @@ -2034,7 +2036,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || (m->flags & MOPT_CLEAR_ERR)) continue; - if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) + if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) continue; /* skip if same as the default */ if ((want_set && (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || @@ -2068,7 +2070,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PUTS("i_version"); if (nodefs || sbi->s_stripe) SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); - if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) { + if (nodefs || EXT4_MOUNT_DATA_FLAGS & + (sbi->s_mount_opt ^ def_mount_opt)) { if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) SEQ_OPTS_PUTS("data=journal"); else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) @@ -2081,7 +2084,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("inode_readahead_blks=%u", sbi->s_inode_readahead_blks); - if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && + if (test_opt(sb, INIT_INODE_TABLE) && (nodefs || (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); if (nodefs || sbi->s_max_dir_size_kb) @@ -2333,6 +2336,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u overlaps " "superblock", i); + if (!sb_rdonly(sb)) + return 0; } if (block_bitmap < first_block || block_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -2345,6 +2350,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u overlaps " "superblock", i); + if (!sb_rdonly(sb)) + return 0; } if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_msg(sb, KERN_ERR, 
"ext4_check_descriptors: " @@ -2357,6 +2364,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode table for group %u overlaps " "superblock", i); + if (!sb_rdonly(sb)) + return 0; } if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { @@ -3490,15 +3499,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } /* Load the checksum driver */ - if (ext4_has_feature_metadata_csum(sb) || - ext4_has_feature_ea_inode(sb)) { - sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); - ret = PTR_ERR(sbi->s_chksum_driver); - sbi->s_chksum_driver = NULL; - goto failed_mount; - } + sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); + if (IS_ERR(sbi->s_chksum_driver)) { + ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); + ret = PTR_ERR(sbi->s_chksum_driver); + sbi->s_chksum_driver = NULL; + goto failed_mount; } /* Check superblock checksum */ @@ -3660,6 +3666,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_INFO, "mounting ext2 file system " "using the ext4 subsystem"); else { + /* + * If we're probing be silent, if this looks like + * it's actually an ext[34] filesystem. + */ + if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) + goto failed_mount; ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " "to feature incompatibilities"); goto failed_mount; @@ -3671,6 +3683,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_INFO, "mounting ext3 file system " "using the ext4 subsystem"); else { + /* + * If we're probing be silent, if this looks like + * it's actually an ext4 filesystem. 
+ */ + if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) + goto failed_mount; ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " "to feature incompatibilities"); goto failed_mount; @@ -4094,10 +4112,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * cope, else JOURNAL_DATA */ if (jbd2_journal_check_available_features - (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) + (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { set_opt(sb, ORDERED_DATA); - else + sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA; + } else { set_opt(sb, JOURNAL_DATA); + sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; + } break; case EXT4_MOUNT_ORDERED_DATA: diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 1205261f130c..9ebd26c957c2 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -49,8 +49,7 @@ struct ext4_attr { } u; }; -static ssize_t session_write_kbytes_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) +static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; @@ -61,8 +60,7 @@ static ssize_t session_write_kbytes_show(struct ext4_attr *a, sbi->s_sectors_written_start) >> 1); } -static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) +static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; @@ -74,8 +72,7 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, EXT4_SB(sb)->s_sectors_written_start) >> 1))); } -static ssize_t inode_readahead_blks_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, +static ssize_t inode_readahead_blks_store(struct ext4_sb_info *sbi, const char *buf, size_t count) { unsigned long t; @@ -92,8 +89,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, return count; } -static ssize_t reserved_clusters_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, +static ssize_t 
reserved_clusters_store(struct ext4_sb_info *sbi, const char *buf, size_t count) { unsigned long long val; @@ -109,8 +105,7 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a, return count; } -static ssize_t trigger_test_error(struct ext4_attr *a, - struct ext4_sb_info *sbi, +static ssize_t trigger_test_error(struct ext4_sb_info *sbi, const char *buf, size_t count) { int len = count; @@ -268,9 +263,9 @@ static ssize_t ext4_attr_show(struct kobject *kobj, (s64) EXT4_C2B(sbi, percpu_counter_sum(&sbi->s_dirtyclusters_counter))); case attr_session_write_kbytes: - return session_write_kbytes_show(a, sbi, buf); + return session_write_kbytes_show(sbi, buf); case attr_lifetime_write_kbytes: - return lifetime_write_kbytes_show(a, sbi, buf); + return lifetime_write_kbytes_show(sbi, buf); case attr_reserved_clusters: return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long) @@ -306,7 +301,7 @@ static ssize_t ext4_attr_store(struct kobject *kobj, switch (a->attr_id) { case attr_reserved_clusters: - return reserved_clusters_store(a, sbi, buf, len); + return reserved_clusters_store(sbi, buf, len); case attr_pointer_ui: if (!ptr) return 0; @@ -316,9 +311,9 @@ static ssize_t ext4_attr_store(struct kobject *kobj, *((unsigned int *) ptr) = t; return len; case attr_inode_readahead: - return inode_readahead_blks_store(a, sbi, buf, len); + return inode_readahead_blks_store(sbi, buf, len); case attr_trigger_test_error: - return trigger_test_error(a, sbi, buf, len); + return trigger_test_error(sbi, buf, len); } return 0; } @@ -330,13 +325,6 @@ static void ext4_sb_release(struct kobject *kobj) complete(&sbi->s_kobj_unregister); } -static void ext4_kset_release(struct kobject *kobj) -{ - struct kset *kset = container_of(kobj, struct kset, kobj); - - kfree(kset); -} - static const struct sysfs_ops ext4_attr_ops = { .show = ext4_attr_show, .store = ext4_attr_store, @@ -348,19 +336,14 @@ static struct kobj_type ext4_sb_ktype = { .release = ext4_sb_release, }; -static struct 
kobj_type ext4_ktype = { - .sysfs_ops = &ext4_attr_ops, - .release = ext4_kset_release, -}; - -static struct kset *ext4_kset; - static struct kobj_type ext4_feat_ktype = { .default_attrs = ext4_feat_attrs, .sysfs_ops = &ext4_attr_ops, .release = (void (*)(struct kobject *))kfree, }; +static struct kobject *ext4_root; + static struct kobject *ext4_feat; #define PROC_FILE_SHOW_DEFN(name) \ @@ -398,9 +381,8 @@ int ext4_register_sysfs(struct super_block *sb) const struct ext4_proc_files *p; int err; - sbi->s_kobj.kset = ext4_kset; init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL, + err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root, "%s", sb->s_id); if (err) { kobject_put(&sbi->s_kobj); @@ -436,26 +418,18 @@ int __init ext4_init_sysfs(void) { int ret; - ext4_kset = kzalloc(sizeof(*ext4_kset), GFP_KERNEL); - if (!ext4_kset) + ext4_root = kobject_create_and_add("ext4", fs_kobj); + if (!ext4_root) return -ENOMEM; - kobject_set_name(&ext4_kset->kobj, "ext4"); - ext4_kset->kobj.parent = fs_kobj; - ext4_kset->kobj.ktype = &ext4_ktype; - ret = kset_register(ext4_kset); - if (ret) - goto kset_err; - ext4_feat = kzalloc(sizeof(*ext4_feat), GFP_KERNEL); if (!ext4_feat) { ret = -ENOMEM; - goto kset_err; + goto root_err; } - ext4_feat->kset = ext4_kset; ret = kobject_init_and_add(ext4_feat, &ext4_feat_ktype, - NULL, "features"); + ext4_root, "features"); if (ret) goto feat_err; @@ -464,17 +438,19 @@ int __init ext4_init_sysfs(void) feat_err: kobject_put(ext4_feat); -kset_err: - kset_unregister(ext4_kset); - ext4_kset = NULL; + ext4_feat = NULL; +root_err: + kobject_put(ext4_root); + ext4_root = NULL; return ret; } void ext4_exit_sysfs(void) { kobject_put(ext4_feat); - kset_unregister(ext4_kset); - ext4_kset = NULL; + ext4_feat = NULL; + kobject_put(ext4_root); + ext4_root = NULL; remove_proc_entry(proc_dirname, NULL); ext4_proc_root = NULL; } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 
63656dbafdc4..499cb4b1fbd2 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -195,10 +195,13 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end, /* Check the values */ while (!IS_LAST_ENTRY(entry)) { - if (entry->e_value_size != 0 && - entry->e_value_inum == 0) { + u32 size = le32_to_cpu(entry->e_value_size); + + if (size > EXT4_XATTR_SIZE_MAX) + return -EFSCORRUPTED; + + if (size != 0 && entry->e_value_inum == 0) { u16 offs = le16_to_cpu(entry->e_value_offs); - u32 size = le32_to_cpu(entry->e_value_size); void *value; /* @@ -222,25 +225,36 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end, } static inline int -ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) +__ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh, + const char *function, unsigned int line) { - int error; + int error = -EFSCORRUPTED; if (buffer_verified(bh)) return 0; if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) - return -EFSCORRUPTED; + goto errout; + error = -EFSBADCRC; if (!ext4_xattr_block_csum_verify(inode, bh)) - return -EFSBADCRC; + goto errout; error = ext4_xattr_check_entries(BFIRST(bh), bh->b_data + bh->b_size, bh->b_data); - if (!error) +errout: + if (error) + __ext4_error_inode(inode, function, line, 0, + "corrupted xattr block %llu", + (unsigned long long) bh->b_blocknr); + else set_buffer_verified(bh); return error; } +#define ext4_xattr_check_block(inode, bh) \ + __ext4_xattr_check_block((inode), (bh), __func__, __LINE__) + + static int __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, void *end, const char *function, unsigned int line) @@ -262,18 +276,22 @@ errout: __xattr_check_inode((inode), (header), (end), __func__, __LINE__) static int -ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, - const char *name, int sorted) +xattr_find_entry(struct inode *inode, struct ext4_xattr_entry **pentry, + void *end, 
int name_index, const char *name, int sorted) { - struct ext4_xattr_entry *entry; + struct ext4_xattr_entry *entry, *next; size_t name_len; int cmp = 1; if (name == NULL) return -EINVAL; name_len = strlen(name); - entry = *pentry; - for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { + for (entry = *pentry; !IS_LAST_ENTRY(entry); entry = next) { + next = EXT4_XATTR_NEXT(entry); + if ((void *) next >= end) { + EXT4_ERROR_INODE(inode, "corrupted xattr entries"); + return -EFSCORRUPTED; + } cmp = name_index - entry->e_name_index; if (!cmp) cmp = name_len - entry->e_name_len; @@ -495,6 +513,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, struct buffer_head *bh = NULL; struct ext4_xattr_entry *entry; size_t size; + void *end; int error; struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); @@ -511,20 +530,20 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext4_xattr_check_block(inode, bh)) { - EXT4_ERROR_INODE(inode, "bad block %llu", - EXT4_I(inode)->i_file_acl); - error = -EFSCORRUPTED; + error = ext4_xattr_check_block(inode, bh); + if (error) goto cleanup; - } ext4_xattr_block_cache_insert(ea_block_cache, bh); entry = BFIRST(bh); - error = ext4_xattr_find_entry(&entry, name_index, name, 1); + end = bh->b_data + bh->b_size; + error = xattr_find_entry(inode, &entry, end, name_index, name, 1); if (error) goto cleanup; size = le32_to_cpu(entry->e_value_size); + error = -ERANGE; + if (unlikely(size > EXT4_XATTR_SIZE_MAX)) + goto cleanup; if (buffer) { - error = -ERANGE; if (size > buffer_size) goto cleanup; if (entry->e_value_inum) { @@ -533,8 +552,12 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, if (error) goto cleanup; } else { - memcpy(buffer, bh->b_data + - le16_to_cpu(entry->e_value_offs), size); + u16 offset = 
le16_to_cpu(entry->e_value_offs); + void *p = bh->b_data + offset; + + if (unlikely(p + size > end)) + goto cleanup; + memcpy(buffer, p, size); } } error = size; @@ -568,12 +591,14 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, if (error) goto cleanup; entry = IFIRST(header); - error = ext4_xattr_find_entry(&entry, name_index, name, 0); + error = xattr_find_entry(inode, &entry, end, name_index, name, 0); if (error) goto cleanup; size = le32_to_cpu(entry->e_value_size); + error = -ERANGE; + if (unlikely(size > EXT4_XATTR_SIZE_MAX)) + goto cleanup; if (buffer) { - error = -ERANGE; if (size > buffer_size) goto cleanup; if (entry->e_value_inum) { @@ -582,8 +607,12 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, if (error) goto cleanup; } else { - memcpy(buffer, (void *)IFIRST(header) + - le16_to_cpu(entry->e_value_offs), size); + u16 offset = le16_to_cpu(entry->e_value_offs); + void *p = (void *)IFIRST(header) + offset; + + if (unlikely(p + size > end)) + goto cleanup; + memcpy(buffer, p, size); } } error = size; @@ -676,12 +705,9 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext4_xattr_check_block(inode, bh)) { - EXT4_ERROR_INODE(inode, "bad block %llu", - EXT4_I(inode)->i_file_acl); - error = -EFSCORRUPTED; + error = ext4_xattr_check_block(inode, bh); + if (error) goto cleanup; - } ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh); error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); @@ -808,10 +834,9 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) goto out; } - if (ext4_xattr_check_block(inode, bh)) { - ret = -EFSCORRUPTED; + ret = ext4_xattr_check_block(inode, bh); + if (ret) goto out; - } for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) @@ -1793,19 +1818,16 @@ 
ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, ea_bdebug(bs->bh, "b_count=%d, refcount=%d", atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); - if (ext4_xattr_check_block(inode, bs->bh)) { - EXT4_ERROR_INODE(inode, "bad block %llu", - EXT4_I(inode)->i_file_acl); - error = -EFSCORRUPTED; + error = ext4_xattr_check_block(inode, bs->bh); + if (error) goto cleanup; - } /* Find the named attribute. */ bs->s.base = BHDR(bs->bh); bs->s.first = BFIRST(bs->bh); bs->s.end = bs->bh->b_data + bs->bh->b_size; bs->s.here = bs->s.first; - error = ext4_xattr_find_entry(&bs->s.here, i->name_index, - i->name, 1); + error = xattr_find_entry(inode, &bs->s.here, bs->s.end, + i->name_index, i->name, 1); if (error && error != -ENODATA) goto cleanup; bs->s.not_found = error; @@ -2164,8 +2186,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, if (error) return error; /* Find the named attribute. */ - error = ext4_xattr_find_entry(&is->s.here, i->name_index, - i->name, 0); + error = xattr_find_entry(inode, &is->s.here, is->s.end, + i->name_index, i->name, 0); if (error && error != -ENODATA) return error; is->s.not_found = error; @@ -2721,13 +2743,9 @@ retry: error = -EIO; if (!bh) goto cleanup; - if (ext4_xattr_check_block(inode, bh)) { - EXT4_ERROR_INODE(inode, "bad block %llu", - EXT4_I(inode)->i_file_acl); - error = -EFSCORRUPTED; - brelse(bh); + error = ext4_xattr_check_block(inode, bh); + if (error) goto cleanup; - } base = BHDR(bh); end = bh->b_data + bh->b_size; min_offs = end - base; @@ -2884,11 +2902,8 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, goto cleanup; } error = ext4_xattr_check_block(inode, bh); - if (error) { - EXT4_ERROR_INODE(inode, "bad block %llu (error %d)", - EXT4_I(inode)->i_file_acl, error); + if (error) goto cleanup; - } if (ext4_has_feature_ea_inode(inode->i_sb)) { for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 
dd54c4f995c8..f39cad2abe2a 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -70,6 +70,17 @@ struct ext4_xattr_entry { EXT4_I(inode)->i_extra_isize)) #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) +/* + * XATTR_SIZE_MAX is currently 64k, but for the purposes of checking + * for file system consistency errors, we use a somewhat bigger value. + * This allows XATTR_SIZE_MAX to grow in the future, but by using this + * instead of INT_MAX for certain consistency checks, we don't need to + * worry about arithmetic overflows. (Actually XATTR_SIZE_MAX is + * defined in include/uapi/linux/limits.h, so changing it is + * not going to be trivial....) + */ +#define EXT4_XATTR_SIZE_MAX (1 << 24) + /* * The minimum size of EA value when you start storing it in an external inode * size of block - size of header - size of 1 entry - 4 null bytes diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 3fbf48ec2188..dfb057900e79 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -974,7 +974,7 @@ out: } /* - * This is a variaon of __jbd2_update_log_tail which checks for validity of + * This is a variation of __jbd2_update_log_tail which checks for validity of * provided log tail and locks j_checkpoint_mutex. So it is safe against races * with other threads updating log tail. 
*/ @@ -1417,6 +1417,9 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, journal_superblock_t *sb = journal->j_superblock; int ret; + if (is_journal_aborted(journal)) + return -EIO; + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", tail_block, tail_tid); @@ -1483,12 +1486,15 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op) void jbd2_journal_update_sb_errno(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; + int errcode; read_lock(&journal->j_state_lock); - jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", - journal->j_errno); - sb->s_errno = cpu_to_be32(journal->j_errno); + errcode = journal->j_errno; read_unlock(&journal->j_state_lock); + if (errcode == -ESHUTDOWN) + errcode = 0; + jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode); + sb->s_errno = cpu_to_be32(errcode); jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA); } @@ -2105,12 +2111,22 @@ void __jbd2_journal_abort_hard(journal_t *journal) * but don't do any other IO. 
*/ static void __journal_abort_soft (journal_t *journal, int errno) { - if (journal->j_flags & JBD2_ABORT) - return; + int old_errno; - if (!journal->j_errno) + write_lock(&journal->j_state_lock); + old_errno = journal->j_errno; + if (!journal->j_errno || errno == -ESHUTDOWN) journal->j_errno = errno; + if (journal->j_flags & JBD2_ABORT) { + write_unlock(&journal->j_state_lock); + if (!old_errno && old_errno != -ESHUTDOWN && + errno == -ESHUTDOWN) + jbd2_journal_update_sb_errno(journal); + return; + } + write_unlock(&journal->j_state_lock); + __jbd2_journal_abort_hard(journal); if (errno) { diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index f99910b69c78..a4967b27ffb6 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -600,8 +600,8 @@ static int do_one_pass(journal_t *journal, success = -EFSBADCRC; printk(KERN_ERR "JBD2: Invalid " "checksum recovering " - "block %llu in log\n", - blocknr); + "data block %llu in " + "log\n", blocknr); block_error = 1; goto skip_write; } diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 4d0e3af4e561..0e31eb136c57 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2585,6 +2585,49 @@ DEFINE_GETFSMAP_EVENT(ext4_getfsmap_low_key); DEFINE_GETFSMAP_EVENT(ext4_getfsmap_high_key); DEFINE_GETFSMAP_EVENT(ext4_getfsmap_mapping); +TRACE_EVENT(ext4_shutdown, + TP_PROTO(struct super_block *sb, unsigned long flags), + + TP_ARGS(sb, flags), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( unsigned, flags ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->flags = flags; + ), + + TP_printk("dev %d,%d flags %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->flags) +); + +TRACE_EVENT(ext4_error, + TP_PROTO(struct super_block *sb, const char *function, + unsigned int line), + + TP_ARGS(sb, function, line), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( const char *, function ) + __field( unsigned, line ) + ), + + TP_fast_assign( + __entry->dev 
= sb->s_dev; + __entry->function = function; + __entry->line = line; + ), + + TP_printk("dev %d,%d function %s line %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->function, __entry->line) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */