diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ad14227f509e..455e6e6e5cb9 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -970,7 +970,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
 		if (max_blocks > DIO_MAX_BLOCKS)
 			max_blocks = DIO_MAX_BLOCKS;
 		handle = ext3_journal_start(inode, DIO_CREDITS +
-				2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb));
+				EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			goto out;
@@ -3146,8 +3146,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 
 		/* (user+group)*(old+new) structure, inode write (sb,
 		 * inode block, ? - but truncate inode update has it) */
-		handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
-					EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+		handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+					EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
@@ -3239,7 +3239,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
 #ifdef CONFIG_QUOTA
 	/* We know that structure was already allocated during vfs_dq_init so
 	 * we will be updating only the data blocks + inodes */
-	ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
+	ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 #endif
 
 	return ret;
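Note on the credit arithmetic above: the removed `2 *` factors hardcoded the number of quota types (user and group), while the EXT3_MAXQUOTAS_* macros, defined in include/linux/ext3_jbd.h at the end of this patch, scale by MAXQUOTAS instead. For these call sites the reserved credits are numerically unchanged while MAXQUOTAS == 2; only the hardcoding goes away. A standalone illustration (INIT and DEL are stand-in per-quota-type costs, not the kernel's values):

    #define MAXQUOTAS 2     /* user + group */
    #define INIT 10         /* stand-in: per-type quota init cost */
    #define DEL   5         /* stand-in: per-type quota delete cost */

    /* The old 2*(...) form and the new MAXQUOTAS*... form agree today. */
    _Static_assert(2 * (INIT + DEL) + 3 ==
                   MAXQUOTAS * INIT + MAXQUOTAS * DEL + 3,
                   "credit formulas are equivalent while MAXQUOTAS == 2");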
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index aad6400c9b77..7b0e44f7d66f 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1699,7 +1699,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1733,7 +1733,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1769,7 +1769,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1920,7 +1920,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 	struct ext3_iloc iloc;
 	int err = 0, rc;
 
-	lock_super(sb);
+	mutex_lock(&EXT3_SB(sb)->s_orphan_lock);
 	if (!list_empty(&EXT3_I(inode)->i_orphan))
 		goto out_unlock;
 
@@ -1929,9 +1929,13 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 
 	/* @@@ FIXME: Observation from aviro:
 	 * I think I can trigger J_ASSERT in ext3_orphan_add().  We block
-	 * here (on lock_super()), so race with ext3_link() which might bump
+	 * here (on s_orphan_lock), so race with ext3_link() which might bump
 	 * ->i_nlink. For, say it, character device. Not a regular file,
 	 * not a directory, not a symlink and ->i_nlink > 0.
+	 *
+	 * tytso, 4/25/2009: I'm not sure how that could happen;
+	 * shouldn't the fs core protect us from these sorts of
+	 * unlink()/link() races?
 	 */
 	J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -1968,7 +1972,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
 out_unlock:
-	unlock_super(sb);
+	mutex_unlock(&EXT3_SB(sb)->s_orphan_lock);
 	ext3_std_error(inode->i_sb, err);
 	return err;
 }
@@ -1986,11 +1990,9 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
 	struct ext3_iloc iloc;
 	int err = 0;
 
-	lock_super(inode->i_sb);
-	if (list_empty(&ei->i_orphan)) {
-		unlock_super(inode->i_sb);
-		return 0;
-	}
+	mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
+	if (list_empty(&ei->i_orphan))
+		goto out;
 
 	ino_next = NEXT_ORPHAN(inode);
 	prev = ei->i_orphan.prev;
@@ -2040,7 +2042,7 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
 out_err:
 	ext3_std_error(inode->i_sb, err);
 out:
-	unlock_super(inode->i_sb);
+	mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
 	return err;
 
 out_brelse:
@@ -2175,7 +2177,7 @@ static int ext3_symlink (struct inode * dir,
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 5f83b6179178..54351ac7cef9 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -209,7 +209,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		err = -EBUSY;
 		goto exit_journal;
@@ -324,7 +324,7 @@ exit_bh:
 	brelse(bh);
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext3_journal_stop(handle)) && !err)
 		err = err2;
 
@@ -662,11 +662,12 @@ exit_free:
  * important part is that the new block and inode counts are in the backup
  * superblocks, and the location of the new group metadata in the GDT backups.
  *
- * We do not need lock_super() for this, because these blocks are not
- * otherwise touched by the filesystem code when it is mounted.  We don't
- * need to worry about last changing from sbi->s_groups_count, because the
- * worst that can happen is that we do not copy the full number of backups
- * at this time.  The resize which changed s_groups_count will backup again.
+ * We do not need to take the s_resize_lock for this, because these
+ * blocks are not otherwise touched by the filesystem code when it is
+ * mounted.  We don't need to worry about last changing from
+ * sbi->s_groups_count, because the worst that can happen is that we
+ * do not copy the full number of backups at this time.  The resize
+ * which changed s_groups_count will backup again.
 */
static void update_backups(struct super_block *sb,
			   int blk_off, char *data, int size)
@@ -825,7 +826,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		ext3_warning(sb, __func__,
			     "multiple resizers run on filesystem!");
@@ -856,7 +857,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	/*
 	 * OK, now we've set up the new group.  Time to make it active.
 	 *
-	 * Current kernels don't lock all allocations via lock_super(),
+	 * We do not lock all allocations via the s_resize_lock,
 	 * so we have to be safe wrt. concurrent accesses to the group
 	 * data.  So we need to be careful to set all of the relevant
 	 * group descriptor data etc. *before* we enable the group.
@@ -900,12 +901,12 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	 *
 	 * The precise rules we use are:
 	 *
-	 * * Writers of s_groups_count *must* hold lock_super
+	 * * Writers of s_groups_count *must* hold s_resize_lock
 	 * AND
 	 * * Writers must perform a smp_wmb() after updating all dependent
 	 *   data and before modifying the groups count
 	 *
-	 * * Readers must hold lock_super() over the access
+	 * * Readers must hold s_resize_lock over the access
 	 * OR
 	 * * Readers must perform an smp_rmb() after reading the groups count
 	 *   and before reading any dependent data.
@@ -936,7 +937,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext3_journal_stop(handle)) && !err)
 		err = err2;
 	if (!err) {
@@ -973,7 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 
 	/* We don't need to worry about locking wrt other resizers just
 	 * yet: we're going to revalidate es->s_blocks_count after
-	 * taking lock_super() below. */
+	 * taking the s_resize_lock below. */
 	o_blocks_count = le32_to_cpu(es->s_blocks_count);
 	o_groups_count = EXT3_SB(sb)->s_groups_count;
 
@@ -1045,11 +1046,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&EXT3_SB(sb)->s_resize_lock);
 	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
 		ext3_warning(sb, __func__,
			     "multiple resizers run on filesystem!");
-		unlock_super(sb);
+		mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 		ext3_journal_stop(handle);
 		err = -EBUSY;
 		goto exit_put;
@@ -1059,13 +1060,13 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
			EXT3_SB(sb)->s_sbh))) {
 		ext3_warning(sb, __func__,
			     "error %d on journal write access", err);
-		unlock_super(sb);
+		mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 		ext3_journal_stop(handle);
 		goto exit_put;
 	}
 	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
 	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-	unlock_super(sb);
+	mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
		   o_blocks_count + add);
 	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
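The smp_wmb()/smp_rmb() rules quoted in the ext3_group_add() comment above are a classic publish/subscribe pairing: the writer makes all dependent data visible before bumping the count, and a lockless reader orders its read of the count before any dependent data. A standalone sketch, not part of the patch; the struct, MAX_GROUPS, and helper names are invented, and the barrier macros are stand-ins for the kernel's:

    #define MAX_GROUPS 1024
    #define smp_wmb() __sync_synchronize()  /* stand-in for the kernel macro */
    #define smp_rmb() __sync_synchronize()  /* stand-in for the kernel macro */

    struct group_desc;                      /* opaque for this sketch */

    struct sbi_sketch {
            unsigned long s_groups_count;
            struct group_desc *s_group_desc[MAX_GROUPS];
    };

    /* Writer side: runs with s_resize_lock held. */
    static void publish_group(struct sbi_sketch *sbi, struct group_desc *gd)
    {
            sbi->s_group_desc[sbi->s_groups_count] = gd; /* dependent data */
            smp_wmb();             /* data must be visible before the count */
            sbi->s_groups_count++; /* readers may now use the new group */
    }

    /* Reader side: lockless, pairs with the smp_wmb() above. */
    static struct group_desc *newest_group(struct sbi_sketch *sbi)
    {
            unsigned long n = sbi->s_groups_count;

            smp_rmb();             /* read the count before dependent data */
            return n ? sbi->s_group_desc[n - 1] : 0;
    }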
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7ad1e8c30bd0..afa2b569da10 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1928,6 +1928,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	sb->dq_op = &ext3_quota_operations;
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+	mutex_init(&sbi->s_orphan_lock);
+	mutex_init(&sbi->s_resize_lock);
 
 	sb->s_root = NULL;
 
@@ -2014,14 +2016,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	}
 
 	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
-	/*
-	 * akpm: core read_super() calls in here with the superblock locked.
-	 * That deadlocks, because orphan cleanup needs to lock the superblock
-	 * in numerous places.  Here we just pop the lock - it's relatively
-	 * harmless, because we are now ready to accept write_super() requests,
-	 * and aviro says that's the only reason for hanging onto the
-	 * superblock lock.
-	 */
+
 	EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
 	ext3_orphan_cleanup(sb, es);
 	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
@@ -2403,13 +2398,11 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
 	if (journal_flush(journal) < 0)
 		goto out;
 
-	lock_super(sb);
 	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
	    sb->s_flags & MS_RDONLY) {
 		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
 		ext3_commit_super(sb, es, 1);
 	}
-	unlock_super(sb);
 
 out:
 	journal_unlock_updates(journal);
@@ -2601,13 +2594,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
			    (sbi->s_mount_state & EXT3_VALID_FS))
				es->s_state = cpu_to_le16(sbi->s_mount_state);
 
-			/*
-			 * We have to unlock super so that we can wait for
-			 * transactions.
-			 */
-			unlock_super(sb);
			ext3_mark_recovery_complete(sb, es);
-			lock_super(sb);
		} else {
			__le32 ret;
			if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ab31e65d46d0..56f9271ee8cc 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -704,6 +704,10 @@ struct ext4_inode_info {
 	__u16 i_extra_isize;
 
 	spinlock_t i_block_reservation_lock;
+#ifdef CONFIG_QUOTA
+	/* quota space reservation, managed internally by quota code */
+	qsize_t i_reserved_quota;
+#endif
 
 	/* completed async DIOs that might need unwritten extents handling */
 	struct list_head i_aio_dio_complete_list;
@@ -1435,7 +1439,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
		struct address_space *mapping, loff_t from);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
-extern qsize_t ext4_get_reserved_space(struct inode *inode);
+extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern int flush_aio_dio_completed_IO(struct inode *inode);
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5352db1a3086..ab807963a614 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1003,17 +1003,12 @@ out:
 	return err;
 }
 
-qsize_t ext4_get_reserved_space(struct inode *inode)
+#ifdef CONFIG_QUOTA
+qsize_t *ext4_get_reserved_space(struct inode *inode)
 {
-	unsigned long long total;
-
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-	total = EXT4_I(inode)->i_reserved_data_blocks +
-		EXT4_I(inode)->i_reserved_meta_blocks;
-	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
-	return (total << inode->i_blkbits);
+	return &EXT4_I(inode)->i_reserved_quota;
 }
+#endif
 
 /*
  * Calculate the number of metadata blocks need to reserve
  * to allocate @blocks for non extent file based file
@@ -1051,7 +1046,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
 static void ext4_da_update_reserve_space(struct inode *inode, int used)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	int total, mdb, mdb_free;
+	int total, mdb, mdb_free, mdb_claim = 0;
 
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 	/* recalculate the number of metablocks still need to be reserved */
@@ -1064,7 +1059,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 
 	if (mdb_free) {
 		/* Account for allocated meta_blocks */
-		mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+		mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks;
+		BUG_ON(mdb_free < mdb_claim);
+		mdb_free -= mdb_claim;
 
 		/* update fs dirty blocks counter */
 		percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
@@ -1075,8 +1072,11 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 	/* update per-inode reservations */
 	BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
 	EXT4_I(inode)->i_reserved_data_blocks -= used;
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
+	vfs_dq_claim_block(inode, used + mdb_claim);
+
 	/*
 	 * free those over-booking quota for metadata blocks
 	 */
@@ -1816,19 +1816,17 @@ repeat:
 	md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
 	total = md_needed + nrblocks;
+	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
 	/*
 	 * Make quota reservation here to prevent quota overflow
 	 * later. Real quota accounting is done at pages writeout
 	 * time.
 	 */
-	if (vfs_dq_reserve_block(inode, total)) {
-		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	if (vfs_dq_reserve_block(inode, total))
 		return -EDQUOT;
-	}
 
 	if (ext4_claim_free_blocks(sbi, total)) {
-		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 		vfs_dq_release_reservation_block(inode, total);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
@@ -1836,10 +1834,11 @@ repeat:
 		}
 		return -ENOSPC;
 	}
+	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 	EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
-	EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
-
+	EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
 	return 0;       /* success */
@@ -4794,6 +4793,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
	inode->i_size = ext4_isize(raw_inode);
	ei->i_disksize = inode->i_size;
+#ifdef CONFIG_QUOTA
+	ei->i_reserved_quota = 0;
+#endif
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
	ei->i_block_group = iloc.block_group;
	ei->i_last_alloc_group = ~0;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b1fd3daadc9c..d34afad3e137 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2755,12 +2755,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
		/* release all the reserved blocks if non delalloc */
		percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
-	else {
-		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
-						ac->ac_b_ex.fe_len);
-		/* convert reserved quota blocks to real quota blocks */
-		vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
-	}

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 827bde1f2594..6ed9aa91f27d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -704,6 +704,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
	ei->i_allocated_meta_blocks = 0;
	ei->i_delalloc_reserved_flag = 0;
	spin_lock_init(&(ei->i_block_reservation_lock));
+#ifdef CONFIG_QUOTA
+	ei->i_reserved_quota = 0;
+#endif
	INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
	ei->cur_aio_dio = NULL;
	ei->i_sync_tid = 0;
@@ -1014,7 +1017,9 @@ static const struct dquot_operations ext4_quota_operations = {
	.reserve_space	= dquot_reserve_space,
	.claim_space	= dquot_claim_space,
	.release_rsv	= dquot_release_reserved_space,
+#ifdef CONFIG_QUOTA
	.get_reserved_space = ext4_get_reserved_space,
+#endif
	.alloc_inode	= dquot_alloc_inode,
	.free_space	= dquot_free_space,
	.free_inode	= dquot_free_inode,
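Taken together, the ext4 hunks above move the vfs_dq_claim_block() call out of mballoc and into ext4_da_update_reserve_space(), and stop holding i_block_reservation_lock across the quota calls in ext4_da_reserve_space(). The delayed-allocation lifecycle itself is unchanged: reserve quota when a page is dirtied, release the reservation on failure, and claim it once blocks are really allocated. A condensed sketch of that flow; the vfs_dq_* calls are the real quota API used above, while the other helper names are invented:

    /* At buffered-write time: nothing is allocated yet. */
    static int da_reserve(struct inode *inode, int nrblocks)
    {
            /* reserve quota first ... */
            if (vfs_dq_reserve_block(inode, nrblocks))
                    return -EDQUOT;
            /* ... then the filesystem's own free-space accounting */
            if (fs_claim_free_blocks(inode, nrblocks)) {
                    vfs_dq_release_reservation_block(inode, nrblocks);
                    return -ENOSPC;
            }
            return 0;
    }

    /* At writeback time: real blocks now back the delalloc pages. */
    static void da_blocks_allocated(struct inode *inode, int used)
    {
            /* convert the earlier reservation into charged usage */
            vfs_dq_claim_block(inode, used);
    }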
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 4160afad6d00..bd224eec9b07 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1913,7 +1913,7 @@ static void __init jbd_create_debugfs_entry(void)
 {
 	jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
 	if (jbd_debugfs_dir)
-		jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO,
+		jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO | S_IWUSR,
					       jbd_debugfs_dir,
					       &journal_enable_debug);
 }
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b7ca3a92a4db..17af879e6e9e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2115,7 +2115,8 @@ static void __init jbd2_create_debugfs_entry(void)
 {
 	jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
 	if (jbd2_debugfs_dir)
-		jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, S_IRUGO,
+		jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME,
					       S_IRUGO | S_IWUSR,
					       jbd2_debugfs_dir,
					       &jbd2_journal_enable_debug);
 }
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index cd6bb9a33c13..dea86abdf2e7 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -323,6 +323,30 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
 }
 EXPORT_SYMBOL(dquot_mark_dquot_dirty);
 
+/* Dirtify all the dquots - this can block when journalling */
+static inline int mark_all_dquot_dirty(struct dquot * const *dquot)
+{
+	int ret, err, cnt;
+
+	ret = err = 0;
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (dquot[cnt])
+			/* Even in case of error we have to continue */
+			ret = mark_dquot_dirty(dquot[cnt]);
+		if (!err)
+			err = ret;
+	}
+	return err;
+}
+
+static inline void dqput_all(struct dquot **dquot)
+{
+	unsigned int cnt;
+
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		dqput(dquot[cnt]);
+}
+
 /* This function needs dq_list_lock */
 static inline int clear_dquot_dirty(struct dquot *dquot)
 {
@@ -1268,8 +1292,7 @@ int dquot_initialize(struct inode *inode, int type)
 out_err:
	up_write(&sb_dqopt(sb)->dqptr_sem);
	/* Drop unused references */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		dqput(got[cnt]);
+	dqput_all(got);
	return ret;
 }
 EXPORT_SYMBOL(dquot_initialize);
@@ -1288,9 +1311,7 @@ int dquot_drop(struct inode *inode)
		inode->i_dquot[cnt] = NULL;
	}
	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		dqput(put[cnt]);
+	dqput_all(put);
	return 0;
 }
 EXPORT_SYMBOL(dquot_drop);
@@ -1318,6 +1339,67 @@ void vfs_dq_drop(struct inode *inode)
 }
 EXPORT_SYMBOL(vfs_dq_drop);
 
+/*
+ * inode_reserved_space is managed internally by quota, and protected by
+ * i_lock similar to i_blocks+i_bytes.
+ */
+static qsize_t *inode_reserved_space(struct inode *inode)
+{
+	/* Filesystem must explicitly define its own method in order to use
+	 * quota reservation interface */
+	BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
+	return inode->i_sb->dq_op->get_reserved_space(inode);
+}
+
+static void inode_add_rsv_space(struct inode *inode, qsize_t number)
+{
+	spin_lock(&inode->i_lock);
+	*inode_reserved_space(inode) += number;
+	spin_unlock(&inode->i_lock);
+}
+
+static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+{
+	spin_lock(&inode->i_lock);
+	*inode_reserved_space(inode) -= number;
+	__inode_add_bytes(inode, number);
+	spin_unlock(&inode->i_lock);
+}
+
+static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
+{
+	spin_lock(&inode->i_lock);
+	*inode_reserved_space(inode) -= number;
+	spin_unlock(&inode->i_lock);
+}
+
+static qsize_t inode_get_rsv_space(struct inode *inode)
+{
+	qsize_t ret;
+
+	spin_lock(&inode->i_lock);
+	ret = *inode_reserved_space(inode);
+	spin_unlock(&inode->i_lock);
+	return ret;
+}
+
+static void inode_incr_space(struct inode *inode, qsize_t number,
+				int reserve)
+{
+	if (reserve)
+		inode_add_rsv_space(inode, number);
+	else
+		inode_add_bytes(inode, number);
+}
+
+static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
+{
+	if (reserve)
+		inode_sub_rsv_space(inode, number);
+	else
+		inode_sub_bytes(inode, number);
+}
+
 /*
  * Following four functions update i_blocks+i_bytes fields and
  * quota information (together with appropriate checks)
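The helpers above treat the value behind ->get_reserved_space as quota-private state guarded by i_lock, exactly like i_blocks and i_bytes. A filesystem opts in by embedding one qsize_t in its per-inode structure and returning its address; the ext4 hunks earlier in this patch do precisely that. A minimal sketch for a hypothetical filesystem (the myfs names are illustrative, not real code):

    struct myfs_inode_info {
            qsize_t i_reserved_quota;       /* written only by quota code */
            struct inode vfs_inode;
    };

    static qsize_t *myfs_get_reserved_space(struct inode *inode)
    {
            /* map the VFS inode back to the containing fs inode */
            return &container_of(inode, struct myfs_inode_info,
                                 vfs_inode)->i_reserved_quota;
    }

    static const struct dquot_operations myfs_quota_operations = {
            /* ... the generic dquot_* callbacks ... */
            .get_reserved_space = myfs_get_reserved_space,
    };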
@@ -1336,6 +1418,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
	int cnt, ret = QUOTA_OK;
	char warntype[MAXQUOTAS];

+	/*
+	 * First test before acquiring mutex - solves deadlocks when we
+	 * re-enter the quota code and are already holding the mutex
+	 */
+	if (IS_NOQUOTA(inode)) {
+		inode_incr_space(inode, number, reserve);
+		goto out;
+	}
+
+	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	if (IS_NOQUOTA(inode)) {
+		inode_incr_space(inode, number, reserve);
+		goto out_unlock;
+	}
+
	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
		warntype[cnt] = QUOTA_NL_NOWARN;

@@ -1346,7 +1443,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
		    == NO_QUOTA) {
			ret = NO_QUOTA;
-			goto out_unlock;
+			spin_unlock(&dq_data_lock);
+			goto out_flush_warn;
		}
	}
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1357,64 +1455,29 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
		else
			dquot_incr_space(inode->i_dquot[cnt], number);
	}
-	if (!reserve)
-		inode_add_bytes(inode, number);
-out_unlock:
+	inode_incr_space(inode, number, reserve);
	spin_unlock(&dq_data_lock);
+
+	if (reserve)
+		goto out_flush_warn;
+	mark_all_dquot_dirty(inode->i_dquot);
+out_flush_warn:
	flush_warnings(inode->i_dquot, warntype);
+out_unlock:
+	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
	return ret;
 }

 int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
 {
-	int cnt, ret = QUOTA_OK;
-
-	/*
-	 * First test before acquiring mutex - solves deadlocks when we
-	 * re-enter the quota code and are already holding the mutex
-	 */
-	if (IS_NOQUOTA(inode)) {
-		inode_add_bytes(inode, number);
-		goto out;
-	}
-
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode)) {
-		inode_add_bytes(inode, number);
-		goto out_unlock;
-	}
-
-	ret = __dquot_alloc_space(inode, number, warn, 0);
-	if (ret == NO_QUOTA)
-		goto out_unlock;
-
-	/* Dirtify all the dquots - this can block when journalling */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (inode->i_dquot[cnt])
-			mark_dquot_dirty(inode->i_dquot[cnt]);
-out_unlock:
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-	return ret;
+	return __dquot_alloc_space(inode, number, warn, 0);
 }
 EXPORT_SYMBOL(dquot_alloc_space);

 int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
 {
-	int ret = QUOTA_OK;
-
-	if (IS_NOQUOTA(inode))
-		goto out;
-
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode))
-		goto out_unlock;
-
-	ret = __dquot_alloc_space(inode, number, warn, 1);
-out_unlock:
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-	return ret;
+	return __dquot_alloc_space(inode, number, warn, 1);
 }
 EXPORT_SYMBOL(dquot_reserve_space);
@@ -1455,10 +1518,7 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 warn_put_all:
	spin_unlock(&dq_data_lock);
	if (ret == QUOTA_OK)
-		/* Dirtify all the dquots - this can block when journalling */
-		for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-			if (inode->i_dquot[cnt])
-				mark_dquot_dirty(inode->i_dquot[cnt]);
+		mark_all_dquot_dirty(inode->i_dquot);
	flush_warnings(inode->i_dquot, warntype);
	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	return ret;
@@ -1471,14 +1531,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
	int ret = QUOTA_OK;

	if (IS_NOQUOTA(inode)) {
-		inode_add_bytes(inode, number);
+		inode_claim_rsv_space(inode, number);
		goto out;
	}

	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	if (IS_NOQUOTA(inode)) {
		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-		inode_add_bytes(inode, number);
+		inode_claim_rsv_space(inode, number);
		goto out;
	}

@@ -1490,51 +1550,19 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
			number);
	}
	/* Update inode bytes */
-	inode_add_bytes(inode, number);
+	inode_claim_rsv_space(inode, number);
	spin_unlock(&dq_data_lock);
-	/* Dirtify all the dquots - this can block when journalling */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (inode->i_dquot[cnt])
-			mark_dquot_dirty(inode->i_dquot[cnt]);
+	mark_all_dquot_dirty(inode->i_dquot);
	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
out:
	return ret;
 }
 EXPORT_SYMBOL(dquot_claim_space);

-/*
- * Release reserved quota space
- */
-void dquot_release_reserved_space(struct inode *inode, qsize_t number)
-{
-	int cnt;
-
-	if (IS_NOQUOTA(inode))
-		goto out;
-
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode))
-		goto out_unlock;
-
-	spin_lock(&dq_data_lock);
-	/* Release reserved dquots */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt])
-			dquot_free_reserved_space(inode->i_dquot[cnt], number);
-	}
-	spin_unlock(&dq_data_lock);
-
-out_unlock:
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-	return;
-}
-EXPORT_SYMBOL(dquot_release_reserved_space);
-
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_free_space(struct inode *inode, qsize_t number)
+int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
 {
	unsigned int cnt;
	char warntype[MAXQUOTAS];
@@ -1543,7 +1571,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
	 * re-enter the quota code and are already holding the mutex */
	if (IS_NOQUOTA(inode)) {
 out_sub:
-		inode_sub_bytes(inode, number);
+		inode_decr_space(inode, number, reserve);
		return QUOTA_OK;
	}

@@ -1558,20 +1586,39 @@ out_sub:
		if (!inode->i_dquot[cnt])
			continue;
		warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
-		dquot_decr_space(inode->i_dquot[cnt], number);
+		if (reserve)
+			dquot_free_reserved_space(inode->i_dquot[cnt], number);
+		else
+			dquot_decr_space(inode->i_dquot[cnt], number);
	}
-	inode_sub_bytes(inode, number);
+	inode_decr_space(inode, number, reserve);
	spin_unlock(&dq_data_lock);
-	/* Dirtify all the dquots - this can block when journalling */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (inode->i_dquot[cnt])
-			mark_dquot_dirty(inode->i_dquot[cnt]);
+
+	if (reserve)
+		goto out_unlock;
+	mark_all_dquot_dirty(inode->i_dquot);
+out_unlock:
	flush_warnings(inode->i_dquot, warntype);
	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	return QUOTA_OK;
 }
+
+int dquot_free_space(struct inode *inode, qsize_t number)
+{
+	return __dquot_free_space(inode, number, 0);
+}
 EXPORT_SYMBOL(dquot_free_space);

+/*
+ * Release reserved quota space
+ */
+void dquot_release_reserved_space(struct inode *inode, qsize_t number)
+{
+	__dquot_free_space(inode, number, 1);
+}
+EXPORT_SYMBOL(dquot_release_reserved_space);
+
 /*
  * This operation can block, but only after everything is updated
  */
@@ -1599,29 +1646,13 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
		dquot_decr_inodes(inode->i_dquot[cnt], number);
	}
	spin_unlock(&dq_data_lock);
-	/* Dirtify all the dquots - this can block when journalling */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (inode->i_dquot[cnt])
-			mark_dquot_dirty(inode->i_dquot[cnt]);
+	mark_all_dquot_dirty(inode->i_dquot);
	flush_warnings(inode->i_dquot, warntype);
	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	return QUOTA_OK;
 }
 EXPORT_SYMBOL(dquot_free_inode);

-/*
- * call back function, get reserved quota space from underlying fs
- */
-qsize_t dquot_get_reserved_space(struct inode *inode)
-{
-	qsize_t reserved_space = 0;
-
-	if (sb_any_quota_active(inode->i_sb) &&
-	    inode->i_sb->dq_op->get_reserved_space)
-		reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
-	return reserved_space;
-}
-
 /*
  * Transfer the number of inode and blocks from one diskquota to an other.
  *
@@ -1665,7 +1696,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
	}
	spin_lock(&dq_data_lock);
	cur_space = inode_get_bytes(inode);
-	rsv_space = dquot_get_reserved_space(inode);
+	rsv_space = inode_get_rsv_space(inode);
	space = cur_space + rsv_space;
	/* Build the transfer_from list and check the limits */
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1709,25 +1740,18 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
	spin_unlock(&dq_data_lock);
	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);

-	/* Dirtify all the dquots - this can block when journalling */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (transfer_from[cnt])
-			mark_dquot_dirty(transfer_from[cnt]);
-		if (transfer_to[cnt]) {
-			mark_dquot_dirty(transfer_to[cnt]);
-			/* The reference we got is transferred to the inode */
-			transfer_to[cnt] = NULL;
-		}
-	}
+	mark_all_dquot_dirty(transfer_from);
+	mark_all_dquot_dirty(transfer_to);
+	/* The reference we got is transferred to the inode */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		transfer_to[cnt] = NULL;
 warn_put_all:
	flush_warnings(transfer_to, warntype_to);
	flush_warnings(transfer_from, warntype_from_inodes);
	flush_warnings(transfer_from, warntype_from_space);
 put_all:
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		dqput(transfer_from[cnt]);
-		dqput(transfer_to[cnt]);
-	}
+	dqput_all(transfer_from);
+	dqput_all(transfer_to);
	return ret;
 over_quota:
	spin_unlock(&dq_data_lock);
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 3dfc23e02135..e3da02f4986f 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -97,8 +97,11 @@ static int v2_read_file_info(struct super_block *sb, int type)
	unsigned int version;

	if (!v2_read_header(sb, type, &dqhead))
-		return 0;
+		return -1;
	version = le32_to_cpu(dqhead.dqh_version);
+	if ((info->dqi_fmt_id == QFMT_VFS_V0 && version != 0) ||
+	    (info->dqi_fmt_id == QFMT_VFS_V1 && version != 1))
+		return -1;

	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -120,8 +123,8 @@ static int v2_read_file_info(struct super_block *sb, int type)
		info->dqi_maxilimit = 0xffffffff;
	} else {
		/* used space is stored as unsigned 64-bit value */
-		info->dqi_maxblimit = 0xffffffffffffffff;	/* 2^64-1 */
-		info->dqi_maxilimit = 0xffffffffffffffff;
+		info->dqi_maxblimit = 0xffffffffffffffffULL;	/* 2^64-1 */
+		info->dqi_maxilimit = 0xffffffffffffffffULL;
	}
	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
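Two small quota_v2 fixes above: v2_read_file_info() now fails instead of silently reporting success when the header cannot be read, and it rejects a quota file whose on-disk version does not match the requested format (QFMT_VFS_V0 expects version 0, QFMT_VFS_V1 expects version 1). The added ULL suffixes make the type of the 64-bit limit constants explicit rather than implicit. The version check, restated as a hypothetical standalone helper for clarity:

    /* Sketch only: the same acceptance rule as the added lines above. */
    static int v2_version_matches(unsigned int fmt_id, unsigned int version)
    {
            switch (fmt_id) {
            case QFMT_VFS_V0:
                    return version == 0;
            case QFMT_VFS_V1:
                    return version == 1;
            default:
                    return 0;       /* unknown format: never accept */
            }
    }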
diff --git a/fs/stat.c b/fs/stat.c
index 075694e31d8b..c4ecd52c5737 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -401,9 +401,9 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
 }
 #endif /* __ARCH_WANT_STAT64 */
 
-void inode_add_bytes(struct inode *inode, loff_t bytes)
+/* Caller is responsible for sufficient locking (i.e. holds inode->i_lock) */
+void __inode_add_bytes(struct inode *inode, loff_t bytes)
 {
-	spin_lock(&inode->i_lock);
 	inode->i_blocks += bytes >> 9;
 	bytes &= 511;
 	inode->i_bytes += bytes;
@@ -411,6 +411,12 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
 		inode->i_blocks++;
 		inode->i_bytes -= 512;
 	}
+}
+
+void inode_add_bytes(struct inode *inode, loff_t bytes)
+{
+	spin_lock(&inode->i_lock);
+	__inode_add_bytes(inode, bytes);
 	spin_unlock(&inode->i_lock);
 }
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index f07f34de2f0e..258088ab3c6b 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -72,6 +72,8 @@ struct ext3_sb_info {
	struct inode * s_journal_inode;
	struct journal_s * s_journal;
	struct list_head s_orphan;
+	struct mutex s_orphan_lock;
+	struct mutex s_resize_lock;
	unsigned long s_commit_interval;
	struct block_device *journal_bdev;
 #ifdef CONFIG_JBD_DEBUG
diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h
index cf82d519be40..d7b5ddca99c2 100644
--- a/include/linux/ext3_jbd.h
+++ b/include/linux/ext3_jbd.h
@@ -44,13 +44,13 @@
 
 #define EXT3_DATA_TRANS_BLOCKS(sb)	(EXT3_SINGLEDATA_TRANS_BLOCKS + \
					 EXT3_XATTR_TRANS_BLOCKS - 2 + \
-					 2*EXT3_QUOTA_TRANS_BLOCKS(sb))
+					 EXT3_MAXQUOTAS_TRANS_BLOCKS(sb))
 
 /* Delete operations potentially hit one directory's namespace plus an
  * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
  * generous.  We can grow the delete transaction later if necessary. */
 
-#define EXT3_DELETE_TRANS_BLOCKS(sb)	(2 * EXT3_DATA_TRANS_BLOCKS(sb) + 64)
+#define EXT3_DELETE_TRANS_BLOCKS(sb)	(EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64)
 
 /* Define an arbitrary limit for the amount of data we will anticipate
  * writing to any given transaction.  For unbounded transactions such as
@@ -86,6 +86,9 @@
 #define EXT3_QUOTA_INIT_BLOCKS(sb) 0
 #define EXT3_QUOTA_DEL_BLOCKS(sb) 0
 #endif
+#define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb))
+#define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb))
+#define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb))
 
 int
 ext3_mark_iloc_dirty(handle_t *handle,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e3012e0ac06..9147ca88f253 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2297,6 +2297,7 @@ extern const struct inode_operations page_symlink_inode_operations;
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+void __inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
 loff_t inode_get_bytes(struct inode *inode);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index e70e62194243..a6861f117480 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -315,8 +315,9 @@ struct dquot_operations {
	int (*claim_space) (struct inode *, qsize_t);
	/* release rsved quota for delayed alloc */
	void (*release_rsv) (struct inode *, qsize_t);
-	/* get reserved quota for delayed alloc */
-	qsize_t (*get_reserved_space) (struct inode *);
+	/* get reserved quota for delayed alloc, value returned is managed by
+	 * quota code only */
+	qsize_t *(*get_reserved_space) (struct inode *);
 };
 
 /* Operations handling requests from userspace */
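A closing note on the fs/stat.c and include/linux/fs.h hunks: they follow the common kernel convention of splitting a locked function into a double-underscore variant whose caller must already hold the lock, plus a thin wrapper that takes it. That split is what lets inode_claim_rsv_space() earlier in this patch fold the reserved-space decrement and __inode_add_bytes() into a single i_lock critical section. The shape of the convention, sketched with invented names:

    struct counter {
            spinlock_t lock;
            long value;
    };

    /* Caller must hold c->lock. */
    static void __counter_add(struct counter *c, long delta)
    {
            c->value += delta;
    }

    static void counter_add(struct counter *c, long delta)
    {
            spin_lock(&c->lock);
            __counter_add(c, delta);        /* update under the lock */
            spin_unlock(&c->lock);
    }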