From 460bcf57b128ce1c0dd553d905fedc097f9955c6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 12 May 2009 07:37:56 -0400 Subject: [PATCH 1/2] Fix nobh_truncate_page() to not pass stack garbage to get_block() The nobh_truncate_page() function is used by ext2, exofs, and jfs. Of these three, only ext2 and jfs's get_block() function pays attention to bh->b_size --- which is normally always the filesystem blocksize except when the get_block() function is called by either mpage_readpage(), mpage_readpages(), or the direct I/O routines in fs/direct_io.c. Unfortunately, nobh_truncate_page() does not initialize map_bh before calling the filesystem-supplied get_block() function. So ext2 and jfs will try to calculate the number of blocks to map by taking stack garbage and shifting it left by inode->i_blkbits. This should be *mostly* harmless (except the filesystem will do some unnneeded work) unless the stack garbage is less than filesystem's blocksize, in which case maxblocks will be zero, and the attempt to find out whether or not the filesystem has a hole at a given logical block will fail, and the page cache entry might not get zero'ed out. Also if the stack garbage in in map_bh->state happens to have the BH_Mapped bit set, there could be an attempt to call readpage() on a non-existent page, which could cause nobh_truncate_page() to return an error when it should not. Fix this by initializing map_bh->state and map_bh->size. Fortunately, it's probably fairly unlikely that ext2 and jfs users mount with nobh these days. Signed-off-by: "Theodore Ts'o" Cc: Dave Kleikamp Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- fs/buffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/buffer.c b/fs/buffer.c index aed297739eb0..49106127a4aa 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2736,6 +2736,8 @@ has_buffers: pos += blocksize; } + map_bh.b_size = blocksize; + map_bh.b_state = 0; err = get_block(inode, iblock, &map_bh, 0); if (err) goto unlock; From 72a43d63cb51057393edfbcfc4596066205ad15d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 13 May 2009 19:13:40 +0100 Subject: [PATCH 2/2] ext3/4 with synchronous writes gets wedged by Postfix OK, that's probably the easiest way to do that, as much as I don't like it... Since iget() et.al. will not accept I_FREEING (will wait to go away and restart), and since we'd better have serialization between new/free on fs data structures anyway, we can afford simply skipping I_FREEING et.al. in insert_inode_locked(). We do that from new_inode, so it won't race with free_inode in any interesting ways and it won't race with iget (of any origin; nfsd or in case of fs corruption a lookup) since both still will wait for I_LOCK. Reviewed-by: "Theodore Ts'o" Acked-by: Jan Kara Tested-by: David Watson Signed-off-by: Al Viro --- fs/inode.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 0571983755dc..a4876e561953 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1053,13 +1053,22 @@ int insert_inode_locked(struct inode *inode) struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; struct hlist_head *head = inode_hashtable + hash(sb, ino); - struct inode *old; inode->i_state |= I_LOCK|I_NEW; while (1) { + struct hlist_node *node; + struct inode *old = NULL; spin_lock(&inode_lock); - old = find_inode_fast(sb, head, ino); - if (likely(!old)) { + hlist_for_each_entry(old, node, head, i_hash) { + if (old->i_ino != ino) + continue; + if (old->i_sb != sb) + continue; + if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) + continue; + break; + } + if (likely(!node)) { hlist_add_head(&inode->i_hash, head); spin_unlock(&inode_lock); return 0; @@ -1081,14 +1090,24 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, { struct super_block *sb = inode->i_sb; struct hlist_head *head = inode_hashtable + hash(sb, hashval); - struct inode *old; inode->i_state |= I_LOCK|I_NEW; while (1) { + struct hlist_node *node; + struct inode *old = NULL; + spin_lock(&inode_lock); - old = find_inode(sb, head, test, data); - if (likely(!old)) { + hlist_for_each_entry(old, node, head, i_hash) { + if (old->i_sb != sb) + continue; + if (!test(old, data)) + continue; + if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) + continue; + break; + } + if (likely(!node)) { hlist_add_head(&inode->i_hash, head); spin_unlock(&inode_lock); return 0;