From 460bcf57b128ce1c0dd553d905fedc097f9955c6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 12 May 2009 07:37:56 -0400
Subject: [PATCH 1/2] Fix nobh_truncate_page() to not pass stack garbage to
 get_block()

The nobh_truncate_page() function is used by ext2, exofs, and jfs.  Of
these three, only ext2 and jfs's get_block() function pays attention
to bh->b_size --- which is normally always the filesystem blocksize
except when the get_block() function is called by either
mpage_readpage(), mpage_readpages(), or the direct I/O routines in
fs/direct_io.c.

Unfortunately, nobh_truncate_page() does not initialize map_bh before
calling the filesystem-supplied get_block() function.  So ext2 and jfs
will try to calculate the number of blocks to map by taking stack
garbage and shifting it left by inode->i_blkbits.  This should be
*mostly* harmless (except the filesystem will do some unnneeded work)
unless the stack garbage is less than filesystem's blocksize, in which
case maxblocks will be zero, and the attempt to find out whether or
not the filesystem has a hole at a given logical block will fail, and
the page cache entry might not get zero'ed out.

Also if the stack garbage in in map_bh->state happens to have the
BH_Mapped bit set, there could be an attempt to call readpage() on a
non-existent page, which could cause nobh_truncate_page() to return an
error when it should not.

Fix this by initializing map_bh->state and map_bh->size.

Fortunately, it's probably fairly unlikely that ext2 and jfs users
mount with nobh these days.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/buffer.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/buffer.c b/fs/buffer.c
index aed297739eb0..49106127a4aa 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2736,6 +2736,8 @@ has_buffers:
 		pos += blocksize;
 	}
 
+	map_bh.b_size = blocksize;
+	map_bh.b_state = 0;
 	err = get_block(inode, iblock, &map_bh, 0);
 	if (err)
 		goto unlock;

From 72a43d63cb51057393edfbcfc4596066205ad15d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 13 May 2009 19:13:40 +0100
Subject: [PATCH 2/2] ext3/4 with synchronous writes gets wedged by Postfix

OK, that's probably the easiest way to do that, as much as I don't like it...
Since iget() et.al. will not accept I_FREEING (will wait to go away
and restart), and since we'd better have serialization between new/free
on fs data structures anyway, we can afford simply skipping I_FREEING
et.al. in insert_inode_locked().

We do that from new_inode, so it won't race with free_inode in any interesting
ways and it won't race with iget (of any origin; nfsd or in case of fs
corruption a lookup) since both still will wait for I_LOCK.

Reviewed-by: "Theodore Ts'o" <tytso@mit.edu>
Acked-by: Jan Kara <jack@suse.cz>
Tested-by: David Watson <dbwatson@ukfsn.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 0571983755dc..a4876e561953 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1053,13 +1053,22 @@ int insert_inode_locked(struct inode *inode)
 	struct super_block *sb = inode->i_sb;
 	ino_t ino = inode->i_ino;
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
-	struct inode *old;
 
 	inode->i_state |= I_LOCK|I_NEW;
 	while (1) {
+		struct hlist_node *node;
+		struct inode *old = NULL;
 		spin_lock(&inode_lock);
-		old = find_inode_fast(sb, head, ino);
-		if (likely(!old)) {
+		hlist_for_each_entry(old, node, head, i_hash) {
+			if (old->i_ino != ino)
+				continue;
+			if (old->i_sb != sb)
+				continue;
+			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+				continue;
+			break;
+		}
+		if (likely(!node)) {
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode_lock);
 			return 0;
@@ -1081,14 +1090,24 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 {
 	struct super_block *sb = inode->i_sb;
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
-	struct inode *old;
 
 	inode->i_state |= I_LOCK|I_NEW;
 
 	while (1) {
+		struct hlist_node *node;
+		struct inode *old = NULL;
+
 		spin_lock(&inode_lock);
-		old = find_inode(sb, head, test, data);
-		if (likely(!old)) {
+		hlist_for_each_entry(old, node, head, i_hash) {
+			if (old->i_sb != sb)
+				continue;
+			if (!test(old, data))
+				continue;
+			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+				continue;
+			break;
+		}
+		if (likely(!node)) {
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode_lock);
 			return 0;