From 4c6ab3ee4cdb86cbd4e9400dd22fad7701cbe795 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 21 Sep 2009 23:21:53 -0700
Subject: [PATCH 01/70] crypto: padlock-sha - Fix stack alignment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PadLock hardware requires the output buffer for SHA to be
128-bit aligned.  We currentply place the buffer on the stack,
and ask gcc to align it to 128 bits.  That doesn't work on i386
because the kernel stack is only aligned to 32 bits.  This patch
changes the code to align the buffer by hand so that the hardware
doesn't fault on unaligned buffers.

Reported-by: Séguier Régis <rguier@e-teleport.net>
Tested-by: Séguier Régis <rguier@e-teleport.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/padlock-sha.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 76cb6b345e7b..0af80577dc7b 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -24,6 +24,12 @@
 #include <asm/i387.h>
 #include "padlock.h"
 
+#ifdef CONFIG_64BIT
+#define STACK_ALIGN 16
+#else
+#define STACK_ALIGN 4
+#endif
+
 struct padlock_sha_desc {
 	struct shash_desc fallback;
 };
@@ -64,7 +70,9 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
 	/* We can't store directly to *out as it may be unaligned. */
 	/* BTW Don't reduce the buffer size below 128 Bytes!
 	 *     PadLock microcode needs it that big. */
-	char result[128] __attribute__ ((aligned(PADLOCK_ALIGNMENT)));
+	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
+		((aligned(STACK_ALIGN)));
+	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
 	struct sha1_state state;
 	unsigned int space;
@@ -128,7 +136,9 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
 	/* We can't store directly to *out as it may be unaligned. */
 	/* BTW Don't reduce the buffer size below 128 Bytes!
 	 *     PadLock microcode needs it that big. */
-	char result[128] __attribute__ ((aligned(PADLOCK_ALIGNMENT)));
+	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
+		((aligned(STACK_ALIGN)));
+	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
 	struct sha256_state state;
 	unsigned int space;

From 5f5eeff4c93256ee93435a3bf08cf18c45e9a994 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 12 Oct 2009 21:35:00 -0700
Subject: [PATCH 02/70] Input: synaptics - add another Protege M300 to rate
 blacklist

Apparently some of Toshiba Protege M300 identify themselves as
"Portable PC" in DMI so we need to add that to the DMI table as
well. We need DMI data so we can automatically lower Synaptics
reporting rate from 80 to 40 pps to avoid over-taxing their
keyboard controllers.

Tested-by: Rod Davison <roddavison@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/mouse/synaptics.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index b66ff1ac7dea..f4a61252bcc9 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -652,6 +652,16 @@ static const struct dmi_system_id toshiba_dmi_table[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE M300"),
 		},
+
+	},
+	{
+		.ident = "Toshiba Portege M300",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Portable PC"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Version 1.0"),
+		},
+
 	},
 	{ }
 };

From 7dcc9c230fcef0067efee17c1f8c3c84494a7ec8 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 12 Oct 2009 21:34:25 -0700
Subject: [PATCH 03/70] Input: i8042 - make pnp_data_busted variable boolean
 instead of int

This small snippet escaped last round of int -> bool conversion.

Reported-by: Joe Perches <joe@perches.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/serio/i8042-x86ia64io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index a39bc4eb902b..77ff205c8577 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -661,7 +661,7 @@ static void i8042_pnp_exit(void)
 static int __init i8042_pnp_init(void)
 {
 	char kbd_irq_str[4] = { 0 }, aux_irq_str[4] = { 0 };
-	int pnp_data_busted = false;
+	bool pnp_data_busted = false;
 	int err;
 
 #ifdef CONFIG_X86

From fc0eb28c0031ec2da872dd296b551453eb1963c9 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 12 Oct 2009 22:47:25 -0700
Subject: [PATCH 04/70] Input: atkbd - restore resetting LED state at startup

Fix breakage caused by commit 9605fb48e1998935a5ee70c965f90ad1ac023add
While the input core indeed takes care of restoring led state and
typematic settings upon resume the driver still need to initialize
them properly when registering a new device

Reported-and-tested-by: Marin Mitov <mitov@issp.bas.bg>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/atkbd.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 904d4c9fbf22..73b530424729 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -770,6 +770,30 @@ static int atkbd_select_set(struct atkbd *atkbd, int target_set, int allow_extra
 	return 3;
 }
 
+static int atkbd_reset_state(struct atkbd *atkbd)
+{
+        struct ps2dev *ps2dev = &atkbd->ps2dev;
+	unsigned char param[1];
+
+/*
+ * Set the LEDs to a predefined state (all off).
+ */
+
+	param[0] = 0;
+	if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS))
+		return -1;
+
+/*
+ * Set autorepeat to fastest possible.
+ */
+
+	param[0] = 0;
+	if (ps2_command(ps2dev, param, ATKBD_CMD_SETREP))
+		return -1;
+
+	return 0;
+}
+
 static int atkbd_activate(struct atkbd *atkbd)
 {
 	struct ps2dev *ps2dev = &atkbd->ps2dev;
@@ -1087,6 +1111,7 @@ static int atkbd_connect(struct serio *serio, struct serio_driver *drv)
 		}
 
 		atkbd->set = atkbd_select_set(atkbd, atkbd_set, atkbd_extra);
+		atkbd_reset_state(atkbd);
 		atkbd_activate(atkbd);
 
 	} else {
@@ -1267,6 +1292,7 @@ static ssize_t atkbd_set_extra(struct atkbd *atkbd, const char *buf, size_t coun
 
 		atkbd->dev = new_dev;
 		atkbd->set = atkbd_select_set(atkbd, atkbd->set, value);
+		atkbd_reset_state(atkbd);
 		atkbd_activate(atkbd);
 		atkbd_set_keycode_table(atkbd);
 		atkbd_set_device_attrs(atkbd);

From 94dfb0d6334a281a979fe5bee187a3698a4dc176 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 13 Oct 2009 23:39:17 -0700
Subject: [PATCH 05/70] Input: atkbd - postpone restoring LED/repeat rate at
 resume

We need to postpone restoring LED state and typematic settings until
keyboard is finished reconnecting upon resume. Normally driver core
and PM infrastructure takes care of proper ordering and dependencies,
but or case actual reconnect is done asynchronously from kseriod.
So while driver core thinks that keyboard was resumed and it is time
to let input core run it's resume handlers in reality keyboard is not
ready yet. The solution is to keep rescheduling work that adjusts LED
and rate settings until keyboard is fully enabled.

Reported-by: Carlos R. Mafra <crmafra2@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/atkbd.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 73b530424729..de520386f13f 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -574,11 +574,22 @@ static void atkbd_event_work(struct work_struct *work)
 
 	mutex_lock(&atkbd->event_mutex);
 
-	if (test_and_clear_bit(ATKBD_LED_EVENT_BIT, &atkbd->event_mask))
-		atkbd_set_leds(atkbd);
+	if (!atkbd->enabled) {
+		/*
+		 * Serio ports are resumed asynchronously so while driver core
+		 * thinks that device is already fully operational in reality
+		 * it may not be ready yet. In this case we need to keep
+		 * rescheduling till reconnect completes.
+		 */
+		schedule_delayed_work(&atkbd->event_work,
+					msecs_to_jiffies(100));
+	} else {
+		if (test_and_clear_bit(ATKBD_LED_EVENT_BIT, &atkbd->event_mask))
+			atkbd_set_leds(atkbd);
 
-	if (test_and_clear_bit(ATKBD_REP_EVENT_BIT, &atkbd->event_mask))
-		atkbd_set_repeat_rate(atkbd);
+		if (test_and_clear_bit(ATKBD_REP_EVENT_BIT, &atkbd->event_mask))
+			atkbd_set_repeat_rate(atkbd);
+	}
 
 	mutex_unlock(&atkbd->event_mutex);
 }

From 1572ca2a842a839b78780d9074d2f140b31907cc Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 13 Oct 2009 23:37:30 -0700
Subject: [PATCH 06/70] Input: fix locking issue in /proc/bus/input/ handlers

input_devices_seq_start() uses mutex_lock_interruptible() to acquire
the input_mutex, but doesn't properly handle the situation when the
call fails (for example due to interrupt). Instead of returning NULL
(which indicates that there is no more data) we should return
ERR_PTR()-encoded error.

We also need explicit flag indicating whether input_mutex was acquired
since input_devices_seq_stop() is called whether input_devices_seq_start()
was successful or not.

The same applies to input_handlers_seq_start().

Reported-by: iceberg <strakh@ispras.ru>
Reviewed-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input.c | 65 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 17 deletions(-)

diff --git a/drivers/input/input.c b/drivers/input/input.c
index b8ed4294fccd..60a4eaabb7d7 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -781,10 +781,29 @@ static unsigned int input_proc_devices_poll(struct file *file, poll_table *wait)
 	return 0;
 }
 
+union input_seq_state {
+	struct {
+		unsigned short pos;
+		bool mutex_acquired;
+	};
+	void *p;
+};
+
 static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	if (mutex_lock_interruptible(&input_mutex))
-		return NULL;
+	union input_seq_state *state = (union input_seq_state *)&seq->private;
+	int error;
+
+	/* We need to fit into seq->private pointer */
+	BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private));
+
+	error = mutex_lock_interruptible(&input_mutex);
+	if (error) {
+		state->mutex_acquired = false;
+		return ERR_PTR(error);
+	}
+
+	state->mutex_acquired = true;
 
 	return seq_list_start(&input_dev_list, *pos);
 }
@@ -794,9 +813,12 @@ static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	return seq_list_next(v, &input_dev_list, pos);
 }
 
-static void input_devices_seq_stop(struct seq_file *seq, void *v)
+static void input_seq_stop(struct seq_file *seq, void *v)
 {
-	mutex_unlock(&input_mutex);
+	union input_seq_state *state = (union input_seq_state *)&seq->private;
+
+	if (state->mutex_acquired)
+		mutex_unlock(&input_mutex);
 }
 
 static void input_seq_print_bitmap(struct seq_file *seq, const char *name,
@@ -860,7 +882,7 @@ static int input_devices_seq_show(struct seq_file *seq, void *v)
 static const struct seq_operations input_devices_seq_ops = {
 	.start	= input_devices_seq_start,
 	.next	= input_devices_seq_next,
-	.stop	= input_devices_seq_stop,
+	.stop	= input_seq_stop,
 	.show	= input_devices_seq_show,
 };
 
@@ -880,40 +902,49 @@ static const struct file_operations input_devices_fileops = {
 
 static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	if (mutex_lock_interruptible(&input_mutex))
-		return NULL;
+	union input_seq_state *state = (union input_seq_state *)&seq->private;
+	int error;
+
+	/* We need to fit into seq->private pointer */
+	BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private));
+
+	error = mutex_lock_interruptible(&input_mutex);
+	if (error) {
+		state->mutex_acquired = false;
+		return ERR_PTR(error);
+	}
+
+	state->mutex_acquired = true;
+	state->pos = *pos;
 
-	seq->private = (void *)(unsigned long)*pos;
 	return seq_list_start(&input_handler_list, *pos);
 }
 
 static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	seq->private = (void *)(unsigned long)(*pos + 1);
-	return seq_list_next(v, &input_handler_list, pos);
-}
+	union input_seq_state *state = (union input_seq_state *)&seq->private;
 
-static void input_handlers_seq_stop(struct seq_file *seq, void *v)
-{
-	mutex_unlock(&input_mutex);
+	state->pos = *pos + 1;
+	return seq_list_next(v, &input_handler_list, pos);
 }
 
 static int input_handlers_seq_show(struct seq_file *seq, void *v)
 {
 	struct input_handler *handler = container_of(v, struct input_handler, node);
+	union input_seq_state *state = (union input_seq_state *)&seq->private;
 
-	seq_printf(seq, "N: Number=%ld Name=%s",
-		   (unsigned long)seq->private, handler->name);
+	seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name);
 	if (handler->fops)
 		seq_printf(seq, " Minor=%d", handler->minor);
 	seq_putc(seq, '\n');
 
 	return 0;
 }
+
 static const struct seq_operations input_handlers_seq_ops = {
 	.start	= input_handlers_seq_start,
 	.next	= input_handlers_seq_next,
-	.stop	= input_handlers_seq_stop,
+	.stop	= input_seq_stop,
 	.show	= input_handlers_seq_show,
 };
 

From 73d540f282c0d8ce48fafd7fcc844e91f31d4103 Mon Sep 17 00:00:00 2001
From: Sathya Perla <sathyap@serverengines.com>
Date: Wed, 14 Oct 2009 20:20:42 +0000
Subject: [PATCH 07/70] be2net: fix promiscuous and multicast promiscuous modes
 being enabled always

Signed-off-by: Sathya Perla <sathyap@serverengines.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/benet/be_cmds.c |  8 ++++----
 drivers/net/benet/be_cmds.h |  5 +++--
 drivers/net/benet/be_main.c | 19 +++++++++++--------
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
index 89876ade5e33..b57abc0c9f48 100644
--- a/drivers/net/benet/be_cmds.c
+++ b/drivers/net/benet/be_cmds.c
@@ -729,8 +729,8 @@ int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
 /* Create an rx filtering policy configuration on an i/f
  * Uses mbox
  */
-int be_cmd_if_create(struct be_adapter *adapter, u32 flags, u8 *mac,
-		bool pmac_invalid, u32 *if_handle, u32 *pmac_id)
+int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags,
+		u8 *mac, bool pmac_invalid, u32 *if_handle, u32 *pmac_id)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_if_create *req;
@@ -746,8 +746,8 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 flags, u8 *mac,
 	be_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
 		OPCODE_COMMON_NTWK_INTERFACE_CREATE, sizeof(*req));
 
-	req->capability_flags = cpu_to_le32(flags);
-	req->enable_flags = cpu_to_le32(flags);
+	req->capability_flags = cpu_to_le32(cap_flags);
+	req->enable_flags = cpu_to_le32(en_flags);
 	req->pmac_invalid = pmac_invalid;
 	if (!pmac_invalid)
 		memcpy(req->mac_addr, mac, ETH_ALEN);
diff --git a/drivers/net/benet/be_cmds.h b/drivers/net/benet/be_cmds.h
index a86f917f85f4..49953787e41c 100644
--- a/drivers/net/benet/be_cmds.h
+++ b/drivers/net/benet/be_cmds.h
@@ -720,8 +720,9 @@ extern int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
 extern int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr,
 			u32 if_id, u32 *pmac_id);
 extern int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, u32 pmac_id);
-extern int be_cmd_if_create(struct be_adapter *adapter, u32 if_flags, u8 *mac,
-			bool pmac_invalid, u32 *if_handle, u32 *pmac_id);
+extern int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags,
+			u32 en_flags, u8 *mac, bool pmac_invalid,
+			u32 *if_handle, u32 *pmac_id);
 extern int be_cmd_if_destroy(struct be_adapter *adapter, u32 if_handle);
 extern int be_cmd_eq_create(struct be_adapter *adapter,
 			struct be_queue_info *eq, int eq_delay);
diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 6d5e81f7046f..36cb94869b01 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -1620,19 +1620,22 @@ static int be_open(struct net_device *netdev)
 static int be_setup(struct be_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	u32 if_flags;
+	u32 cap_flags, en_flags;
 	int status;
 
-	if_flags = BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_PROMISCUOUS |
-		BE_IF_FLAGS_MCAST_PROMISCUOUS | BE_IF_FLAGS_UNTAGGED |
-		BE_IF_FLAGS_PASS_L3L4_ERRORS;
-	status = be_cmd_if_create(adapter, if_flags, netdev->dev_addr,
-			false/* pmac_invalid */, &adapter->if_handle,
-			&adapter->pmac_id);
+	cap_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
+			BE_IF_FLAGS_MCAST_PROMISCUOUS |
+			BE_IF_FLAGS_PROMISCUOUS |
+			BE_IF_FLAGS_PASS_L3L4_ERRORS;
+	en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
+			BE_IF_FLAGS_PASS_L3L4_ERRORS;
+
+	status = be_cmd_if_create(adapter, cap_flags, en_flags,
+			netdev->dev_addr, false/* pmac_invalid */,
+			&adapter->if_handle, &adapter->pmac_id);
 	if (status != 0)
 		goto do_none;
 
-
 	status = be_tx_queues_create(adapter);
 	if (status != 0)
 		goto if_destroy;

From 43a04fdc369ce4fb6718b95e1c930ff8661e65c1 Mon Sep 17 00:00:00 2001
From: Sathya Perla <sathyap@serverengines.com>
Date: Wed, 14 Oct 2009 20:21:17 +0000
Subject: [PATCH 08/70] be2net: fix support for PCI hot plug

Before issuing any cmds to the FW, the driver must first wait
till the fW becomes ready. This is needed for PCI hot plug when
the driver can be probed while the card fw is being initialized.

Signed-off-by: Sathya Perla <sathyap@serverengines.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/benet/be_cmds.c | 25 ++++++++++++++++++-------
 drivers/net/benet/be_main.c |  8 ++++----
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
index b57abc0c9f48..28a0eda92680 100644
--- a/drivers/net/benet/be_cmds.c
+++ b/drivers/net/benet/be_cmds.c
@@ -243,15 +243,26 @@ static int be_POST_stage_get(struct be_adapter *adapter, u16 *stage)
 
 int be_cmd_POST(struct be_adapter *adapter)
 {
-	u16 stage, error;
+	u16 stage;
+	int status, timeout = 0;
 
-	error = be_POST_stage_get(adapter, &stage);
-	if (error || stage != POST_STAGE_ARMFW_RDY) {
-		dev_err(&adapter->pdev->dev, "POST failed.\n");
-		return -1;
-	}
+	do {
+		status = be_POST_stage_get(adapter, &stage);
+		if (status) {
+			dev_err(&adapter->pdev->dev, "POST error; stage=0x%x\n",
+				stage);
+			return -1;
+		} else if (stage != POST_STAGE_ARMFW_RDY) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(2 * HZ);
+			timeout += 2;
+		} else {
+			return 0;
+		}
+	} while (timeout < 20);
 
-	return 0;
+	dev_err(&adapter->pdev->dev, "POST timeout; stage=0x%x\n", stage);
+	return -1;
 }
 
 static inline void *embedded_payload(struct be_mcc_wrb *wrb)
diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 36cb94869b01..1f941f027718 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -2058,6 +2058,10 @@ static int be_hw_up(struct be_adapter *adapter)
 	if (status)
 		return status;
 
+	status = be_cmd_reset_function(adapter);
+	if (status)
+		return status;
+
 	status = be_cmd_get_fw_ver(adapter, adapter->fw_ver);
 	if (status)
 		return status;
@@ -2111,10 +2115,6 @@ static int __devinit be_probe(struct pci_dev *pdev,
 	if (status)
 		goto free_netdev;
 
-	status = be_cmd_reset_function(adapter);
-	if (status)
-		goto ctrl_clean;
-
 	status = be_stats_init(adapter);
 	if (status)
 		goto ctrl_clean;

From ed79bab847d8e5a2986d8ff43c49c6fb8ee3265f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 14 Oct 2009 14:36:43 +0000
Subject: [PATCH 09/70] virtio_net: use dev_kfree_skb_any() in
 free_old_xmit_skbs()

Because netpoll can call netdevice start_xmit() method with
irqs disabled, drivers should not call kfree_skb() from
their start_xmit(), but use dev_kfree_skb_any() instead.

Oct  8 11:16:52 172.30.1.31 [113074.791813] ------------[ cut here ]------------
Oct  8 11:16:52 172.30.1.31 [113074.791813] WARNING: at net/core/skbuff.c:398 \
                skb_release_head_state+0x64/0xc8()
Oct  8 11:16:52 172.30.1.31 [113074.791813] Hardware name:
Oct  8 11:16:52 172.30.1.31 [113074.791813] Modules linked in: netconsole ocfs2 jbd2 quota_tree \
ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs crc32c drbd cn loop \
serio_raw psmouse snd_pcm snd_timer snd soundcore snd_page_alloc virtio_net pcspkr parport_pc parport \
i2c_piix4 i2c_core button processor evdev ext3 jbd mbcache dm_mirror dm_region_hash dm_log dm_snapshot \
dm_mod ide_cd_mod cdrom ata_generic ata_piix virtio_blk libata scsi_mod piix ide_pci_generic ide_core \
                virtio_pci virtio_ring virtio floppy thermal fan thermal_sys [last unloaded: netconsole]
Oct  8 11:16:52 172.30.1.31 [113074.791813] Pid: 11132, comm: php5-cgi Tainted: G        W  \
                2.6.31.2-vserver #1
Oct  8 11:16:52 172.30.1.31 [113074.791813] Call Trace:
Oct  8 11:16:52 172.30.1.31 [113074.791813] <IRQ>  [<ffffffff81253cd5>] ? \
                skb_release_head_state+0x64/0xc8
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81253cd5>] ? skb_release_head_state+0x64/0xc8
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81049ae1>] ? warn_slowpath_common+0x77/0xa3
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81253cd5>] ? skb_release_head_state+0x64/0xc8
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81253a1a>] ? __kfree_skb+0x9/0x7d
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffffa01cb139>] ? free_old_xmit_skbs+0x51/0x6e \
                [virtio_net]
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffffa01cbc85>] ? start_xmit+0x26/0xf2 [virtio_net]
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff8126934f>] ? netpoll_send_skb+0xd2/0x205
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffffa0429216>] ? write_msg+0x90/0xeb [netconsole]
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81049f06>] ? __call_console_drivers+0x5e/0x6f
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff8102b49d>] ? kvm_clock_read+0x4d/0x52
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff8104a082>] ? release_console_sem+0x115/0x1ba
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff8104a632>] ? vprintk+0x2f2/0x34b
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff8106b142>] ? vx_update_load+0x18/0x13e
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81308309>] ? printk+0x4e/0x5d
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff8102b49d>] ? kvm_clock_read+0x4d/0x52
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81070b62>] ? getnstimeofday+0x55/0xaf
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81062683>] ? ktime_get_ts+0x21/0x49
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff810626b7>] ? ktime_get+0xc/0x41
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81062788>] ? hrtimer_interrupt+0x9c/0x146
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81024a4b>] ? smp_apic_timer_interrupt+0x80/0x93
Oct  8 11:16:52 172.30.1.31 [113074.791813] [<ffffffff81011663>] ? apic_timer_interrupt+0x13/0x20
Oct  8 11:16:52 172.30.1.31 [113074.791813] <EOI>  [<ffffffff8130a9eb>] ? _spin_unlock_irq+0xd/0x31

Reported-and-tested-by: Massimo Cetra <mcetra@navynet.it>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Bug-Entry: http://bugzilla.kernel.org/show_bug.cgi?id=14378
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8d009760277c..54bf0912b737 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -454,7 +454,7 @@ static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
 		vi->dev->stats.tx_bytes += skb->len;
 		vi->dev->stats.tx_packets++;
 		tot_sgs += skb_vnet_hdr(skb)->num_sg;
-		kfree_skb(skb);
+		dev_kfree_skb_any(skb);
 	}
 	return tot_sgs;
 }

From 7d948b1114c7eded14e5a31f440af751d70ecde0 Mon Sep 17 00:00:00 2001
From: "JosephChan@via.com.tw" <JosephChan@via.com.tw>
Date: Fri, 16 Oct 2009 15:45:23 +0800
Subject: [PATCH 10/70] pata_via: extend the rev_max for VT6330

Fix the VT6330 issue, it's because the rev_max of VT6330 exceeds 0x2f.
The VT6415 and VT6330 share the same device ID.

Signed-off-by: Joseph Chan <josephchan@via.com.tw>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/pata_via.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 45657cacec43..88984b803d6d 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -111,7 +111,7 @@ static const struct via_isa_bridge {
 	{ "vt8251",	PCI_DEVICE_ID_VIA_8251,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
 	{ "cx700",	PCI_DEVICE_ID_VIA_CX700,    0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA },
 	{ "vt6410",	PCI_DEVICE_ID_VIA_6410,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES },
-	{ "vt6415",	PCI_DEVICE_ID_VIA_6415,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES },
+	{ "vt6415",	PCI_DEVICE_ID_VIA_6415,     0x00, 0xff, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES },
 	{ "vt8237a",	PCI_DEVICE_ID_VIA_8237A,    0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
 	{ "vt8237",	PCI_DEVICE_ID_VIA_8237,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
 	{ "vt8235",	PCI_DEVICE_ID_VIA_8235,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },

From 726206f84c67303cc004aacfd45d37f9277a29f6 Mon Sep 17 00:00:00 2001
From: peer chen <peerchen@gmail.com>
Date: Thu, 15 Oct 2009 16:34:56 +0800
Subject: [PATCH 11/70] ahci: Add the AHCI controller Linux Device ID for
 NVIDIA chipsets.

Add the generic device ID for NVIDIA AHCI controller.

Signed-off-by: Peer Chen <peerchen@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ahci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index b1a257746a19..8c5c0dd4f6fe 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -605,6 +605,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(NVIDIA, 0x0559), board_ahci_yesncq },	/* MCP67 */
 	{ PCI_VDEVICE(NVIDIA, 0x055a), board_ahci_yesncq },	/* MCP67 */
 	{ PCI_VDEVICE(NVIDIA, 0x055b), board_ahci_yesncq },	/* MCP67 */
+	{ PCI_VDEVICE(NVIDIA, 0x0580), board_ahci_yesncq },	/* Linux ID */
 	{ PCI_VDEVICE(NVIDIA, 0x07f0), board_ahci_yesncq },	/* MCP73 */
 	{ PCI_VDEVICE(NVIDIA, 0x07f1), board_ahci_yesncq },	/* MCP73 */
 	{ PCI_VDEVICE(NVIDIA, 0x07f2), board_ahci_yesncq },	/* MCP73 */

From 5deab536654f95345ea11e8ec6ed5c778df348b5 Mon Sep 17 00:00:00 2001
From: Shane Huang <shane.huang@amd.com>
Date: Tue, 13 Oct 2009 11:14:00 +0800
Subject: [PATCH 12/70] ahci / atiixp / pci quirks: rename AMD SB900 into
 Hudson-2

This patch renames the code name SB900 into Hudson-2

Signed-off-by: Shane Huang <shane.huang@amd.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ahci.c        | 2 +-
 drivers/ata/pata_atiixp.c | 2 +-
 drivers/ide/atiixp.c      | 2 +-
 drivers/pci/quirks.c      | 6 +++---
 include/linux/pci_ids.h   | 6 ++----
 5 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 8c5c0dd4f6fe..a06f5d6375a8 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -575,7 +575,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(ATI, 0x4395), board_ahci_sb700 }, /* ATI SB700/800 */
 
 	/* AMD */
-	{ PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD SB900 */
+	{ PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */
 	/* AMD is using RAID class only for ahci controllers */
 	{ PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
 	  PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci },
diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index aa4b3f6ae771..ae4454d4e955 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -246,7 +246,7 @@ static const struct pci_device_id atiixp[] = {
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP400_IDE), },
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP600_IDE), },
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP700_IDE), },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_SB900_IDE), },
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_HUDSON2_IDE), },
 
 	{ },
 };
diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c
index 6396c3ad3252..837322b10a4c 100644
--- a/drivers/ide/atiixp.c
+++ b/drivers/ide/atiixp.c
@@ -177,7 +177,7 @@ static const struct pci_device_id atiixp_pci_tbl[] = {
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP400_IDE), 0 },
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP600_IDE), 1 },
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP700_IDE), 0 },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_SB900_IDE), 0 },
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_HUDSON2_IDE), 0 },
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, atiixp_pci_tbl);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index a790b1771f9f..245d2cdb4765 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1009,7 +1009,7 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82454NX,
 
 static void __devinit quirk_amd_ide_mode(struct pci_dev *pdev)
 {
-	/* set SBX00 SATA in IDE mode to AHCI mode */
+	/* set SBX00/Hudson-2 SATA in IDE mode to AHCI mode */
 	u8 tmp;
 
 	pci_read_config_byte(pdev, PCI_CLASS_DEVICE, &tmp);
@@ -1028,8 +1028,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_SB900_SATA_IDE, quirk_amd_ide_mode);
-DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_SB900_SATA_IDE, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE, quirk_amd_ide_mode);
 
 /*
  *	Serverworks CSB5 IDE does not fully support native mode
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index f490e7a7307a..86257a412732 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -379,9 +379,6 @@
 #define PCI_DEVICE_ID_ATI_IXP600_IDE	0x438c
 #define PCI_DEVICE_ID_ATI_IXP700_SATA	0x4390
 #define PCI_DEVICE_ID_ATI_IXP700_IDE	0x439c
-/* AMD SB Chipset */
-#define PCI_DEVICE_ID_AMD_SB900_IDE	 0x780c
-#define PCI_DEVICE_ID_AMD_SB900_SATA_IDE 0x7800
 
 #define PCI_VENDOR_ID_VLSI		0x1004
 #define PCI_DEVICE_ID_VLSI_82C592	0x0005
@@ -553,9 +550,10 @@
 #define PCI_DEVICE_ID_AMD_CS5536_UDC    0x2096
 #define PCI_DEVICE_ID_AMD_CS5536_UOC    0x2097
 #define PCI_DEVICE_ID_AMD_CS5536_IDE    0x209A
-
 #define PCI_DEVICE_ID_AMD_LX_VIDEO  0x2081
 #define PCI_DEVICE_ID_AMD_LX_AES    0x2082
+#define PCI_DEVICE_ID_AMD_HUDSON2_IDE		0x780c
+#define PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE	0x7800
 
 #define PCI_VENDOR_ID_TRIDENT		0x1023
 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX	0x2000

From 6489e3262e6b188a1a009b65e8a94b7aa17645b7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Oct 2009 11:18:28 +0900
Subject: [PATCH 13/70] sata_nv: make sure link is brough up online when
 skipping hardreset

prereset doesn't bring link online if hardreset is about to happen and
nv_hardreset() may skip if conditions are not right so softreset may
be entered with non-working link status if the system firmware didn't
bring it up before entering OS code which can happen during resume.
This patch makes nv_hardreset() to bring up the link if it's skipping
reset.

This bug was reported by frodone@gmail.com in the following bug entry.

  http://bugzilla.kernel.org/show_bug.cgi?id=14329

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: frodone@gmail.com
Cc: stable@kernel.org
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/sata_nv.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 86a40582999c..1eb4e020eb5c 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -1594,9 +1594,21 @@ static int nv_hardreset(struct ata_link *link, unsigned int *class,
 	    !ata_dev_enabled(link->device))
 		sata_link_hardreset(link, sata_deb_timing_hotplug, deadline,
 				    NULL, NULL);
-	else if (!(ehc->i.flags & ATA_EHI_QUIET))
-		ata_link_printk(link, KERN_INFO,
-				"nv: skipping hardreset on occupied port\n");
+	else {
+		const unsigned long *timing = sata_ehc_deb_timing(ehc);
+		int rc;
+
+		if (!(ehc->i.flags & ATA_EHI_QUIET))
+			ata_link_printk(link, KERN_INFO, "nv: skipping "
+					"hardreset on occupied port\n");
+
+		/* make sure the link is online */
+		rc = sata_link_resume(link, timing, deadline);
+		/* whine about phy resume failure but proceed */
+		if (rc && rc != -EOPNOTSUPP)
+			ata_link_printk(link, KERN_WARNING, "failed to resume "
+					"link (errno=%d)\n", rc);
+	}
 
 	/* device signature acquisition is unreliable */
 	return -EAGAIN;

From 4f7c2874995ac48a4622755b8bd159eb2fb6d8f4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 15 Oct 2009 23:37:32 +0900
Subject: [PATCH 14/70] libata: fix PMP initialization

Commit 842faa6c1a1d6faddf3377948e5cf214812c6c90 fixed error handling
during attach by not committing detected device class to dev->class
while attaching a new device.  However, this change missed the PMP
class check in the configuration loop causing a new PMP device to go
through ata_dev_configure() as if it were an ATA or ATAPI device.

As PMP device doesn't have a regular IDENTIFY data, this makes
ata_dev_configure() tries to configure a PMP device using an invalid
data.  For the most part, it wasn't too harmful and went unnoticed but
this ends up clearing dev->flags which may have ATA_DFLAG_AN set by
sata_pmp_attach().  This means that SATA_PMP_FEAT_NOTIFY ends up being
disabled on PMPs and on PMPs which honor the flag breaks hotplug
support.

This problem was discovered and reported by Ethan Hsiao.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Ethan Hsiao <ethanhsiao@jmicron.com>
Cc: stable@kernel.org
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-eh.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 0a97822da211..bba2ae5df1c2 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2981,12 +2981,14 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
 	 * device detection messages backwards.
 	 */
 	ata_for_each_dev(dev, link, ALL) {
-		if (!(new_mask & (1 << dev->devno)) ||
-		    dev->class == ATA_DEV_PMP)
+		if (!(new_mask & (1 << dev->devno)))
 			continue;
 
 		dev->class = ehc->classes[dev->devno];
 
+		if (dev->class == ATA_DEV_PMP)
+			continue;
+
 		ehc->i.flags |= ATA_EHI_PRINTINFO;
 		rc = ata_dev_configure(dev);
 		ehc->i.flags &= ~ATA_EHI_PRINTINFO;

From f4b31db92d163df8a639f5a8c8633bdeb6e8432d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 Oct 2009 13:00:51 +0900
Subject: [PATCH 15/70] libata: fix internal command failure handling

When an internal command fails, it should be failed directly without
invoking EH.  In the original implemetation, this was accomplished by
letting internal command bypass failure handling in ata_qc_complete().
However, later changes added post-successful-completion handling to
that code path and the success path is no longer adequate as internal
command failure path.  One of the visible problems is that internal
command failure due to timeout or other freeze conditions would
spuriously trigger WARN_ON_ONCE() in the success path.

This patch updates failure path such that internal command failure
handling is contained there.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@kernel.org
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index b525a0981348..d7f0f1b1ae3e 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5028,12 +5028,14 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
 			qc->flags |= ATA_QCFLAG_FAILED;
 
 		if (unlikely(qc->flags & ATA_QCFLAG_FAILED)) {
-			if (!ata_tag_internal(qc->tag)) {
-				/* always fill result TF for failed qc */
-				fill_result_tf(qc);
+			/* always fill result TF for failed qc */
+			fill_result_tf(qc);
+
+			if (!ata_tag_internal(qc->tag))
 				ata_qc_schedule_eh(qc);
-				return;
-			}
+			else
+				__ata_qc_complete(qc);
+			return;
 		}
 
 		WARN_ON_ONCE(ap->pflags & ATA_PFLAG_FROZEN);

From 6d4f950e9ea15816c6a4f266ce6b9e438346771e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Tue, 6 Oct 2009 16:07:51 +0100
Subject: [PATCH 16/70] pata_sc1200: Fix crash on boot

The SC1200 needs a NULL terminator or it may cause a crash on boot.

Bug #14227

Also correct a bogus comment as the driver had serializing added so can run
dual port.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/pata_sc1200.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/ata/pata_sc1200.c b/drivers/ata/pata_sc1200.c
index f49814d6fd2e..3bbed8322ecf 100644
--- a/drivers/ata/pata_sc1200.c
+++ b/drivers/ata/pata_sc1200.c
@@ -235,8 +235,7 @@ static int sc1200_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 		.udma_mask = ATA_UDMA2,
 		.port_ops = &sc1200_port_ops
 	};
-	/* Can't enable port 2 yet, see top comments */
-	const struct ata_port_info *ppi[] = { &info, };
+	const struct ata_port_info *ppi[] = { &info, NULL };
 
 	return ata_pci_sff_init_one(dev, ppi, &sc1200_sht, NULL);
 }

From 159a7ff7a13f9a02c75006f40c0561a3a81aefcd Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@google.com>
Date: Mon, 12 Oct 2009 15:44:00 -0700
Subject: [PATCH 17/70] sata_mv: Prevent PIO commands to be defered too long if
 traffic in progress.

Use excl_link when non NCQ commands are defered, to be sure they are processed
as soon as outstanding commands are completed. It prevents some commands to be
defered indifinitely when using a port multiplier.

Signed-off-by: Gwendal Grignou <gwendal@google.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/sata_mv.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 17f9ff9067a2..6f5093b7c8c5 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -1382,6 +1382,25 @@ static int mv_qc_defer(struct ata_queued_cmd *qc)
 	 */
 	if (pp->pp_flags & MV_PP_FLAG_DELAYED_EH)
 		return ATA_DEFER_PORT;
+
+	/* PIO commands need exclusive link: no other commands [DMA or PIO]
+	 * can run concurrently.
+	 * set excl_link when we want to send a PIO command in DMA mode
+	 * or a non-NCQ command in NCQ mode.
+	 * When we receive a command from that link, and there are no
+	 * outstanding commands, mark a flag to clear excl_link and let
+	 * the command go through.
+	 */
+	if (unlikely(ap->excl_link)) {
+		if (link == ap->excl_link) {
+			if (ap->nr_active_links)
+				return ATA_DEFER_PORT;
+			qc->flags |= ATA_QCFLAG_CLEAR_EXCL;
+			return 0;
+		} else
+			return ATA_DEFER_PORT;
+	}
+
 	/*
 	 * If the port is completely idle, then allow the new qc.
 	 */
@@ -1395,8 +1414,14 @@ static int mv_qc_defer(struct ata_queued_cmd *qc)
 	 * doesn't allow it.
 	 */
 	if ((pp->pp_flags & MV_PP_FLAG_EDMA_EN) &&
-	    (pp->pp_flags & MV_PP_FLAG_NCQ_EN) && ata_is_ncq(qc->tf.protocol))
-		return 0;
+	    (pp->pp_flags & MV_PP_FLAG_NCQ_EN)) {
+		if (ata_is_ncq(qc->tf.protocol))
+			return 0;
+		else {
+			ap->excl_link = link;
+			return ATA_DEFER_PORT;
+		}
+	}
 
 	return ATA_DEFER_PORT;
 }

From ace1546487a0fe4634e3251067f8a32cb2cdc099 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 8 Oct 2009 10:55:03 -0300
Subject: [PATCH 18/70] KVM: use proper hrtimer function to retrieve expiration
 time

hrtimer->base can be temporarily NULL due to racing hrtimer_start.
See switch_hrtimer_base/lock_hrtimer_base.

Use hrtimer_get_remaining which is robust against it.

CC: stable@kernel.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8254.c | 2 +-
 arch/x86/kvm/lapic.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 82ad523b4901..144e7f60b5e2 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -116,7 +116,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 	 * itself with the initial count and continues counting
 	 * from there.
 	 */
-	remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
+	remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
 	elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
 	elapsed = mod_64(elapsed, ps->pit_timer.period);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 7024224f0fc8..23c217692ea9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -521,7 +521,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 	if (apic_get_reg(apic, APIC_TMICT) == 0)
 		return 0;
 
-	remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer);
+	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
 	if (ktime_to_ns(remaining) < 0)
 		remaining = ktime_set(0, 0);
 

From 8a8365c560b8b631e0a2d1ac032fbca66a9645bc Mon Sep 17 00:00:00 2001
From: Frederik Deweerdt <frederik.deweerdt@xprog.eu>
Date: Fri, 9 Oct 2009 11:42:56 +0000
Subject: [PATCH 19/70] KVM: MMU: fix pointer cast
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On a 32 bits compile, commit 3da0dd433dc399a8c0124d0614d82a09b6a49bce
introduced the following warnings:

arch/x86/kvm/mmu.c: In function ‘kvm_set_pte_rmapp’:
arch/x86/kvm/mmu.c:770: warning: cast to pointer from integer of different size
arch/x86/kvm/mmu.c: In function ‘kvm_set_spte_hva’:
arch/x86/kvm/mmu.c:849: warning: cast from pointer to integer of different size

The following patch uses 'unsigned long' instead of u64 to match the
pointer size on both arches.

Signed-off-by: Frederik Deweerdt <frederik.deweerdt@xprog.eu>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 685a4ffac8e6..818b92ad82cf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -748,7 +748,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 	return write_protected;
 }
 
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			   unsigned long data)
 {
 	u64 *spte;
 	int need_tlb_flush = 0;
@@ -763,7 +764,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
 	return need_tlb_flush;
 }
 
-static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
+static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			     unsigned long data)
 {
 	int need_flush = 0;
 	u64 *spte, new_spte;
@@ -799,9 +801,10 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
 	return 0;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data,
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  unsigned long data,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
-					 u64 data))
+					 unsigned long data))
 {
 	int i, j;
 	int retval = 0;
@@ -846,10 +849,11 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
-	kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp);
+	kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
 }
 
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			 unsigned long data)
 {
 	u64 *spte;
 	int young = 0;

From 0ea4ed8e948c30f88c824c973ee4b9529015fe65 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Wed, 14 Oct 2009 16:21:00 -0700
Subject: [PATCH 20/70] KVM: Prevent kvm_init from corrupting debugfs
 structures

I'm seeing an oops condition when kvm-intel and kvm-amd are modprobe'd
during boot (say on an Intel system) and then rmmod'd:

   # modprobe kvm-intel
     kvm_init()
     kvm_init_debug()
     kvm_arch_init()  <-- stores debugfs dentries internally
     (success, etc)

   # modprobe kvm-amd
     kvm_init()
     kvm_init_debug() <-- second initialization clobbers kvm's
                          internal pointers to dentries
     kvm_arch_init()
     kvm_exit_debug() <-- and frees them

   # rmmod kvm-intel
     kvm_exit()
     kvm_exit_debug() <-- double free of debugfs files!

     *BOOM*

If execution gets to the end of kvm_init(), then the calling module has been
established as the kvm provider.  Move the debugfs initialization to the end of
the function, and remove the now-unnecessary call to kvm_exit_debug() from the
error path.  That way we avoid trampling on the debugfs entries and freeing
them twice.

Cc: stable@kernel.org
Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 virt/kvm/kvm_main.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b7c78a403dc2..7495ce347344 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2717,8 +2717,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	int r;
 	int cpu;
 
-	kvm_init_debug();
-
 	r = kvm_arch_init(opaque);
 	if (r)
 		goto out_fail;
@@ -2785,6 +2783,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
 
+	kvm_init_debug();
+
 	return 0;
 
 out_free:
@@ -2807,7 +2807,6 @@ out_free_0:
 out:
 	kvm_arch_exit();
 out_fail:
-	kvm_exit_debug();
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init);
@@ -2815,6 +2814,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
 void kvm_exit(void)
 {
 	tracepoint_synchronize_unregister();
+	kvm_exit_debug();
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
 	sysdev_unregister(&kvm_sysdev);
@@ -2824,7 +2824,6 @@ void kvm_exit(void)
 	on_each_cpu(hardware_disable, NULL, 1);
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
-	kvm_exit_debug();
 	free_cpumask_var(cpus_hardware_enabled);
 	__free_page(bad_page);
 }

From 4997811e3b9e4d6f37380701894f063c62f14929 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 12 Oct 2009 17:23:03 +0200
Subject: [PATCH 21/70] amd64_edac: fix DRAM base and limit extraction masks,
 v2

This is a proper fix as a follow-up to 66216a7 and 916d11b.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
 drivers/edac/amd64_edac.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 4f4ac82382f7..d4560d9d5a83 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1122,7 +1122,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
 		debugf0("Reading K8_DRAM_BASE_LOW failed\n");
 
 	/* Extract parts into separate data entries */
-	pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 24;
+	pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8;
 	pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7;
 	pvt->dram_rw_en[dram] = (low & 0x3);
 
@@ -1135,7 +1135,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
 	 * Extract parts into separate data entries. Limit is the HIGHEST memory
 	 * location of the region, so lower 24 bits need to be all ones
 	 */
-	pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 24) | 0x00FFFFFF;
+	pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 8) | 0x00FFFFFF;
 	pvt->dram_IntlvSel[dram] = (low >> 8) & 0x7;
 	pvt->dram_DstNode[dram] = (low & 0x7);
 }
@@ -1369,7 +1369,7 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
 	pvt->dram_IntlvEn[dram] = (low_base >> 8) & 0x7;
 
 	pvt->dram_base[dram] = (((u64)high_base & 0x000000FF) << 40) |
-			       (((u64)low_base  & 0xFFFF0000) << 24);
+			       (((u64)low_base  & 0xFFFF0000) << 8);
 
 	low_offset = K8_DRAM_LIMIT_LOW + (dram << 3);
 	high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3);
@@ -1391,7 +1391,7 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
 	 * memory location of the region, so low 24 bits need to be all ones.
 	 */
 	pvt->dram_limit[dram] = (((u64)high_limit & 0x000000FF) << 40) |
-				(((u64) low_limit & 0xFFFF0000) << 24) |
+				(((u64) low_limit & 0xFFFF0000) << 8) |
 				0x00FFFFFF;
 }
 

From 6d45d93ead319423099b82a4efd775bc0f159121 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 16 Oct 2009 23:18:14 +0100
Subject: [PATCH 22/70] dm snapshot: sort by chunk size to fix race

Avoid a race causing corruption when snapshots of the same origin have
different chunk sizes by sorting the internal list of snapshots by chunk
size, largest first.
  https://bugzilla.redhat.com/show_bug.cgi?id=182659

For example, let's have two snapshots with different chunk sizes. The
first snapshot (1) has small chunk size and the second snapshot (2) has
large chunk size.  Let's have chunks A, B, C in these snapshots:
snapshot1: ====A====   ====B====
snapshot2: ==========C==========

(Chunk size is a power of 2. Chunks are aligned.)

A write to the origin at a position within A and C comes along. It
triggers reallocation of A, then reallocation of C and links them
together using A as the 'primary' exception.

Then another write to the origin comes along at a position within B and
C.  It creates pending exception for B.  C already has a reallocation in
progress and it already has a primary exception (A), so nothing is done
to it: B and C are not linked.

If the reallocation of B finishes before the reallocation of C, because
there is no link with the pending exception for C it does not know to
wait for it and, the second write is dispatched to the origin and causes
data corruption in the chunk C in snapshot2.

To avoid this situation, we maintain snapshots sorted in descending
order of chunk size.  This leads to a guaranteed ordering on the links
between the pending exceptions and avoids the problem explained above -
both A and B now get linked to C.

Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 57f1bf7f3b7a..3a53a5a9bec8 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -296,6 +296,7 @@ static void __insert_origin(struct origin *o)
  */
 static int register_snapshot(struct dm_snapshot *snap)
 {
+	struct dm_snapshot *l;
 	struct origin *o, *new_o;
 	struct block_device *bdev = snap->origin->bdev;
 
@@ -319,7 +320,11 @@ static int register_snapshot(struct dm_snapshot *snap)
 		__insert_origin(o);
 	}
 
-	list_add_tail(&snap->list, &o->snapshots);
+	/* Sort the list according to chunk size, largest-first smallest-last */
+	list_for_each_entry(l, &o->snapshots, list)
+		if (l->store->chunk_size < snap->store->chunk_size)
+			break;
+	list_add_tail(&snap->list, &l->list);
 
 	up_write(&_origins_lock);
 	return 0;

From 034a186d29dbcef099e57ab23ec39440596be911 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Fri, 16 Oct 2009 23:18:14 +0100
Subject: [PATCH 23/70] dm snapshot: free exception store on init failure

While initializing the snapshot module, if we fail to register
the snapshot target then we must back-out the exception store
module initialization.

Cc: stable@kernel.org
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Reviewed-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 3a53a5a9bec8..53f4063f7ea4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1470,7 +1470,7 @@ static int __init dm_snapshot_init(void)
 	r = dm_register_target(&snapshot_target);
 	if (r) {
 		DMERR("snapshot target register failed %d", r);
-		return r;
+		goto bad_register_snapshot_target;
 	}
 
 	r = dm_register_target(&origin_target);
@@ -1527,6 +1527,9 @@ bad2:
 	dm_unregister_target(&origin_target);
 bad1:
 	dm_unregister_target(&snapshot_target);
+
+bad_register_snapshot_target:
+	dm_exception_store_exit();
 	return r;
 }
 

From bca915aae803cf01fde4461fc9c093cf5a86d7fc Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 16 Oct 2009 23:18:15 +0100
Subject: [PATCH 24/70] dm log: userspace fix incorrect luid cast in
 userspace_ctr

mips:

drivers/md/dm-log-userspace-base.c: In function `userspace_ctr':
drivers/md/dm-log-userspace-base.c:159: warning: cast from pointer to integer of different size

Cc: stable@kernel.org
Cc: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-log-userspace-base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 652bd33109e3..7ac2c1450d10 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -156,7 +156,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 	}
 
 	/* The ptr value is sufficient for local unique id */
-	lc->luid = (uint64_t)lc;
+	lc->luid = (unsigned long)lc;
 
 	lc->ti = ti;
 

From 03022c54b9725026c0370a810168975c387ad04c Mon Sep 17 00:00:00 2001
From: Zdenek Kabelac <zkabelac@redhat.com>
Date: Fri, 16 Oct 2009 23:18:15 +0100
Subject: [PATCH 25/70] dm: add missing del_gendisk to alloc_dev error path

Add missing del_gendisk() to error path when creation of workqueue fails.
Otherwice there is a resource leak and following warning is shown:

WARNING: at fs/sysfs/dir.c:487 sysfs_add_one+0xc5/0x160()
sysfs: cannot create duplicate filename '/devices/virtual/block/dm-0'

Cc: stable@kernel.org
Signed-off-by: Zdenek Kabelac <zkabelac@redhat.com>
Reviewed-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 376f1ab48a24..c5f9918dab24 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1822,6 +1822,7 @@ static struct mapped_device *alloc_dev(int minor)
 bad_bdev:
 	destroy_workqueue(md->wq);
 bad_thread:
+	del_gendisk(md->disk);
 	put_disk(md->disk);
 bad_disk:
 	blk_cleanup_queue(md->queue);

From f88fb981183e71daf40bbd84bc8251bbf7b59e19 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Fri, 16 Oct 2009 23:18:15 +0100
Subject: [PATCH 26/70] dm: dec_pending needs locking to save error value

Multiple instances of dec_pending() can run concurrently so a lock is
needed when it saves the first error code.

I have never experienced actual problem without locking and just found
this during code inspection while implementing the barrier support
patch for request-based dm.

This patch adds the locking.
I've done compile, boot and basic I/O testings.

Cc: stable@kernel.org
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c5f9918dab24..724efc63904d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -47,6 +47,7 @@ struct dm_io {
 	atomic_t io_count;
 	struct bio *bio;
 	unsigned long start_time;
+	spinlock_t endio_lock;
 };
 
 /*
@@ -578,8 +579,12 @@ static void dec_pending(struct dm_io *io, int error)
 	struct mapped_device *md = io->md;
 
 	/* Push-back supersedes any I/O errors */
-	if (error && !(io->error > 0 && __noflush_suspending(md)))
-		io->error = error;
+	if (unlikely(error)) {
+		spin_lock_irqsave(&io->endio_lock, flags);
+		if (!(io->error > 0 && __noflush_suspending(md)))
+			io->error = error;
+		spin_unlock_irqrestore(&io->endio_lock, flags);
+	}
 
 	if (atomic_dec_and_test(&io->io_count)) {
 		if (io->error == DM_ENDIO_REQUEUE) {
@@ -1226,6 +1231,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 	atomic_set(&ci.io->io_count, 1);
 	ci.io->bio = bio;
 	ci.io->md = md;
+	spin_lock_init(&ci.io->endio_lock);
 	ci.sector = bio->bi_sector;
 	ci.sector_count = bio_sectors(bio);
 	if (unlikely(bio_empty_barrier(bio)))

From 3f2412dc85260e5aae7ebb03bf50d5b1407e3083 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 16 Oct 2009 23:18:16 +0100
Subject: [PATCH 27/70] dm snapshot: require non zero chunk size by end of ctr

If we are creating snapshot with memory-stored exception store, fail if
the user didn't specify chunk size. Zero chunk size would probably crash
a lot of places in the rest of snapshot code.

Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Jonathan Brassow <jbrassow@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 53f4063f7ea4..9cb392b3e920 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -673,6 +673,11 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	bio_list_init(&s->queued_bios);
 	INIT_WORK(&s->queued_bios_work, flush_queued_bios);
 
+	if (!s->store->chunk_size) {
+		ti->error = "Chunk size not set";
+		goto bad_load_and_register;
+	}
+
 	/* Add snapshot to the list of snapshots for this origin */
 	/* Exceptions aren't triggered till snapshot_resume() is called */
 	if (register_snapshot(s)) {

From 0e8c4e4e3ebb15756ddc4170a88149a2cd323cfe Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 16 Oct 2009 23:18:16 +0100
Subject: [PATCH 28/70] dm exception store: fix failed set_chunk_size error
 path

Properly close the device if failing because of an invalid chunk size.

Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-exception-store.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 556acff3952f..e5de7627c52d 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -251,7 +251,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
 
 	r = set_chunk_size(tmp_store, argv[2], &ti->error);
 	if (r)
-		goto bad_cow;
+		goto bad_ctr;
 
 	r = type->ctr(tmp_store, 0, NULL);
 	if (r) {

From 4c6fff445d7aa753957856278d4d93bcad6e2c14 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 16 Oct 2009 23:18:16 +0100
Subject: [PATCH 29/70] dm snapshot: lock snapshot while supplying status

This patch locks the snapshot when returning status.  It fixes a race
when it could return an invalid number of free chunks if someone
was simultaneously modifying it.

Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 9cb392b3e920..9bc814aa2bbd 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1152,6 +1152,8 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 	unsigned sz = 0;
 	struct dm_snapshot *snap = ti->private;
 
+	down_write(&snap->lock);
+
 	switch (type) {
 	case STATUSTYPE_INFO:
 		if (!snap->valid)
@@ -1183,6 +1185,8 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 		break;
 	}
 
+	up_write(&snap->lock);
+
 	return 0;
 }
 

From df96eee679ba28c98cf722fa7c9f4286ee1ed0bd Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 16 Oct 2009 23:18:17 +0100
Subject: [PATCH 30/70] dm snapshot: use unsigned integer chunk size

Use unsigned integer chunk size.

Maximum chunk size is 512kB, there won't ever be need to use 4GB chunk size,
so the number can be 32-bit. This fixes compiler failure on 32-bit systems
with large block devices.

Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-exception-store.c | 20 +++++++++++---------
 drivers/md/dm-exception-store.h |  8 ++++----
 drivers/md/dm-snap-persistent.c | 16 ++++++++--------
 drivers/md/dm-snap.c            |  4 ++--
 4 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index e5de7627c52d..932d1b123143 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -155,7 +155,8 @@ static int set_chunk_size(struct dm_exception_store *store,
 	char *value;
 
 	chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
-	if (*chunk_size_arg == '\0' || *value != '\0') {
+	if (*chunk_size_arg == '\0' || *value != '\0' ||
+	    chunk_size_ulong > UINT_MAX) {
 		*error = "Invalid chunk size";
 		return -EINVAL;
 	}
@@ -171,34 +172,35 @@ static int set_chunk_size(struct dm_exception_store *store,
 	 */
 	chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
 
-	return dm_exception_store_set_chunk_size(store, chunk_size_ulong,
+	return dm_exception_store_set_chunk_size(store,
+						 (unsigned) chunk_size_ulong,
 						 error);
 }
 
 int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
-				      unsigned long chunk_size_ulong,
+				      unsigned chunk_size,
 				      char **error)
 {
 	/* Check chunk_size is a power of 2 */
-	if (!is_power_of_2(chunk_size_ulong)) {
+	if (!is_power_of_2(chunk_size)) {
 		*error = "Chunk size is not a power of 2";
 		return -EINVAL;
 	}
 
 	/* Validate the chunk size against the device block size */
-	if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) {
+	if (chunk_size % (bdev_logical_block_size(store->cow->bdev) >> 9)) {
 		*error = "Chunk size is not a multiple of device blocksize";
 		return -EINVAL;
 	}
 
-	if (chunk_size_ulong > INT_MAX >> SECTOR_SHIFT) {
+	if (chunk_size > INT_MAX >> SECTOR_SHIFT) {
 		*error = "Chunk size is too high";
 		return -EINVAL;
 	}
 
-	store->chunk_size = chunk_size_ulong;
-	store->chunk_mask = chunk_size_ulong - 1;
-	store->chunk_shift = ffs(chunk_size_ulong) - 1;
+	store->chunk_size = chunk_size;
+	store->chunk_mask = chunk_size - 1;
+	store->chunk_shift = ffs(chunk_size) - 1;
 
 	return 0;
 }
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 812c71872ba0..8a223a48802c 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -101,9 +101,9 @@ struct dm_exception_store {
 	struct dm_dev *cow;
 
 	/* Size of data blocks saved - must be a power of 2 */
-	chunk_t chunk_size;
-	chunk_t chunk_mask;
-	chunk_t chunk_shift;
+	unsigned chunk_size;
+	unsigned chunk_mask;
+	unsigned chunk_shift;
 
 	void *context;
 };
@@ -169,7 +169,7 @@ int dm_exception_store_type_register(struct dm_exception_store_type *type);
 int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
 
 int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
-				      unsigned long chunk_size_ulong,
+				      unsigned chunk_size,
 				      char **error);
 
 int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index d5b2e08750d5..0c746420c008 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -284,12 +284,13 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 {
 	int r;
 	struct disk_header *dh;
-	chunk_t chunk_size;
+	unsigned chunk_size;
 	int chunk_size_supplied = 1;
 	char *chunk_err;
 
 	/*
-	 * Use default chunk size (or hardsect_size, if larger) if none supplied
+	 * Use default chunk size (or logical_block_size, if larger)
+	 * if none supplied
 	 */
 	if (!ps->store->chunk_size) {
 		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
@@ -334,10 +335,9 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 		return 0;
 
 	if (chunk_size_supplied)
-		DMWARN("chunk size %llu in device metadata overrides "
-		       "table chunk size of %llu.",
-		       (unsigned long long)chunk_size,
-		       (unsigned long long)ps->store->chunk_size);
+		DMWARN("chunk size %u in device metadata overrides "
+		       "table chunk size of %u.",
+		       chunk_size, ps->store->chunk_size);
 
 	/* We had a bogus chunk_size. Fix stuff up. */
 	free_area(ps);
@@ -345,8 +345,8 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 	r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
 					      &chunk_err);
 	if (r) {
-		DMERR("invalid on-disk chunk size %llu: %s.",
-		      (unsigned long long)chunk_size, chunk_err);
+		DMERR("invalid on-disk chunk size %u: %s.",
+		      chunk_size, chunk_err);
 		return r;
 	}
 
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 9bc814aa2bbd..3a3ba46e6d4b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -961,7 +961,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
 
 	src.bdev = bdev;
 	src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
-	src.count = min(s->store->chunk_size, dev_size - src.sector);
+	src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
 
 	dest.bdev = s->store->cow->bdev;
 	dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
@@ -1402,7 +1402,7 @@ static void origin_resume(struct dm_target *ti)
 	struct dm_dev *dev = ti->private;
 	struct dm_snapshot *snap;
 	struct origin *o;
-	chunk_t chunk_size = 0;
+	unsigned chunk_size = 0;
 
 	down_read(&_origins_lock);
 	o = __lookup_origin(dev->bdev);

From c1cc65caa19bb8a1b2e371000ef2719581db1691 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 16 Oct 2009 23:18:22 +0100
Subject: [PATCH 31/70] dm snapshot: allow chunk size to be less than page size

Allow the snapshot chunk size to be smaller than the page size
The code is now capable of handling this due to some previous
fixes and enhancements.

As the page size varies between computers, prior to this patch,
the chunk size of a snapshot dictated which machines could read it:
Snapshots created on one machine might not be readable on another.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-exception-store.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 932d1b123143..7dbe652efb5a 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -138,16 +138,6 @@ int dm_exception_store_type_unregister(struct dm_exception_store_type *type)
 }
 EXPORT_SYMBOL(dm_exception_store_type_unregister);
 
-/*
- * Round a number up to the nearest 'size' boundary.  size must
- * be a power of 2.
- */
-static ulong round_up(ulong n, ulong size)
-{
-	size--;
-	return (n + size) & ~size;
-}
-
 static int set_chunk_size(struct dm_exception_store *store,
 			  const char *chunk_size_arg, char **error)
 {
@@ -166,12 +156,6 @@ static int set_chunk_size(struct dm_exception_store *store,
 		return 0;
 	}
 
-	/*
-	 * Chunk size must be multiple of page size.  Silently
-	 * round up if it's not.
-	 */
-	chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
-
 	return dm_exception_store_set_chunk_size(store,
 						 (unsigned) chunk_size_ulong,
 						 error);

From f6965582ac9b87d875aac8e23afdb03fe35ee33d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 16 Oct 2009 17:54:34 -0700
Subject: [PATCH 32/70] vmxnet3: use dev_dbg, fix build for CONFIG_BLOCK=n

vmxnet3 was using dprintk() for debugging output.  This was
defined in <linux/dst.h> and was the only thing that was
used from that header file.  This caused compile errors
when CONFIG_BLOCK was not enabled due to bio* and BIO*
uses in the header file, so change this driver to use
dev_dbg() for debugging output.

include/linux/dst.h:520: error: dereferencing pointer to incomplete type
include/linux/dst.h:520: error: 'BIO_POOL_BITS' undeclared (first use in this function)
include/linux/dst.h:521: error: dereferencing pointer to incomplete type
include/linux/dst.h:522: error: dereferencing pointer to incomplete type
include/linux/dst.h:525: error: dereferencing pointer to incomplete type
make[4]: *** [drivers/net/vmxnet3/vmxnet3_drv.o] Error 1

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Bhavesh Davda <bhavesh@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_drv.c | 27 ++++++++++++++++++---------
 drivers/net/vmxnet3/vmxnet3_int.h |  2 +-
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 6a16f76f277e..004353a46af0 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -481,7 +481,8 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
 	}
 	rq->uncommitted[ring_idx] += num_allocated;
 
-	dprintk(KERN_ERR "alloc_rx_buf: %d allocated, next2fill %u, next2comp "
+	dev_dbg(&adapter->netdev->dev,
+		"alloc_rx_buf: %d allocated, next2fill %u, next2comp "
 		"%u, uncommited %u\n", num_allocated, ring->next2fill,
 		ring->next2comp, rq->uncommitted[ring_idx]);
 
@@ -539,7 +540,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
 		tbi = tq->buf_info + tq->tx_ring.next2fill;
 		tbi->map_type = VMXNET3_MAP_NONE;
 
-		dprintk(KERN_ERR "txd[%u]: 0x%Lx 0x%x 0x%x\n",
+		dev_dbg(&adapter->netdev->dev,
+			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
 			tq->tx_ring.next2fill, ctx->sop_txd->txd.addr,
 			ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
 		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
@@ -572,7 +574,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
 		gdesc->dword[2] = dw2 | buf_size;
 		gdesc->dword[3] = 0;
 
-		dprintk(KERN_ERR "txd[%u]: 0x%Lx 0x%x 0x%x\n",
+		dev_dbg(&adapter->netdev->dev,
+			"txd[%u]: 0x%Lx 0x%x 0x%x\n",
 			tq->tx_ring.next2fill, gdesc->txd.addr,
 			gdesc->dword[2], gdesc->dword[3]);
 		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
@@ -600,7 +603,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
 		gdesc->dword[2] = dw2 | frag->size;
 		gdesc->dword[3] = 0;
 
-		dprintk(KERN_ERR "txd[%u]: 0x%llu %u %u\n",
+		dev_dbg(&adapter->netdev->dev,
+			"txd[%u]: 0x%llu %u %u\n",
 			tq->tx_ring.next2fill, gdesc->txd.addr,
 			gdesc->dword[2], gdesc->dword[3]);
 		vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
@@ -697,7 +701,8 @@ vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
 
 	memcpy(tdd->data, skb->data, ctx->copy_size);
-	dprintk(KERN_ERR "copy %u bytes to dataRing[%u]\n",
+	dev_dbg(&adapter->netdev->dev,
+		"copy %u bytes to dataRing[%u]\n",
 		ctx->copy_size, tq->tx_ring.next2fill);
 	return 1;
 
@@ -808,7 +813,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 
 	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
 		tq->stats.tx_ring_full++;
-		dprintk(KERN_ERR "tx queue stopped on %s, next2comp %u"
+		dev_dbg(&adapter->netdev->dev,
+			"tx queue stopped on %s, next2comp %u"
 			" next2fill %u\n", adapter->netdev->name,
 			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
 
@@ -853,7 +859,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 
 	/* finally flips the GEN bit of the SOP desc */
 	gdesc->dword[2] ^= VMXNET3_TXD_GEN;
-	dprintk(KERN_ERR "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
+	dev_dbg(&adapter->netdev->dev,
+		"txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
 		(u32)((union Vmxnet3_GenericDesc *)ctx.sop_txd -
 		tq->tx_ring.base), gdesc->txd.addr, gdesc->dword[2],
 		gdesc->dword[3]);
@@ -990,7 +997,8 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 			if (unlikely(rcd->len == 0)) {
 				/* Pretend the rx buffer is skipped. */
 				BUG_ON(!(rcd->sop && rcd->eop));
-				dprintk(KERN_ERR "rxRing[%u][%u] 0 length\n",
+				dev_dbg(&adapter->netdev->dev,
+					"rxRing[%u][%u] 0 length\n",
 					ring_idx, idx);
 				goto rcd_done;
 			}
@@ -1683,7 +1691,8 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
 	int err;
 	u32 ret;
 
-	dprintk(KERN_ERR "%s: skb_buf_size %d, rx_buf_per_pkt %d, ring sizes"
+	dev_dbg(&adapter->netdev->dev,
+		"%s: skb_buf_size %d, rx_buf_per_pkt %d, ring sizes"
 		" %u %u %u\n", adapter->netdev->name, adapter->skb_buf_size,
 		adapter->rx_buf_per_pkt, adapter->tx_queue.tx_ring.size,
 		adapter->rx_queue.rx_ring[0].size,
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 6bb91576e999..3c0d70d58111 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -30,6 +30,7 @@
 #include <linux/types.h>
 #include <linux/ethtool.h>
 #include <linux/delay.h>
+#include <linux/device.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
 #include <linux/ethtool.h>
@@ -59,7 +60,6 @@
 #include <linux/if_vlan.h>
 #include <linux/if_arp.h>
 #include <linux/inetdevice.h>
-#include <linux/dst.h>
 
 #include "vmxnet3_defs.h"
 

From b0225e8f1023ff1323e2de4caa148813eea71385 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 15 Oct 2009 09:46:48 -0700
Subject: [PATCH 33/70] Input: i8042 - add Sony Vaio VGN-FZ240E to the nomux
 list

On this model, when KBD is in active multiplexing mode, acknowledgements
to reset and get ID commands issued on KBD port sometimes are delivered
to AUX3 port (touchpad) which messes up device detection. Legacy KBC
mode works fine and since there are no external PS/2 ports on this laptop
and no support for docking station we can safely disable active MUX mode.

Tested-by: Carlos R. Mafra <crmafra2@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/serio/i8042-x86ia64io.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 77ff205c8577..a537925f7651 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -326,6 +326,17 @@ static struct dmi_system_id __initdata i8042_dmi_nomux_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FS115B"),
 		},
 	},
+	{
+		/*
+		 * Reset and GET ID commands issued via KBD port are
+		 * sometimes being delivered to AUX3.
+		 */
+		.ident = "Sony Vaio FZ-240E",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FZ240E"),
+		},
+	},
 	{
 		.ident = "Amoi M636/A737",
 		.matches = {

From 4f7802d032344fe3b87441278f53f9ea3aa9506a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 15 Oct 2009 09:46:48 -0700
Subject: [PATCH 34/70] Input: logips2pp - model 73 is actually TrackMan FX

Reported-and-tested-by: Harald Dunkel <harald.dunkel@t-online.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/mouse/logips2pp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/mouse/logips2pp.c b/drivers/input/mouse/logips2pp.c
index de745d751162..ab5dc5f5fd83 100644
--- a/drivers/input/mouse/logips2pp.c
+++ b/drivers/input/mouse/logips2pp.c
@@ -219,7 +219,7 @@ static const struct ps2pp_info *get_model_info(unsigned char model)
 				PS2PP_WHEEL | PS2PP_SIDE_BTN | PS2PP_TASK_BTN |
 				PS2PP_EXTRA_BTN | PS2PP_NAV_BTN | PS2PP_HWHEEL },
 		{ 72,	PS2PP_KIND_TRACKMAN,	0 },			/* T-CH11: TrackMan Marble */
-		{ 73,	0,			PS2PP_SIDE_BTN },
+		{ 73,	PS2PP_KIND_TRACKMAN,	PS2PP_SIDE_BTN },	/* TrackMan FX */
 		{ 75,	PS2PP_KIND_WHEEL,	PS2PP_WHEEL },
 		{ 76,	PS2PP_KIND_WHEEL,	PS2PP_WHEEL },
 		{ 79,	PS2PP_KIND_TRACKMAN,	PS2PP_WHEEL },		/* TrackMan with wheel */

From 000c2a35b8b0485f5a872c24c4f2d0d6579951c1 Mon Sep 17 00:00:00 2001
From: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Date: Fri, 16 Oct 2009 16:13:59 -0700
Subject: [PATCH 35/70] Input: atkbd - consolidate force release quirks for
 volume keys

Some machines share same key list for volume up/down release key quirks,
use only one key list.

Signed-off-by: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/atkbd.c | 51 ++++++++++------------------------
 1 file changed, 14 insertions(+), 37 deletions(-)

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index de520386f13f..271c0b7045c3 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -886,29 +886,6 @@ static unsigned int atkbd_hp_forced_release_keys[] = {
 	0x94, -1U
 };
 
-/*
- * Inventec system with broken key release on volume keys
- */
-static unsigned int atkbd_inventec_forced_release_keys[] = {
-	0xae, 0xb0, -1U
-};
-
-/*
- * Perform fixup for HP Pavilion ZV6100 laptop that doesn't generate release
- * for its volume buttons
- */
-static unsigned int atkbd_hp_zv6100_forced_release_keys[] = {
-	0xae, 0xb0, -1U
-};
-
-/*
- * Perform fixup for HP (Compaq) Presario R4000 R4100 R4200 that don't generate
- * release for their volume buttons
- */
-static unsigned int atkbd_hp_r4000_forced_release_keys[] = {
-	0xae, 0xb0, -1U
-};
-
 /*
  * Samsung NC10,NC20 with Fn+F? key release not working
  */
@@ -916,14 +893,6 @@ static unsigned int atkbd_samsung_forced_release_keys[] = {
 	0x82, 0x83, 0x84, 0x86, 0x88, 0x89, 0xb3, 0xf7, 0xf9, -1U
 };
 
-/*
- * The volume up and volume down special keys on a Fujitsu Amilo PA 1510 laptop
- * do not generate release events so we have to do it ourselves.
- */
-static unsigned int atkbd_amilo_pa1510_forced_release_keys[] = {
-	0xb0, 0xae, -1U
-};
-
 /*
  * Amilo Pi 3525 key release for Fn+Volume keys not working
  */
@@ -945,6 +914,14 @@ static unsigned int atkdb_soltech_ta12_forced_release_keys[] = {
 	0xa0, 0xae, 0xb0, -1U
 };
 
+/*
+ * Many notebooks don't send key release event for volume up/down
+ * keys, with key list below common among them
+ */
+static unsigned int atkbd_volume_forced_release_keys[] = {
+	0xae, 0xb0, -1U
+};
+
 /*
  * atkbd_set_keycode_table() initializes keyboard's keycode table
  * according to the selected scancode set
@@ -1585,7 +1562,7 @@ static struct dmi_system_id atkbd_dmi_quirk_table[] __initdata = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Pavilion ZV6100"),
 		},
 		.callback = atkbd_setup_forced_release,
-		.driver_data = atkbd_hp_zv6100_forced_release_keys,
+		.driver_data = atkbd_volume_forced_release_keys,
 	},
 	{
 		.ident = "HP Presario R4000",
@@ -1594,7 +1571,7 @@ static struct dmi_system_id atkbd_dmi_quirk_table[] __initdata = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4000"),
 		},
 		.callback = atkbd_setup_forced_release,
-		.driver_data = atkbd_hp_r4000_forced_release_keys,
+		.driver_data = atkbd_volume_forced_release_keys,
 	},
 	{
 		.ident = "HP Presario R4100",
@@ -1603,7 +1580,7 @@ static struct dmi_system_id atkbd_dmi_quirk_table[] __initdata = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4100"),
 		},
 		.callback = atkbd_setup_forced_release,
-		.driver_data = atkbd_hp_r4000_forced_release_keys,
+		.driver_data = atkbd_volume_forced_release_keys,
 	},
 	{
 		.ident = "HP Presario R4200",
@@ -1612,7 +1589,7 @@ static struct dmi_system_id atkbd_dmi_quirk_table[] __initdata = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4200"),
 		},
 		.callback = atkbd_setup_forced_release,
-		.driver_data = atkbd_hp_r4000_forced_release_keys,
+		.driver_data = atkbd_volume_forced_release_keys,
 	},
 	{
 		.ident = "Inventec Symphony",
@@ -1621,7 +1598,7 @@ static struct dmi_system_id atkbd_dmi_quirk_table[] __initdata = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "SYMPHONY 6.0/7.0"),
 		},
 		.callback = atkbd_setup_forced_release,
-		.driver_data = atkbd_inventec_forced_release_keys,
+		.driver_data = atkbd_volume_forced_release_keys,
 	},
 	{
 		.ident = "Samsung NC10",
@@ -1657,7 +1634,7 @@ static struct dmi_system_id atkbd_dmi_quirk_table[] __initdata = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Pa 1510"),
 		},
 		.callback = atkbd_setup_forced_release,
-		.driver_data = atkbd_amilo_pa1510_forced_release_keys,
+		.driver_data = atkbd_volume_forced_release_keys,
 	},
 	{
 		.ident = "Fujitsu Amilo Pi 3525",

From 3776989d2339c58ff8d8421e754603f186d7439b Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Sun, 18 Oct 2009 00:17:15 -0700
Subject: [PATCH 36/70] Input: hp_sdc_rtc - fix test in hp_sdc_rtc_read_rt()

If left unsigned the hp_sdc_rtc_read_i8042timer() return value will not
be checked correctly.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/hp_sdc_rtc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c
index 216a559f55ea..ea821b546969 100644
--- a/drivers/input/misc/hp_sdc_rtc.c
+++ b/drivers/input/misc/hp_sdc_rtc.c
@@ -209,7 +209,7 @@ static inline int hp_sdc_rtc_read_rt(struct timeval *res) {
 
 /* Read the i8042 fast handshake timer */
 static inline int hp_sdc_rtc_read_fhs(struct timeval *res) {
-	uint64_t raw;
+	int64_t raw;
 	unsigned int tenms;
 
 	raw = hp_sdc_rtc_read_i8042timer(HP_SDC_CMD_LOAD_FHS, 2);

From 9f0d793b52eb2266359661369ef6303838904855 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 11 Sep 2009 13:03:19 -0400
Subject: [PATCH 37/70] fsnotify: do not set group for a mark before it is on
 the i_list

fsnotify_add_mark is supposed to add a mark to the g_list and i_list and to
set the group and inode for the mark.  fsnotify_destroy_mark_by_entry uses
the fact that ->group != NULL to know if this group should be destroyed or
if it's already been done.

But fsnotify_add_mark sets the group and inode before it actually adds the
mark to the i_list and g_list.  This can result in a race in inotify, it
requires 3 threads.

sys_inotify_add_watch("file")	sys_inotify_add_watch("file")	sys_inotify_rm_watch([a])
inotify_update_watch()
inotify_new_watch()
inotify_add_to_idr()
   ^--- returns wd = [a]
				inotfiy_update_watch()
				inotify_new_watch()
				inotify_add_to_idr()
				fsnotify_add_mark()
				   ^--- returns wd = [b]
				returns to userspace;
								inotify_idr_find([a])
								   ^--- gives us the pointer from task 1
fsnotify_add_mark()
   ^--- this is going to set the mark->group and mark->inode fields, but will
return -EEXIST because of the race with [b].
								fsnotify_destroy_mark()
								   ^--- since ->group != NULL we call back
									into inotify_freeing_mark() which calls
								inotify_remove_from_idr([a])

since fsnotify_add_mark() failed we call:
inotify_remove_from_idr([a])     <------WHOOPS it's not in the idr, this could
					have been any entry added later!

The fix is to make sure we don't set mark->group until we are sure the mark is
on the inode and fsnotify_add_mark will return success.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inode_mark.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index c8a07c65482b..3165d85aada2 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -324,11 +324,11 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 	spin_lock(&group->mark_lock);
 	spin_lock(&inode->i_lock);
 
-	entry->group = group;
-	entry->inode = inode;
-
 	lentry = fsnotify_find_mark_entry(group, inode);
 	if (!lentry) {
+		entry->group = group;
+		entry->inode = inode;
+
 		hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries);
 		list_add(&entry->g_list, &group->mark_entries);
 

From cdc321ff0af78e818c97d4787f62bf52bdf9db2a Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 29 Jun 2009 11:13:30 -0400
Subject: [PATCH 38/70] inotify: deprecate the inotify kernel interface

In 2.6.33 there will be no users of the inotify interface.  Mark it for
removal as fsnotify is more generic and is easier to use.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 Documentation/feature-removal-schedule.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 04e6c819b28a..bc693fffabe0 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -418,6 +418,14 @@ When:	2.6.33
 Why:	Should be implemented in userspace, policy daemon.
 Who:	Johannes Berg <johannes@sipsolutions.net>
 
+---------------------------
+
+What:	CONFIG_INOTIFY
+When:	2.6.33
+Why:	last user (audit) will be converted to the newer more generic
+	and more easily maintained fsnotify subsystem
+Who:	Eric Paris <eparis@redhat.com>
+
 ----------------------------
 
 What:	lock_policy_rwsem_* and unlock_policy_rwsem_* will not be

From 3de0ef4f2067da58fa5126d821a56dcb98cdb565 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Wed, 14 Oct 2009 20:54:03 +0800
Subject: [PATCH 39/70] inotify: fix coalesce duplicate events into a single
 event in special case

If we do rename a dir entry, like this:

  rename("/tmp/ino7UrgoJ.rename1", "/tmp/ino7UrgoJ.rename2")
  rename("/tmp/ino7UrgoJ.rename2", "/tmp/ino7UrgoJ")

The duplicate events should be coalesced into a single event. But those two
events do not be coalesced into a single event, due to some bad check in
event_compare(). It can not match the two NULL inodes as the same event.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/notification.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 3816d5750dd5..b8bf53b4c108 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -143,7 +143,7 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
 			/* remember, after old was put on the wait_q we aren't
 			 * allowed to look at the inode any more, only thing
 			 * left to check was if the file_name is the same */
-			if (old->name_len &&
+			if (!old->name_len ||
 			    !strcmp(old->file_name, new->file_name))
 				return true;
 			break;

From 16dd18b0837dee46f1a6b0c01830c5f2b7187266 Mon Sep 17 00:00:00 2001
From: Thomas Chou <thomas@wytron.com.tw>
Date: Wed, 7 Oct 2009 14:16:42 +0000
Subject: [PATCH 40/70] ethoc: inline regs access

Signed-off-by: Thomas Chou <thomas@wytron.com.tw>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethoc.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
index 1d338c6f5314..88a1c5223069 100644
--- a/drivers/net/ethoc.c
+++ b/drivers/net/ethoc.c
@@ -222,24 +222,25 @@ struct ethoc_bd {
 	u32 addr;
 };
 
-static u32 ethoc_read(struct ethoc *dev, loff_t offset)
+static inline u32 ethoc_read(struct ethoc *dev, loff_t offset)
 {
 	return ioread32(dev->iobase + offset);
 }
 
-static void ethoc_write(struct ethoc *dev, loff_t offset, u32 data)
+static inline void ethoc_write(struct ethoc *dev, loff_t offset, u32 data)
 {
 	iowrite32(data, dev->iobase + offset);
 }
 
-static void ethoc_read_bd(struct ethoc *dev, int index, struct ethoc_bd *bd)
+static inline void ethoc_read_bd(struct ethoc *dev, int index,
+		struct ethoc_bd *bd)
 {
 	loff_t offset = ETHOC_BD_BASE + (index * sizeof(struct ethoc_bd));
 	bd->stat = ethoc_read(dev, offset + 0);
 	bd->addr = ethoc_read(dev, offset + 4);
 }
 
-static void ethoc_write_bd(struct ethoc *dev, int index,
+static inline void ethoc_write_bd(struct ethoc *dev, int index,
 		const struct ethoc_bd *bd)
 {
 	loff_t offset = ETHOC_BD_BASE + (index * sizeof(struct ethoc_bd));
@@ -247,33 +248,33 @@ static void ethoc_write_bd(struct ethoc *dev, int index,
 	ethoc_write(dev, offset + 4, bd->addr);
 }
 
-static void ethoc_enable_irq(struct ethoc *dev, u32 mask)
+static inline void ethoc_enable_irq(struct ethoc *dev, u32 mask)
 {
 	u32 imask = ethoc_read(dev, INT_MASK);
 	imask |= mask;
 	ethoc_write(dev, INT_MASK, imask);
 }
 
-static void ethoc_disable_irq(struct ethoc *dev, u32 mask)
+static inline void ethoc_disable_irq(struct ethoc *dev, u32 mask)
 {
 	u32 imask = ethoc_read(dev, INT_MASK);
 	imask &= ~mask;
 	ethoc_write(dev, INT_MASK, imask);
 }
 
-static void ethoc_ack_irq(struct ethoc *dev, u32 mask)
+static inline void ethoc_ack_irq(struct ethoc *dev, u32 mask)
 {
 	ethoc_write(dev, INT_SOURCE, mask);
 }
 
-static void ethoc_enable_rx_and_tx(struct ethoc *dev)
+static inline void ethoc_enable_rx_and_tx(struct ethoc *dev)
 {
 	u32 mode = ethoc_read(dev, MODER);
 	mode |= MODER_RXEN | MODER_TXEN;
 	ethoc_write(dev, MODER, mode);
 }
 
-static void ethoc_disable_rx_and_tx(struct ethoc *dev)
+static inline void ethoc_disable_rx_and_tx(struct ethoc *dev)
 {
 	u32 mode = ethoc_read(dev, MODER);
 	mode &= ~(MODER_RXEN | MODER_TXEN);

From 50c54a57dfbd392e17f1473717b8e125afcb01a3 Mon Sep 17 00:00:00 2001
From: Thomas Chou <thomas@wytron.com.tw>
Date: Wed, 7 Oct 2009 14:16:43 +0000
Subject: [PATCH 41/70] ethoc: clear only pending irqs

This patch fixed the problem of dropped packets due to lost of
interrupt requests. We should only clear what was pending at the
moment we read the irq source reg.

Signed-off-by: Thomas Chou <thomas@wytron.com.tw>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethoc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
index 88a1c5223069..590473afb3dc 100644
--- a/drivers/net/ethoc.c
+++ b/drivers/net/ethoc.c
@@ -508,7 +508,7 @@ static irqreturn_t ethoc_interrupt(int irq, void *dev_id)
 		return IRQ_NONE;
 	}
 
-	ethoc_ack_irq(priv, INT_MASK_ALL);
+	ethoc_ack_irq(priv, pending);
 
 	if (pending & INT_MASK_BUSY) {
 		dev_err(&dev->dev, "packet dropped\n");

From 77238f2b942b38ab4e7f3aced44084493e4a8675 Mon Sep 17 00:00:00 2001
From: Tomoki Sekiyama <tomoki.sekiyama.qu@hitachi.com>
Date: Sun, 18 Oct 2009 23:17:37 -0700
Subject: [PATCH 42/70] AF_UNIX: Fix deadlock on connecting to shutdown socket

I found a deadlock bug in UNIX domain socket, which makes able to DoS
attack against the local machine by non-root users.

How to reproduce:
1. Make a listening AF_UNIX/SOCK_STREAM socket with an abstruct
    namespace(*), and shutdown(2) it.
 2. Repeat connect(2)ing to the listening socket from the other sockets
    until the connection backlog is full-filled.
 3. connect(2) takes the CPU forever. If every core is taken, the
    system hangs.

PoC code: (Run as many times as cores on SMP machines.)

int main(void)
{
	int ret;
	int csd;
	int lsd;
	struct sockaddr_un sun;

	/* make an abstruct name address (*) */
	memset(&sun, 0, sizeof(sun));
	sun.sun_family = PF_UNIX;
	sprintf(&sun.sun_path[1], "%d", getpid());

	/* create the listening socket and shutdown */
	lsd = socket(AF_UNIX, SOCK_STREAM, 0);
	bind(lsd, (struct sockaddr *)&sun, sizeof(sun));
	listen(lsd, 1);
	shutdown(lsd, SHUT_RDWR);

	/* connect loop */
	alarm(15); /* forcely exit the loop after 15 sec */
	for (;;) {
		csd = socket(AF_UNIX, SOCK_STREAM, 0);
		ret = connect(csd, (struct sockaddr *)&sun, sizeof(sun));
		if (-1 == ret) {
			perror("connect()");
			break;
		}
		puts("Connection OK");
	}
	return 0;
}

(*) Make sun_path[0] = 0 to use the abstruct namespace.
    If a file-based socket is used, the system doesn't deadlock because
    of context switches in the file system layer.

Why this happens:
 Error checks between unix_socket_connect() and unix_wait_for_peer() are
 inconsistent. The former calls the latter to wait until the backlog is
 processed. Despite the latter returns without doing anything when the
 socket is shutdown, the former doesn't check the shutdown state and
 just retries calling the latter forever.

Patch:
 The patch below adds shutdown check into unix_socket_connect(), so
 connect(2) to the shutdown socket will return -ECONREFUSED.

Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama.qu@hitachi.com>
Signed-off-by: Masanori Yoshida <masanori.yoshida.tv@hitachi.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 51ab497115eb..fc820cd75453 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1074,6 +1074,8 @@ restart:
 	err = -ECONNREFUSED;
 	if (other->sk_state != TCP_LISTEN)
 		goto out_unlock;
+	if (other->sk_shutdown & RCV_SHUTDOWN)
+		goto out_unlock;
 
 	if (unix_recvq_full(other)) {
 		err = -EAGAIN;

From a1a2ad9151c26d92e5c733a33d52108f5d3a5b57 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 19 Oct 2009 19:12:36 -0700
Subject: [PATCH 43/70] Revert "tcp: fix tcp_defer_accept to consider the
 timeout"

This reverts commit 6d01a026b7d3009a418326bdcf313503a314f1ea.

Julian Anastasov, Willy Tarreau and Eric Dumazet have come up
with a more correct way to deal with this.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_minisocks.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e320afea07fc..624c3c9b3c2b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -644,7 +644,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
 	if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
 	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
-		inet_csk(sk)->icsk_accept_queue.rskq_defer_accept--;
 		inet_rsk(req)->acked = 1;
 		return NULL;
 	}

From d1b99ba41d6c5aa1ed2fc634323449dd656899e9 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Mon, 19 Oct 2009 10:01:56 +0000
Subject: [PATCH 44/70] tcp: accept socket after TCP_DEFER_ACCEPT period

Willy Tarreau and many other folks in recent years
were concerned what happens when the TCP_DEFER_ACCEPT period
expires for clients which sent ACK packet. They prefer clients
that actively resend ACK on our SYN-ACK retransmissions to be
converted from open requests to sockets and queued to the
listener for accepting after the deferring period is finished.
Then application server can decide to wait longer for data
or to properly terminate the connection with FIN if read()
returns EAGAIN which is an indication for accepting after
the deferring period. This change still can have side effects
for applications that expect always to see data on the accepted
socket. Others can be prepared to work in both modes (with or
without TCP_DEFER_ACCEPT period) and their data processing can
ignore the read=EAGAIN notification and to allocate resources for
clients which proved to have no data to send during the deferring
period. OTOH, servers that use TCP_DEFER_ACCEPT=1 as flag (not
as a timeout) to wait for data will notice clients that didn't
send data for 3 seconds but that still resend ACKs.
Thanks to Willy Tarreau for the initial idea and to
Eric Dumazet for the review and testing the change.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_minisocks.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 624c3c9b3c2b..4c03598ed924 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -641,8 +641,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (!(flg & TCP_FLAG_ACK))
 		return NULL;
 
-	/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
-	if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+	/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
+	if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
 	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
 		inet_rsk(req)->acked = 1;
 		return NULL;

From 0c3d79bce48034018e840468ac5a642894a521a3 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Mon, 19 Oct 2009 10:03:58 +0000
Subject: [PATCH 45/70] tcp: reduce SYN-ACK retrans for TCP_DEFER_ACCEPT

Change SYN-ACK retransmitting code for the TCP_DEFER_ACCEPT
users to not retransmit SYN-ACKs during the deferring period if
ACK from client was received. The goal is to reduce traffic
during the deferring period. When the period is finished
we continue with sending SYN-ACKs (at least one) but this time
any traffic from client will change the request to established
socket allowing application to terminate it properly.
Also, do not drop acked request if sending of SYN-ACK fails.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 34 ++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4351ca2cf0b8..537731b3bcb3 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -446,6 +446,28 @@ extern int sysctl_tcp_synack_retries;
 
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
+/* Decide when to expire the request and when to resend SYN-ACK */
+static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
+				  const int max_retries,
+				  const u8 rskq_defer_accept,
+				  int *expire, int *resend)
+{
+	if (!rskq_defer_accept) {
+		*expire = req->retrans >= thresh;
+		*resend = 1;
+		return;
+	}
+	*expire = req->retrans >= thresh &&
+		  (!inet_rsk(req)->acked || req->retrans >= max_retries);
+	/*
+	 * Do not resend while waiting for data after ACK,
+	 * start to resend on end of deferring period to give
+	 * last chance for data or ACK to create established socket.
+	 */
+	*resend = !inet_rsk(req)->acked ||
+		  req->retrans >= rskq_defer_accept - 1;
+}
+
 void inet_csk_reqsk_queue_prune(struct sock *parent,
 				const unsigned long interval,
 				const unsigned long timeout,
@@ -501,9 +523,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		reqp=&lopt->syn_table[i];
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
-				if ((req->retrans < thresh ||
-				     (inet_rsk(req)->acked && req->retrans < max_retries))
-				    && !req->rsk_ops->rtx_syn_ack(parent, req)) {
+				int expire = 0, resend = 0;
+
+				syn_ack_recalc(req, thresh, max_retries,
+					       queue->rskq_defer_accept,
+					       &expire, &resend);
+				if (!expire &&
+				    (!resend ||
+				     !req->rsk_ops->rtx_syn_ack(parent, req) ||
+				     inet_rsk(req)->acked)) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)

From b103cf34382f26ff48a87931b83f13b177b47c1a Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Mon, 19 Oct 2009 10:10:40 +0000
Subject: [PATCH 46/70] tcp: fix TCP_DEFER_ACCEPT retrans calculation

Fix TCP_DEFER_ACCEPT conversion between seconds and
retransmission to match the TCP SYN-ACK retransmission periods
because the time is converted to such retransmissions. The old
algorithm selects one more retransmission in some cases. Allow
up to 255 retransmissions.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 55 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 64d0af675823..9b2756fbdf9b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -326,6 +326,43 @@ void tcp_enter_memory_pressure(struct sock *sk)
 
 EXPORT_SYMBOL(tcp_enter_memory_pressure);
 
+/* Convert seconds to retransmits based on initial and max timeout */
+static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
+{
+	u8 res = 0;
+
+	if (seconds > 0) {
+		int period = timeout;
+
+		res = 1;
+		while (seconds > period && res < 255) {
+			res++;
+			timeout <<= 1;
+			if (timeout > rto_max)
+				timeout = rto_max;
+			period += timeout;
+		}
+	}
+	return res;
+}
+
+/* Convert retransmits to seconds based on initial and max timeout */
+static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
+{
+	int period = 0;
+
+	if (retrans > 0) {
+		period = timeout;
+		while (--retrans) {
+			timeout <<= 1;
+			if (timeout > rto_max)
+				timeout = rto_max;
+			period += timeout;
+		}
+	}
+	return period;
+}
+
 /*
  *	Wait for a TCP event.
  *
@@ -2163,16 +2200,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		icsk->icsk_accept_queue.rskq_defer_accept = 0;
-		if (val > 0) {
-			/* Translate value in seconds to number of
-			 * retransmits */
-			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
-			       val > ((TCP_TIMEOUT_INIT / HZ) <<
-				       icsk->icsk_accept_queue.rskq_defer_accept))
-				icsk->icsk_accept_queue.rskq_defer_accept++;
-			icsk->icsk_accept_queue.rskq_defer_accept++;
-		}
+		/* Translate value in seconds to number of retransmits */
+		icsk->icsk_accept_queue.rskq_defer_accept =
+			secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+					TCP_RTO_MAX / HZ);
 		break;
 
 	case TCP_WINDOW_CLAMP:
@@ -2353,8 +2384,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
-			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
+		val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+				      TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;

From f74c77cb1124a11acf69c98d10c0fdc22f322664 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Sun, 18 Oct 2009 20:24:41 +0000
Subject: [PATCH 47/70] bluetooth: scheduling while atomic bug fix

Due to driver core changes dev_set_drvdata will call kzalloc which should be
in might_sleep context, but hci_conn_add will be called in atomic context

Like dev_set_name move dev_set_drvdata to work queue function.

oops as following:

Oct  2 17:41:59 darkstar kernel: [  438.001341] BUG: sleeping function called from invalid context at mm/slqb.c:1546
Oct  2 17:41:59 darkstar kernel: [  438.001345] in_atomic(): 1, irqs_disabled(): 0, pid: 2133, name: sdptool
Oct  2 17:41:59 darkstar kernel: [  438.001348] 2 locks held by sdptool/2133:
Oct  2 17:41:59 darkstar kernel: [  438.001350]  #0:  (sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+.+.}, at: [<faa1d2f5>] lock_sock+0xa/0xc [l2cap]
Oct  2 17:41:59 darkstar kernel: [  438.001360]  #1:  (&hdev->lock){+.-.+.}, at: [<faa20e16>] l2cap_sock_connect+0x103/0x26b [l2cap]
Oct  2 17:41:59 darkstar kernel: [  438.001371] Pid: 2133, comm: sdptool Not tainted 2.6.31-mm1 #2
Oct  2 17:41:59 darkstar kernel: [  438.001373] Call Trace:
Oct  2 17:41:59 darkstar kernel: [  438.001381]  [<c022433f>] __might_sleep+0xde/0xe5
Oct  2 17:41:59 darkstar kernel: [  438.001386]  [<c0298843>] __kmalloc+0x4a/0x15a
Oct  2 17:41:59 darkstar kernel: [  438.001392]  [<c03f0065>] ? kzalloc+0xb/0xd
Oct  2 17:41:59 darkstar kernel: [  438.001396]  [<c03f0065>] kzalloc+0xb/0xd
Oct  2 17:41:59 darkstar kernel: [  438.001400]  [<c03f04ff>] device_private_init+0x15/0x3d
Oct  2 17:41:59 darkstar kernel: [  438.001405]  [<c03f24c5>] dev_set_drvdata+0x18/0x26
Oct  2 17:41:59 darkstar kernel: [  438.001414]  [<fa51fff7>] hci_conn_init_sysfs+0x40/0xd9 [bluetooth]
Oct  2 17:41:59 darkstar kernel: [  438.001422]  [<fa51cdc0>] ? hci_conn_add+0x128/0x186 [bluetooth]
Oct  2 17:41:59 darkstar kernel: [  438.001429]  [<fa51ce0f>] hci_conn_add+0x177/0x186 [bluetooth]
Oct  2 17:41:59 darkstar kernel: [  438.001437]  [<fa51cf8a>] hci_connect+0x3c/0xfb [bluetooth]
Oct  2 17:41:59 darkstar kernel: [  438.001442]  [<faa20e87>] l2cap_sock_connect+0x174/0x26b [l2cap]
Oct  2 17:41:59 darkstar kernel: [  438.001448]  [<c04c8df5>] sys_connect+0x60/0x7a
Oct  2 17:41:59 darkstar kernel: [  438.001453]  [<c024b703>] ? lock_release_non_nested+0x84/0x1de
Oct  2 17:41:59 darkstar kernel: [  438.001458]  [<c028804b>] ? might_fault+0x47/0x81
Oct  2 17:41:59 darkstar kernel: [  438.001462]  [<c028804b>] ? might_fault+0x47/0x81
Oct  2 17:41:59 darkstar kernel: [  438.001468]  [<c033361f>] ? __copy_from_user_ll+0x11/0xce
Oct  2 17:41:59 darkstar kernel: [  438.001472]  [<c04c9419>] sys_socketcall+0x82/0x17b
Oct  2 17:41:59 darkstar kernel: [  438.001477]  [<c020329d>] syscall_call+0x7/0xb

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/hci_sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 7f939ce29801..2bc6f6a8de68 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -92,6 +92,8 @@ static void add_conn(struct work_struct *work)
 
 	dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
 
+	dev_set_drvdata(&conn->dev, conn);
+
 	if (device_add(&conn->dev) < 0) {
 		BT_ERR("Failed to register connection device");
 		return;
@@ -144,8 +146,6 @@ void hci_conn_init_sysfs(struct hci_conn *conn)
 	conn->dev.class = bt_class;
 	conn->dev.parent = &hdev->dev;
 
-	dev_set_drvdata(&conn->dev, conn);
-
 	device_initialize(&conn->dev);
 
 	INIT_WORK(&conn->work_add, add_conn);

From 45054dc1bf2367ccb0e7c0486037907cd9395f8b Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Sun, 18 Oct 2009 20:28:30 +0000
Subject: [PATCH 48/70] bluetooth: static lock key fix

When shutdown ppp connection, lockdep waring about non-static key
will happen, it is caused by the lock is not initialized properly
at that time.

Fix with tuning the lock/skb_queue_head init order

[   94.339261] INFO: trying to register non-static key.
[   94.342509] the code is fine but needs lockdep annotation.
[   94.342509] turning off the locking correctness validator.
[   94.342509] Pid: 0, comm: swapper Not tainted 2.6.31-mm1 #2
[   94.342509] Call Trace:
[   94.342509]  [<c0248fbe>] register_lock_class+0x58/0x241
[   94.342509]  [<c024b5df>] ? __lock_acquire+0xb57/0xb73
[   94.342509]  [<c024ab34>] __lock_acquire+0xac/0xb73
[   94.342509]  [<c024b7fa>] ? lock_release_non_nested+0x17b/0x1de
[   94.342509]  [<c024b662>] lock_acquire+0x67/0x84
[   94.342509]  [<c04cd1eb>] ? skb_dequeue+0x15/0x41
[   94.342509]  [<c054a857>] _spin_lock_irqsave+0x2f/0x3f
[   94.342509]  [<c04cd1eb>] ? skb_dequeue+0x15/0x41
[   94.342509]  [<c04cd1eb>] skb_dequeue+0x15/0x41
[   94.342509]  [<c054a648>] ? _read_unlock+0x1d/0x20
[   94.342509]  [<c04cd641>] skb_queue_purge+0x14/0x1b
[   94.342509]  [<fab94fdc>] l2cap_recv_frame+0xea1/0x115a [l2cap]
[   94.342509]  [<c024b5df>] ? __lock_acquire+0xb57/0xb73
[   94.342509]  [<c0249c04>] ? mark_lock+0x1e/0x1c7
[   94.342509]  [<f8364963>] ? hci_rx_task+0xd2/0x1bc [bluetooth]
[   94.342509]  [<fab95346>] l2cap_recv_acldata+0xb1/0x1c6 [l2cap]
[   94.342509]  [<f8364997>] hci_rx_task+0x106/0x1bc [bluetooth]
[   94.342509]  [<fab95295>] ? l2cap_recv_acldata+0x0/0x1c6 [l2cap]
[   94.342509]  [<c02302c4>] tasklet_action+0x69/0xc1
[   94.342509]  [<c022fbef>] __do_softirq+0x94/0x11e
[   94.342509]  [<c022fcaf>] do_softirq+0x36/0x5a
[   94.342509]  [<c022fe14>] irq_exit+0x35/0x68
[   94.342509]  [<c0204ced>] do_IRQ+0x72/0x89
[   94.342509]  [<c02038ee>] common_interrupt+0x2e/0x34
[   94.342509]  [<c024007b>] ? pm_qos_add_requirement+0x63/0x9d
[   94.342509]  [<c038e8a5>] ? acpi_idle_enter_bm+0x209/0x238
[   94.342509]  [<c049d238>] cpuidle_idle_call+0x5c/0x94
[   94.342509]  [<c02023f8>] cpu_idle+0x4e/0x6f
[   94.342509]  [<c0534153>] rest_init+0x53/0x55
[   94.342509]  [<c0781894>] start_kernel+0x2f0/0x2f5
[   94.342509]  [<c0781091>] i386_start_kernel+0x91/0x96

Reported-by: Oliver Hartkopp <oliver@hartkopp.net>
Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Tested-by: Oliver Hartkopp <oliver@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/l2cap.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 555d9da1869b..77e9fb130adb 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -555,12 +555,12 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 
 	conn->feat_mask = 0;
 
-	setup_timer(&conn->info_timer, l2cap_info_timeout,
-						(unsigned long) conn);
-
 	spin_lock_init(&conn->lock);
 	rwlock_init(&conn->chan_list.lock);
 
+	setup_timer(&conn->info_timer, l2cap_info_timeout,
+						(unsigned long) conn);
+
 	conn->disc_reason = 0x13;
 
 	return conn;
@@ -783,6 +783,9 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 	/* Default config options */
 	pi->conf_len = 0;
 	pi->flush_to = L2CAP_DEFAULT_FLUSH_TO;
+	skb_queue_head_init(TX_QUEUE(sk));
+	skb_queue_head_init(SREJ_QUEUE(sk));
+	INIT_LIST_HEAD(SREJ_LIST(sk));
 }
 
 static struct proto l2cap_proto = {

From 55b8050353c4a212c94d7156e2bd5885225b869b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Oct 2009 06:41:58 +0000
Subject: [PATCH 49/70] net: Fix IP_MULTICAST_IF

ipv4/ipv6 setsockopt(IP_MULTICAST_IF) have dubious __dev_get_by_index() calls.

This function should be called only with RTNL or dev_base_lock held, or reader
could see a corrupt hash chain and eventually enter an endless loop.

Fix is to call dev_get_by_index()/dev_put().

If this happens to be performance critical, we could define a new dev_exist_by_index()
function to avoid touching dev refcount.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c   | 7 +++----
 net/ipv6/ipv6_sockglue.c | 6 +++++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 0c0b6e363a20..e982b5c1ee17 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -634,17 +634,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 				break;
 			}
 			dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
-			if (dev) {
+			if (dev)
 				mreq.imr_ifindex = dev->ifindex;
-				dev_put(dev);
-			}
 		} else
-			dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
+			dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
 
 
 		err = -EADDRNOTAVAIL;
 		if (!dev)
 			break;
+		dev_put(dev);
 
 		err = -EINVAL;
 		if (sk->sk_bound_dev_if &&
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 14f54eb5a7fc..4f7aaf6996a3 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -496,13 +496,17 @@ done:
 			goto e_inval;
 
 		if (val) {
+			struct net_device *dev;
+
 			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
 				goto e_inval;
 
-			if (__dev_get_by_index(net, val) == NULL) {
+			dev = dev_get_by_index(net, val);
+			if (!dev) {
 				retv = -ENODEV;
 				break;
 			}
+			dev_put(dev);
 		}
 		np->mcast_oif = val;
 		retv = 0;

From 13b79b971564ddd0f14e706592472adc8199e912 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Tue, 20 Oct 2009 16:20:47 +0900
Subject: [PATCH 50/70] crypto: aesni-intel - Fix irq_fpu_usable usage

When renaming kernel_fpu_using to irq_fpu_usable, the semantics of the
function is changed too, from mesuring whether kernel is using FPU,
that is, the FPU is NOT available, to measuring whether FPU is usable,
that is, the FPU is available.

But the usage of irq_fpu_usable in aesni-intel_glue.c is not changed
accordingly. This patch fixes this.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aesni-intel_glue.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 585edebe12cf..49c552c060e9 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -82,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
 		return -EINVAL;
 	}
 
-	if (irq_fpu_usable())
+	if (!irq_fpu_usable())
 		err = crypto_aes_expand_key(ctx, in_key, key_len);
 	else {
 		kernel_fpu_begin();
@@ -103,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
 
-	if (irq_fpu_usable())
+	if (!irq_fpu_usable())
 		crypto_aes_encrypt_x86(ctx, dst, src);
 	else {
 		kernel_fpu_begin();
@@ -116,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
 
-	if (irq_fpu_usable())
+	if (!irq_fpu_usable())
 		crypto_aes_decrypt_x86(ctx, dst, src);
 	else {
 		kernel_fpu_begin();
@@ -342,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req)
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 
-	if (irq_fpu_usable()) {
+	if (!irq_fpu_usable()) {
 		struct ablkcipher_request *cryptd_req =
 			ablkcipher_request_ctx(req);
 		memcpy(cryptd_req, req, sizeof(*req));
@@ -363,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req)
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 
-	if (irq_fpu_usable()) {
+	if (!irq_fpu_usable()) {
 		struct ablkcipher_request *cryptd_req =
 			ablkcipher_request_ctx(req);
 		memcpy(cryptd_req, req, sizeof(*req));

From b6b39e8f3fbbb31001b836afec87bcaf4811a7bf Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 19 Oct 2009 19:41:06 +0000
Subject: [PATCH 51/70] tcp: Try to catch MSG_PEEK bug

This patch tries to print out more information when we hit the
MSG_PEEK bug in tcp_recvmsg.  It's been around since at least
2005 and it's about time that we finally fix it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9b2756fbdf9b..90b2e0649bfb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1442,7 +1442,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				goto found_ok_skb;
 			if (tcp_hdr(skb)->fin)
 				goto found_fin_ok;
-			WARN_ON(!(flags & MSG_PEEK));
+			if (WARN_ON(!(flags & MSG_PEEK)))
+				printk(KERN_INFO "recvmsg bug 2: copied %X "
+				       "seq %X\n", *seq, TCP_SKB_CB(skb)->seq);
 		}
 
 		/* Well, if we have backlog, try to process it now yet. */

From abf90cca9725bd05362fb3443ad55071a69a12d9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 18 Oct 2009 22:48:51 +0000
Subject: [PATCH 52/70] net: Fix struct inet_timewait_sock bitfield annotation

commit 9e337b0f (net: annotate inet_timewait_sock bitfields)
added 4/8 bytes in struct inet_timewait_sock.

Fix this by declaring tw_ipv6_offset in the 'flags' bitfield
The 14 bits hole is named tw_pad to make it cleary apparent.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_timewait_sock.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index b63b80fac567..f93ad90a601b 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -130,11 +130,11 @@ struct inet_timewait_sock {
 	__u16			tw_num;
 	kmemcheck_bitfield_begin(flags);
 	/* And these are ours. */
-	__u8			tw_ipv6only:1,
-				tw_transparent:1;
-	/* 14 bits hole, try to pack */
+	unsigned int		tw_ipv6only     : 1,
+				tw_transparent  : 1,
+				tw_pad		: 14,	/* 14 bits hole */
+				tw_ipv6_offset  : 16;
 	kmemcheck_bitfield_end(flags);
-	__u16			tw_ipv6_offset;
 	unsigned long		tw_ttd;
 	struct inet_bind_bucket	*tw_tb;
 	struct hlist_node	tw_death_node;

From 945526846a84c00adac1efd1c6befdaa77039623 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 15 Oct 2009 00:13:23 +0200
Subject: [PATCH 53/70] dnotify: ignore FS_EVENT_ON_CHILD

Mask off FS_EVENT_ON_CHILD in dnotify_handle_event().  Otherwise, when there
is more than one watch on a directory and dnotify_should_send_event()
succeeds, events with FS_EVENT_ON_CHILD set will trigger all watches and cause
spurious events.

This case was overlooked in commit e42e2773.

	#define _GNU_SOURCE

	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <signal.h>
	#include <sys/types.h>
	#include <sys/stat.h>
	#include <fcntl.h>
	#include <string.h>

	static void create_event(int s, siginfo_t* si, void* p)
	{
		printf("create\n");
	}

	static void delete_event(int s, siginfo_t* si, void* p)
	{
		printf("delete\n");
	}

	int main (void) {
		struct sigaction action;
		char *tmpdir, *file;
		int fd1, fd2;

		sigemptyset (&action.sa_mask);
		action.sa_flags = SA_SIGINFO;

		action.sa_sigaction = create_event;
		sigaction (SIGRTMIN + 0, &action, NULL);

		action.sa_sigaction = delete_event;
		sigaction (SIGRTMIN + 1, &action, NULL);

	#	define TMPDIR "/tmp/test.XXXXXX"
		tmpdir = malloc(strlen(TMPDIR) + 1);
		strcpy(tmpdir, TMPDIR);
		mkdtemp(tmpdir);

	#	define TMPFILE "/file"
		file = malloc(strlen(tmpdir) + strlen(TMPFILE) + 1);
		sprintf(file, "%s/%s", tmpdir, TMPFILE);

		fd1 = open (tmpdir, O_RDONLY);
		fcntl(fd1, F_SETSIG, SIGRTMIN);
		fcntl(fd1, F_NOTIFY, DN_MULTISHOT | DN_CREATE);

		fd2 = open (tmpdir, O_RDONLY);
		fcntl(fd2, F_SETSIG, SIGRTMIN + 1);
		fcntl(fd2, F_NOTIFY, DN_MULTISHOT | DN_DELETE);

		if (fork()) {
			/* This triggers a create event */
			creat(file, 0600);
			/* This triggers a create and delete event (!) */
			unlink(file);
		} else {
			sleep(1);
			rmdir(tmpdir);
		}

		return 0;
	}

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 828a889be909..7e54e52964dd 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -91,6 +91,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 	struct dnotify_struct *dn;
 	struct dnotify_struct **prev;
 	struct fown_struct *fown;
+	__u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
 
 	to_tell = event->to_tell;
 
@@ -106,7 +107,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 	spin_lock(&entry->lock);
 	prev = &dnentry->dn;
 	while ((dn = *prev) != NULL) {
-		if ((dn->dn_mask & event->mask) == 0) {
+		if ((dn->dn_mask & test_mask) == 0) {
 			prev = &dn->dn_next;
 			continue;
 		}

From 78abcb13dd573f80d76d12007b36200a86f1e494 Mon Sep 17 00:00:00 2001
From: Steven King <sfking@fdwdc.com>
Date: Tue, 20 Oct 2009 18:51:37 -0700
Subject: [PATCH 54/70] net: fix section mismatch in fec.c

fec_enet_init is called by both fec_probe and fec_resume, so it
shouldn't be marked as __init.

Signed-off-by: Steven King <sfking@fdwdc.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 29234380e6c6..16a1d58419d9 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -1654,7 +1654,7 @@ static const struct net_device_ops fec_netdev_ops = {
   *
   * index is only used in legacy code
   */
-int __init fec_enet_init(struct net_device *dev, int index)
+static int fec_enet_init(struct net_device *dev, int index)
 {
 	struct fec_enet_private *fep = netdev_priv(dev);
 	struct bufdesc *cbd_base;

From 57dada6819160eb08f2945fb847045f173d3218d Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 19 Oct 2009 23:49:03 +0000
Subject: [PATCH 55/70] KS8851: Add soft reset at probe time

Issue a full soft reset at probe time.

This was reported by Doong Ping of Micrel, but no explanation of why this
is necessary or what bug it is fixing. Add it as it does not seem to hurt
the current driver and ensures that the device is in a known state when we
start setting it up.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ks8851.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c
index 237835864357..18b17a1deb11 100644
--- a/drivers/net/ks8851.c
+++ b/drivers/net/ks8851.c
@@ -1239,6 +1239,9 @@ static int __devinit ks8851_probe(struct spi_device *spi)
 	ndev->netdev_ops = &ks8851_netdev_ops;
 	ndev->irq = spi->irq;
 
+	/* issue a global soft reset to reset the device. */
+	ks8851_soft_reset(ks, GRR_GSR);
+
 	/* simple check for a valid chip being connected to the bus */
 
 	if ((ks8851_rdreg16(ks, KS_CIDER) & ~CIDER_REV_MASK) != CIDER_ID) {

From 160d0fadaf7ce20e5bad26c8a955504b93c6a5ba Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 19 Oct 2009 23:49:04 +0000
Subject: [PATCH 56/70] KS8851: Fix MAC address write order

The MAC address register was being written in the wrong order, so add
a new address macro to convert mac-address byte to register address and
a ks8851_wrreg8() function to write each byte without having to worry
about any difficult byte swapping.

Fixes a bug reported by Doong, Ping of Micrel.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ks8851.c | 37 +++++++++++++++++++++++++++++++++----
 drivers/net/ks8851.h |  1 +
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c
index 18b17a1deb11..29fa19a076bd 100644
--- a/drivers/net/ks8851.c
+++ b/drivers/net/ks8851.c
@@ -170,6 +170,36 @@ static void ks8851_wrreg16(struct ks8851_net *ks, unsigned reg, unsigned val)
 		ks_err(ks, "spi_sync() failed\n");
 }
 
+/**
+ * ks8851_wrreg8 - write 8bit register value to chip
+ * @ks: The chip state
+ * @reg: The register address
+ * @val: The value to write
+ *
+ * Issue a write to put the value @val into the register specified in @reg.
+ */
+static void ks8851_wrreg8(struct ks8851_net *ks, unsigned reg, unsigned val)
+{
+	struct spi_transfer *xfer = &ks->spi_xfer1;
+	struct spi_message *msg = &ks->spi_msg1;
+	__le16 txb[2];
+	int ret;
+	int bit;
+
+	bit = 1 << (reg & 3);
+
+	txb[0] = cpu_to_le16(MK_OP(bit, reg) | KS_SPIOP_WR);
+	txb[1] = val;
+
+	xfer->tx_buf = txb;
+	xfer->rx_buf = NULL;
+	xfer->len = 3;
+
+	ret = spi_sync(ks->spidev, msg);
+	if (ret < 0)
+		ks_err(ks, "spi_sync() failed\n");
+}
+
 /**
  * ks8851_rx_1msg - select whether to use one or two messages for spi read
  * @ks: The device structure
@@ -322,13 +352,12 @@ static void ks8851_soft_reset(struct ks8851_net *ks, unsigned op)
 static int ks8851_write_mac_addr(struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
-	u16 *mcp = (u16 *)dev->dev_addr;
+	int i;
 
 	mutex_lock(&ks->lock);
 
-	ks8851_wrreg16(ks, KS_MARL, mcp[0]);
-	ks8851_wrreg16(ks, KS_MARM, mcp[1]);
-	ks8851_wrreg16(ks, KS_MARH, mcp[2]);
+	for (i = 0; i < ETH_ALEN; i++)
+		ks8851_wrreg8(ks, KS_MAR(i), dev->dev_addr[i]);
 
 	mutex_unlock(&ks->lock);
 
diff --git a/drivers/net/ks8851.h b/drivers/net/ks8851.h
index 85abe147afbf..f52c312cc356 100644
--- a/drivers/net/ks8851.h
+++ b/drivers/net/ks8851.h
@@ -16,6 +16,7 @@
 #define CCR_32PIN				(1 << 0)
 
 /* MAC address registers */
+#define KS_MAR(_m)				0x15 - (_m)
 #define KS_MARL					0x10
 #define KS_MARM					0x12
 #define KS_MARH					0x14

From b6a71bfa00eb110c8a2e38f85572ed361f8bf3a5 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 19 Oct 2009 23:49:05 +0000
Subject: [PATCH 57/70] KS8851: Fix ks8851_set_rx_mode() for IFF_MULTICAST

In ks8851_set_rx_mode() the case handling IFF_MULTICAST was also setting
the RXCR1_AE bit by accident. This meant that all unicast frames where
being accepted by the device. Remove RXCR1_AE from this case.

Note, RXCR1_AE was also masking a problem with setting the MAC address
properly, so needs to be applied after fixing the MAC write order.

Fixes a bug reported by Doong, Ping of Micrel. This version of the
patch avoids setting RXCR1_ME for all cases.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ks8851.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c
index 29fa19a076bd..a23f739d222f 100644
--- a/drivers/net/ks8851.c
+++ b/drivers/net/ks8851.c
@@ -980,7 +980,7 @@ static void ks8851_set_rx_mode(struct net_device *dev)
 			mcptr = mcptr->next;
 		}
 
-		rxctrl.rxcr1 = RXCR1_RXME | RXCR1_RXAE | RXCR1_RXPAFMA;
+		rxctrl.rxcr1 = RXCR1_RXME | RXCR1_RXPAFMA;
 	} else {
 		/* just accept broadcast / unicast */
 		rxctrl.rxcr1 = RXCR1_RXPAFMA;

From ad3960243e55320d74195fb85c975e0a8cc4466c Mon Sep 17 00:00:00 2001
From: Earl Chew <earl_chew@agilent.com>
Date: Mon, 19 Oct 2009 15:55:41 -0700
Subject: [PATCH 58/70] fs: pipe.c null pointer dereference

This patch fixes a null pointer exception in pipe_rdwr_open() which
generates the stack trace:

> Unable to handle kernel NULL pointer dereference at 0000000000000028 RIP:
>  [<ffffffff802899a5>] pipe_rdwr_open+0x35/0x70
>  [<ffffffff8028125c>] __dentry_open+0x13c/0x230
>  [<ffffffff8028143d>] do_filp_open+0x2d/0x40
>  [<ffffffff802814aa>] do_sys_open+0x5a/0x100
>  [<ffffffff8021faf3>] sysenter_do_call+0x1b/0x67

The failure mode is triggered by an attempt to open an anonymous
pipe via /proc/pid/fd/* as exemplified by this script:

=============================================================
while : ; do
   { echo y ; sleep 1 ; } | { while read ; do echo z$REPLY; done ; } &
   PID=$!
   OUT=$(ps -efl | grep 'sleep 1' | grep -v grep |
        { read PID REST ; echo $PID; } )
   OUT="${OUT%% *}"
   DELAY=$((RANDOM * 1000 / 32768))
   usleep $((DELAY * 1000 + RANDOM % 1000 ))
   echo n > /proc/$OUT/fd/1                 # Trigger defect
done
=============================================================

Note that the failure window is quite small and I could only
reliably reproduce the defect by inserting a small delay
in pipe_rdwr_open(). For example:

 static int
 pipe_rdwr_open(struct inode *inode, struct file *filp)
 {
       msleep(100);
       mutex_lock(&inode->i_mutex);

Although the defect was observed in pipe_rdwr_open(), I think it
makes sense to replicate the change through all the pipe_*_open()
functions.

The core of the change is to verify that inode->i_pipe has not
been released before attempting to manipulate it. If inode->i_pipe
is no longer present, return ENOENT to indicate so.

The comment about potentially using atomic_t for i_pipe->readers
and i_pipe->writers has also been removed because it is no longer
relevant in this context. The inode->i_mutex lock must be used so
that inode->i_pipe can be dealt with correctly.

Signed-off-by: Earl Chew <earl_chew@agilent.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c | 41 ++++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 52c415114838..ae17d026aaa3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -777,36 +777,55 @@ pipe_rdwr_release(struct inode *inode, struct file *filp)
 static int
 pipe_read_open(struct inode *inode, struct file *filp)
 {
-	/* We could have perhaps used atomic_t, but this and friends
-	   below are the only places.  So it doesn't seem worthwhile.  */
+	int ret = -ENOENT;
+
 	mutex_lock(&inode->i_mutex);
-	inode->i_pipe->readers++;
+
+	if (inode->i_pipe) {
+		ret = 0;
+		inode->i_pipe->readers++;
+	}
+
 	mutex_unlock(&inode->i_mutex);
 
-	return 0;
+	return ret;
 }
 
 static int
 pipe_write_open(struct inode *inode, struct file *filp)
 {
+	int ret = -ENOENT;
+
 	mutex_lock(&inode->i_mutex);
-	inode->i_pipe->writers++;
+
+	if (inode->i_pipe) {
+		ret = 0;
+		inode->i_pipe->writers++;
+	}
+
 	mutex_unlock(&inode->i_mutex);
 
-	return 0;
+	return ret;
 }
 
 static int
 pipe_rdwr_open(struct inode *inode, struct file *filp)
 {
+	int ret = -ENOENT;
+
 	mutex_lock(&inode->i_mutex);
-	if (filp->f_mode & FMODE_READ)
-		inode->i_pipe->readers++;
-	if (filp->f_mode & FMODE_WRITE)
-		inode->i_pipe->writers++;
+
+	if (inode->i_pipe) {
+		ret = 0;
+		if (filp->f_mode & FMODE_READ)
+			inode->i_pipe->readers++;
+		if (filp->f_mode & FMODE_WRITE)
+			inode->i_pipe->writers++;
+	}
+
 	mutex_unlock(&inode->i_mutex);
 
-	return 0;
+	return ret;
 }
 
 /*

From 4223a4a155f245d41c350ed9eba4fc32e965c4da Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 20 Oct 2009 14:13:46 +0900
Subject: [PATCH 59/70] nfs: Fix nfs_parse_mount_options() kfree() leak

Fix a (small) memory leak in one of the error paths of the NFS mount
options parsing code.

Regression introduced in 2.6.30 by commit a67d18f (NFS: load the
rpc/rdma transport module automatically).

Reported-by: Yinghai Lu <yinghai@kernel.org>
Reported-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a2c18acb8568..90be551b80c1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1253,6 +1253,7 @@ static int nfs_parse_mount_options(char *raw,
 			default:
 				dfprintk(MOUNT, "NFS:   unrecognized "
 						"transport protocol\n");
+				kfree(string);
 				return 0;
 			}
 			break;

From af2bd9d534ca9f1ffdeb0780fb8508e71ed55803 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 21 Oct 2009 09:46:59 +0200
Subject: [PATCH 60/70] mmc: at91_mci: Don't include asm/mach/mmc.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a compile bug introduced in

	6ef297f (ARM: 5720/1: Move MMCI header to amba include dir)

That commit moved arch/arm/include/asm/mach/mmc.h to
include/linux/amba/mmci.h.  Just removing the include was enough.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Bill Gatliff <bgat@billgatliff.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mmc/host/at91_mci.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index e556d42cc45a..63924e0c7ea9 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -72,7 +72,6 @@
 #include <asm/irq.h>
 #include <asm/gpio.h>
 
-#include <asm/mach/mmc.h>
 #include <mach/board.h>
 #include <mach/cpu.h>
 #include <mach/at91_mci.h>

From 04bf7539c08d64184736cdc5e4ad617eda77eb0f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 20 Oct 2009 06:45:02 +0200
Subject: [PATCH 61/70] PM: Make warning in suspend_test_finish() less likely
 to happen

Increase TEST_SUSPEND_SECONDS to 10 so the warning in
suspend_test_finish() doesn't annoy the users of slower systems so much.

Also, make the warning print the suspend-resume cycle time, so that we
know why the warning actually triggered.

Patch prepared during the hacking session at the Kernel Summit in Tokyo.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/power/suspend_test.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index 17d8bb1acf9c..25596e450ac7 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -19,7 +19,7 @@
  * The time it takes is system-specific though, so when we test this
  * during system bootup we allow a LOT of time.
  */
-#define TEST_SUSPEND_SECONDS	5
+#define TEST_SUSPEND_SECONDS	10
 
 static unsigned long suspend_test_start_time;
 
@@ -49,7 +49,8 @@ void suspend_test_finish(const char *label)
 	 * has some performance issues.  The stack dump of a WARN_ON
 	 * is more likely to get the right attention than a printk...
 	 */
-	WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
+	WARN(msec > (TEST_SUSPEND_SECONDS * 1000),
+	     "Component: %s, time: %u\n", label, msec);
 }
 
 /*

From 845de8afa66550331dca164ab77fa49de930b699 Mon Sep 17 00:00:00 2001
From: Joyce Yu <joyce.yu@sun.com>
Date: Wed, 21 Oct 2009 17:21:10 -0700
Subject: [PATCH 62/70] niu: VLAN_ETH_HLEN should be used to make sure that the
 whole MAC header was copied to the head buffer in the Vlan packets case

Signed-off-by: Joyce Yu <joyce.yu@sun.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/niu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index f9364d0678f2..d6c7ac68f6ea 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -3545,7 +3545,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 	rp->rcr_index = index;
 
 	skb_reserve(skb, NET_IP_ALIGN);
-	__pskb_pull_tail(skb, min(len, NIU_RXPULL_MAX));
+	__pskb_pull_tail(skb, min(len, VLAN_ETH_HLEN));
 
 	rp->rx_packets++;
 	rp->rx_bytes += skb->len;

From f8b12e513b953aebf30f8ff7d2de9be7e024dbbe Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Sep 2009 22:44:42 +0200
Subject: [PATCH 63/70] virtio_blk: revert QUEUE_FLAG_VIRT addition

It seems like the addition of QUEUE_FLAG_VIRT caueses major performance
regressions for Fedora users:

	https://bugzilla.redhat.com/show_bug.cgi?id=509383
	https://bugzilla.redhat.com/show_bug.cgi?id=505695

while I can't reproduce those extreme regressions myself I think the flag
is wrong.

Rationale:

  QUEUE_FLAG_VIRT expands to QUEUE_FLAG_NONROT which casus the queue
  unplugged immediately.  This is not a good behaviour for at least
  qemu and kvm where we do have significant overhead for every
  I/O operations.  Even with all the latested speeups (native AIO,
  MSI support, zero copy) we can only get native speed for up to 128kb
  I/O requests we already are down to 66% of native performance for 4kb
  requests even on my laptop running the Intel X25-M SSD for which the
  QUEUE_FLAG_NONROT was designed.
  If we ever get virtio-blk overhead low enough that this flag makes
  sense it should only be set based on a feature flag set by the host.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/block/virtio_blk.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 43f19389647a..348befaaec73 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -332,7 +332,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	}
 
 	vblk->disk->queue->queuedata = vblk;
-	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, vblk->disk->queue);
 
 	if (index < 26) {
 		sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);

From e95646c3ec33c8ec0693992da4332a6b32eb7e31 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 30 Sep 2009 11:17:21 +0200
Subject: [PATCH 64/70] virtio: let header files include virtio_ids.h

Rusty,

commit 3ca4f5ca73057a617f9444a91022d7127041970a
    virtio: add virtio IDs file
moved all device IDs into a single file. While the change itself is
a very good one, it can break userspace applications. For example
if a userspace tool wanted to get the ID of virtio_net it used to
include virtio_net.h. This does no longer work, since virtio_net.h
does not include virtio_ids.h.
This patch moves all "#include <linux/virtio_ids.h>" from the C
files into the header files, making the header files compatible with
the old ones.

In addition, this patch exports virtio_ids.h to userspace.

CC: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/lguest/lguest.c       | 1 -
 drivers/block/virtio_blk.c          | 1 -
 drivers/char/hw_random/virtio-rng.c | 1 -
 drivers/char/virtio_console.c       | 1 -
 drivers/net/virtio_net.c            | 1 -
 drivers/virtio/virtio_balloon.c     | 1 -
 include/linux/Kbuild                | 1 +
 include/linux/virtio_9p.h           | 1 +
 include/linux/virtio_balloon.h      | 1 +
 include/linux/virtio_blk.h          | 1 +
 include/linux/virtio_console.h      | 1 +
 include/linux/virtio_net.h          | 1 +
 include/linux/virtio_rng.h          | 1 +
 net/9p/trans_virtio.c               | 1 -
 14 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index ba9373f82ab5..098de5bce00a 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -42,7 +42,6 @@
 #include <signal.h>
 #include "linux/lguest_launcher.h"
 #include "linux/virtio_config.h"
-#include <linux/virtio_ids.h>
 #include "linux/virtio_net.h"
 #include "linux/virtio_blk.h"
 #include "linux/virtio_console.h"
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 348befaaec73..55635d1f697d 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -3,7 +3,6 @@
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_blk.h>
 #include <linux/scatterlist.h>
 
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 962968f05b94..b6c24dcc987d 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -21,7 +21,6 @@
 #include <linux/scatterlist.h>
 #include <linux/spinlock.h>
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_rng.h>
 
 /* The host will fill any buffer we give it with sweet, sweet randomness.  We
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 0d328b59568d..a035ae39a359 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -31,7 +31,6 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_console.h>
 #include "hvc_console.h"
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8d009760277c..50ac94ce9c16 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -22,7 +22,6 @@
 #include <linux/ethtool.h>
 #include <linux/module.h>
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_net.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 200c22f55130..39789232646d 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -19,7 +19,6 @@
  */
 //#define DEBUG
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_balloon.h>
 #include <linux/swap.h>
 #include <linux/kthread.h>
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 3f384d4b163a..1feed71551c9 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -364,6 +364,7 @@ unifdef-y += utsname.h
 unifdef-y += videodev2.h
 unifdef-y += videodev.h
 unifdef-y += virtio_config.h
+unifdef-y += virtio_ids.h
 unifdef-y += virtio_blk.h
 unifdef-y += virtio_net.h
 unifdef-y += virtio_9p.h
diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index ea7226a45acb..095e10d148b4 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -2,6 +2,7 @@
 #define _LINUX_VIRTIO_9P_H
 /* This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers. */
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
 /* Maximum number of virtio channels per partition (1 for now) */
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 09d730085060..1418f048cb34 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -2,6 +2,7 @@
 #define _LINUX_VIRTIO_BALLOON_H
 /* This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers. */
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
 /* The feature bitmap for virtio balloon */
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 15cb666581d7..1e19470d2da2 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -3,6 +3,7 @@
 /* This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers. */
 #include <linux/types.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
 /* Feature bits */
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index b5f519806014..fe885174cc1f 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_VIRTIO_CONSOLE_H
 #define _LINUX_VIRTIO_CONSOLE_H
 #include <linux/types.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
  * anyone can use the definitions to implement compatible drivers/servers. */
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 1f41734bbb77..085e42298ce5 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -3,6 +3,7 @@
 /* This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers. */
 #include <linux/types.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 #include <linux/if_ether.h>
 
diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h
index 48121c3c434b..c4d5de896f0c 100644
--- a/include/linux/virtio_rng.h
+++ b/include/linux/virtio_rng.h
@@ -2,6 +2,7 @@
 #define _LINUX_VIRTIO_RNG_H
 /* This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers. */
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
 #endif /* _LINUX_VIRTIO_RNG_H */
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index b2e07f0dd298..ea1e3daabefe 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -43,7 +43,6 @@
 #include <net/9p/transport.h>
 #include <linux/scatterlist.h>
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_9p.h>
 
 #define VIRTQUEUE_NUM	128

From 3225beaba05d4f06087593f5e903ce867b6e118a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 22 Oct 2009 16:39:28 -0600
Subject: [PATCH 65/70] virtio_blk: Revert serial number support

This reverts "Add serial number support for virtio_blk, V4a".

Turns out that virtio_pci, lguest and s/390 all have an 8 bit limit
on virtio config space, so noone could ever use this.

This is coming back later in a cleaner form.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: john cooper <john.cooper@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/virtio_blk.c | 37 +++----------------------------------
 include/linux/virtio_blk.h |  4 ----
 2 files changed, 3 insertions(+), 38 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 55635d1f697d..51042f0ba7e1 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -182,34 +182,6 @@ static void do_virtblk_request(struct request_queue *q)
 		vblk->vq->vq_ops->kick(vblk->vq);
 }
 
-/* return ATA identify data
- */
-static int virtblk_identify(struct gendisk *disk, void *argp)
-{
-	struct virtio_blk *vblk = disk->private_data;
-	void *opaque;
-	int err = -ENOMEM;
-
-	opaque = kmalloc(VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
-	if (!opaque)
-		goto out;
-
-	err = virtio_config_buf(vblk->vdev, VIRTIO_BLK_F_IDENTIFY,
-		offsetof(struct virtio_blk_config, identify), opaque,
-		VIRTIO_BLK_ID_BYTES);
-
-	if (err)
-		goto out_kfree;
-
-	if (copy_to_user(argp, opaque, VIRTIO_BLK_ID_BYTES))
-		err = -EFAULT;
-
-out_kfree:
-	kfree(opaque);
-out:
-	return err;
-}
-
 static void virtblk_prepare_flush(struct request_queue *q, struct request *req)
 {
 	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
@@ -221,10 +193,6 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 {
 	struct gendisk *disk = bdev->bd_disk;
 	struct virtio_blk *vblk = disk->private_data;
-	void __user *argp = (void __user *)data;
-
-	if (cmd == HDIO_GET_IDENTITY)
-		return virtblk_identify(disk, argp);
 
 	/*
 	 * Only allow the generic SCSI ioctls if the host can support it.
@@ -232,7 +200,8 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
 		return -ENOTTY;
 
-	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
+	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
+			      (void __user *)data);
 }
 
 /* We provide getgeo only to please some old bootloader/partitioning tools */
@@ -443,7 +412,7 @@ static struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
 	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
-	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY, VIRTIO_BLK_F_FLUSH
+	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH
 };
 
 /*
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 1e19470d2da2..fd294c56d571 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -14,11 +14,8 @@
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
-#define VIRTIO_BLK_F_IDENTIFY	8	/* ATA IDENTIFY supported */
 #define VIRTIO_BLK_F_FLUSH	9	/* Cache flush command support */
 
-#define VIRTIO_BLK_ID_BYTES	(sizeof(__u16[256]))	/* IDENTIFY DATA */
-
 struct virtio_blk_config {
 	/* The capacity (in 512-byte sectors). */
 	__u64 capacity;
@@ -34,7 +31,6 @@ struct virtio_blk_config {
 	} geometry;
 	/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
 	__u32 blk_size;
-	__u8 identify[VIRTIO_BLK_ID_BYTES];
 } __attribute__((packed));
 
 /*

From 1e65175c2c73742495f0e5ca52658539a65825db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 1 Oct 2009 10:28:33 +0200
Subject: [PATCH 66/70] move virtballoon_remove to .devexit.text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function virtballoon_remove is used only wrapped by __devexit_p so
define it using __devexit.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_balloon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 39789232646d..9dd588042880 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -247,7 +247,7 @@ out:
 	return err;
 }
 
-static void virtballoon_remove(struct virtio_device *vdev)
+static void __devexit virtballoon_remove(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb = vdev->priv;
 

From ff07eb897a97640b7ac0262cd50311ad403038f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 1 Oct 2009 10:28:35 +0200
Subject: [PATCH 67/70] move virtrng_remove to .devexit.text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function virtrng_remove is used only wrapped by __devexit_p so define
it using __devexit.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/hw_random/virtio-rng.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index b6c24dcc987d..915157fcff98 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -116,7 +116,7 @@ static int virtrng_probe(struct virtio_device *vdev)
 	return 0;
 }
 
-static void virtrng_remove(struct virtio_device *vdev)
+static void __devexit virtrng_remove(struct virtio_device *vdev)
 {
 	vdev->config->reset(vdev);
 	hwrng_unregister(&virtio_hwrng);

From ba93483f8c997f1a3fc0a6677f41e8556ae3eba6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 26 Oct 2009 09:58:31 +0900
Subject: [PATCH 68/70] sh: __irq_entry annotate do_IRQ().

This adds an __irq_entry annotation for do_IRQ() so that the IRQ
annotation in the function graph tracer works as advertized. We already
have the IRQENTRY section wired up, so this is just a trivial addition
to actually make use of it.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/irq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 7cb933ba4957..eac7da772fc2 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/kernel_stat.h>
 #include <linux/seq_file.h>
+#include <linux/ftrace.h>
 #include <asm/processor.h>
 #include <asm/machvec.h>
 #include <asm/uaccess.h>
@@ -106,7 +107,7 @@ static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
 static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
 #endif
 
-asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs)
+asmlinkage __irq_entry int do_IRQ(unsigned int irq, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 #ifdef CONFIG_IRQSTACKS

From 26fadd3672964596d33548490b9756014ae0f414 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Thu, 22 Oct 2009 11:58:37 +0000
Subject: [PATCH 69/70] sh: Build fix: define more __movmem* symbols

ERROR: "__movmemSI12" [net/unix/unix.ko] undefined!
ERROR: "__movmemSI52" [net/ipv6/sit.ko] undefined!
ERROR: "__movmemSI24" [net/ipv6/ipv6.ko] undefined!
ERROR: "__movmemSI60" [net/ipv6/ipv6.ko] undefined!
ERROR: "__movmemSI16" [net/ipv6/ipv6.ko] undefined!
ERROR: "__movmemSI20" [net/ipv6/ipv6.ko] undefined!
ERROR: "__movmemSI32" [net/ipv6/ipv6.ko] undefined!

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/sh_ksyms_32.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 86c270428357..444cce3ae921 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -85,6 +85,20 @@ DECLARE_EXPORT(__movstr_i4_even);
 DECLARE_EXPORT(__movstr_i4_odd);
 DECLARE_EXPORT(__movstrSI12_i4);
 DECLARE_EXPORT(__movmem);
+DECLARE_EXPORT(__movmemSI8);
+DECLARE_EXPORT(__movmemSI12);
+DECLARE_EXPORT(__movmemSI16);
+DECLARE_EXPORT(__movmemSI20);
+DECLARE_EXPORT(__movmemSI24);
+DECLARE_EXPORT(__movmemSI28);
+DECLARE_EXPORT(__movmemSI32);
+DECLARE_EXPORT(__movmemSI36);
+DECLARE_EXPORT(__movmemSI40);
+DECLARE_EXPORT(__movmemSI44);
+DECLARE_EXPORT(__movmemSI48);
+DECLARE_EXPORT(__movmemSI52);
+DECLARE_EXPORT(__movmemSI56);
+DECLARE_EXPORT(__movmemSI60);
 DECLARE_EXPORT(__movmem_i4_even);
 DECLARE_EXPORT(__movmem_i4_odd);
 DECLARE_EXPORT(__movmemSI12_i4);

From 60339fad5c68c9c533cd14e67194ff8f727c41d9 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@console-pimps.org>
Date: Sat, 24 Oct 2009 18:56:57 +0000
Subject: [PATCH 70/70] sh: Check for return_to_handler when unwinding the
 stack

When CONFIG_FUNCTION_GRAPH_TRACER is enabled the function graph tracer
may patch return addresses on the stack with the address of
return_to_handler(). This really confuses the DWARF unwinder because it
will try find the caller of return_to_handler(), not the caller of the
real return address.

So teach the DWARF unwinder how to find the real return address whenever
it encounters return_to_handler().

This patch does not cope very well when multiple return addresses on the
stack have been patched. To make it work properly it would require state
to track how many return_to_handler()'s have been seen so that we'd know
where to look in current->curr_ret_stack[]. So for now, instead of
trying to handle this, just moan if more than one return address on the
stack has been patched.

Signed-off-by: Matt Fleming <matt@console-pimps.org>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/dwarf.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c
index 03b3616c80a5..2d07084e4882 100644
--- a/arch/sh/kernel/dwarf.c
+++ b/arch/sh/kernel/dwarf.c
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/mempool.h>
 #include <linux/mm.h>
+#include <linux/ftrace.h>
 #include <asm/dwarf.h>
 #include <asm/unwinder.h>
 #include <asm/sections.h>
@@ -557,6 +558,27 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
 	if (!pc && !prev)
 		pc = (unsigned long)current_text_addr();
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/*
+	 * If our stack has been patched by the function graph tracer
+	 * then we might see the address of return_to_handler() where we
+	 * expected to find the real return address.
+	 */
+	if (pc == (unsigned long)&return_to_handler) {
+		int index = current->curr_ret_stack;
+
+		/*
+		 * We currently have no way of tracking how many
+		 * return_to_handler()'s we've seen. If there is more
+		 * than one patched return address on our stack,
+		 * complain loudly.
+		 */
+		WARN_ON(index > 0);
+
+		pc = current->ret_stack[index].ret;
+	}
+#endif
+
 	frame = mempool_alloc(dwarf_frame_pool, GFP_ATOMIC);
 	if (!frame) {
 		printk(KERN_ERR "Unable to allocate a dwarf frame\n");