From 93544cc6486bea12e127ed58ca33477bb6ceafe6 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@austin.rr.com>
Date: Tue, 14 Feb 2006 22:30:52 -0600
Subject: [PATCH 1/6] [PATCH] CIFS: fix cifs_user_read oops when null SMB
 response on forcedirectio mount

This patch fixes an oops reported by Adrian Bunk in cifs_user_read when a null
read response is returned on a forcedirectio mount.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/file.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index d17c97d07c80..675bd2568297 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1442,13 +1442,15 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 					 &bytes_read, &smb_read_data,
 					 &buf_type);
 			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
-			if (copy_to_user(current_offset, 
-					 smb_read_data + 4 /* RFC1001 hdr */
-					 + le16_to_cpu(pSMBr->DataOffset), 
-					 bytes_read)) {
-				rc = -EFAULT;
-			}
 			if (smb_read_data) {
+				if (copy_to_user(current_offset,
+						smb_read_data +
+						4 /* RFC1001 length field */ +
+						le16_to_cpu(pSMBr->DataOffset),
+						bytes_read)) {
+					rc = -EFAULT;
+				}
+
 				if(buf_type == CIFS_SMALL_BUFFER)
 					cifs_small_buf_release(smb_read_data);
 				else if(buf_type == CIFS_LARGE_BUFFER)

From ee68cea2c26b7a8222f9020f54d22c6067011e8b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Feb 2006 01:34:23 -0800
Subject: [PATCH 2/6] [NETFILTER]: Fix xfrm lookup after SNAT

To find out if a packet needs to be handled by IPsec after SNAT, packets
are currently rerouted in POST_ROUTING and a new xfrm lookup is done. This
breaks SNAT of non-unicast packets to non-local addresses because the
packet is routed as incoming packet and no neighbour entry is bound to the
dst_entry. In general, it seems to be a bad idea to replace the dst_entry
after the packet was already sent to the output routine because its state
might not match what's expected.

This patch changes the xfrm lookup in POST_ROUTING to re-use the original
dst_entry without routing the packet again. This means no policy routing
can be used for transport mode transforms (which keep the original route)
when packets are SNATed to match the policy, but it looks like the best
we can do for now.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4.h         |  2 +-
 net/ipv4/netfilter.c                   | 41 ++++++++++++++++++++++++++
 net/ipv4/netfilter/ip_nat_standalone.c |  6 ++--
 3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index fdc4a9527343..43c09d790b83 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -79,7 +79,7 @@ enum nf_ip_hook_priorities {
 
 #ifdef __KERNEL__
 extern int ip_route_me_harder(struct sk_buff **pskb);
-
+extern int ip_xfrm_me_harder(struct sk_buff **pskb);
 #endif /*__KERNEL__*/
 
 #endif /*__LINUX_IP_NETFILTER_H*/
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 52a3d7c57907..ed42cdc57cd9 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -78,6 +78,47 @@ int ip_route_me_harder(struct sk_buff **pskb)
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
+#ifdef CONFIG_XFRM
+int ip_xfrm_me_harder(struct sk_buff **pskb)
+{
+	struct flowi fl;
+	unsigned int hh_len;
+	struct dst_entry *dst;
+
+	if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED)
+		return 0;
+	if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0)
+		return -1;
+
+	dst = (*pskb)->dst;
+	if (dst->xfrm)
+		dst = ((struct xfrm_dst *)dst)->route;
+	dst_hold(dst);
+
+	if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0)
+		return -1;
+
+	dst_release((*pskb)->dst);
+	(*pskb)->dst = dst;
+
+	/* Change in oif may mean change in hh_len. */
+	hh_len = (*pskb)->dst->dev->hard_header_len;
+	if (skb_headroom(*pskb) < hh_len) {
+		struct sk_buff *nskb;
+
+		nskb = skb_realloc_headroom(*pskb, hh_len);
+		if (!nskb)
+			return -1;
+		if ((*pskb)->sk)
+			skb_set_owner_w(nskb, (*pskb)->sk);
+		kfree_skb(*pskb);
+		*pskb = nskb;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ip_xfrm_me_harder);
+#endif
+
 void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
 EXPORT_SYMBOL(ip_nat_decode_session);
 
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 92c54999a19d..7c3f7d380240 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -235,19 +235,19 @@ ip_nat_out(unsigned int hooknum,
 		return NF_ACCEPT;
 
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+#ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN
 	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 
 		if (ct->tuplehash[dir].tuple.src.ip !=
 		    ct->tuplehash[!dir].tuple.dst.ip
-#ifdef CONFIG_XFRM
 		    || ct->tuplehash[dir].tuple.src.u.all !=
 		       ct->tuplehash[!dir].tuple.dst.u.all
-#endif
 		    )
-			return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+			return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
 	}
+#endif
 	return ret;
 }
 

From 78872ccb68335b14f0d1ac7338ecfcbf1cba1df4 Mon Sep 17 00:00:00 2001
From: Adrian Drzewiecki <z@drze.net>
Date: Wed, 15 Feb 2006 01:47:48 -0800
Subject: [PATCH 3/6] [BRIDGE]: Fix deadlock in br_stp_disable_bridge

Looks like somebody forgot to use the _bh spin_lock variant. We ran into a
deadlock where br->hello_timer expired while br_stp_disable_br() walked
br->port_list.

Signed-off-by: Adrian Drzewiecki <z@drze.net>
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp_if.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index cc047f7fb6ef..35cf3a074087 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -67,7 +67,7 @@ void br_stp_disable_bridge(struct net_bridge *br)
 {
 	struct net_bridge_port *p;
 
-	spin_lock(&br->lock);
+	spin_lock_bh(&br->lock);
 	list_for_each_entry(p, &br->port_list, list) {
 		if (p->state != BR_STATE_DISABLED)
 			br_stp_disable_port(p);
@@ -76,7 +76,7 @@ void br_stp_disable_bridge(struct net_bridge *br)
 
 	br->topology_change = 0;
 	br->topology_change_detected = 0;
-	spin_unlock(&br->lock);
+	spin_unlock_bh(&br->lock);
 
 	del_timer_sync(&br->hello_timer);
 	del_timer_sync(&br->topology_change_timer);

From 3f17da699431ec48540beabc55c54d4b5e66c8e7 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 15 Feb 2006 22:13:24 +0300
Subject: [PATCH 4/6] [PATCH] fix kill_proc_info() vs CLONE_THREAD race

There is a window after copy_process() unlocks ->sighand.siglock
and before it adds the new thread to the thread list.

In that window __group_complete_signal(SIGKILL) will not see the
new thread yet, so this thread will start running while the whole
thread group was supposed to exit.

I beleive we have another good reason to place attach_pid(PID/TGID)
under ->sighand.siglock. We can do the same for

	release_task()->__unhash_process()

	de_thread()->switch_exec_pids()

After that we don't need tasklist_lock to iterate over the thread
list, and we can simplify things, see for example do_sigaction()
or sys_times().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 8e88b374cee9..3683ce10f4a9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1123,8 +1123,8 @@ static task_t *copy_process(unsigned long clone_flags,
 		p->real_parent = current;
 	p->parent = p->real_parent;
 
+	spin_lock(&current->sighand->siglock);
 	if (clone_flags & CLONE_THREAD) {
-		spin_lock(&current->sighand->siglock);
 		/*
 		 * Important: if an exit-all has been started then
 		 * do not create this new thread - the whole thread
@@ -1162,8 +1162,6 @@ static task_t *copy_process(unsigned long clone_flags,
 			 */
 			p->it_prof_expires = jiffies_to_cputime(1);
 		}
-
-		spin_unlock(&current->sighand->siglock);
 	}
 
 	/*
@@ -1189,6 +1187,7 @@ static task_t *copy_process(unsigned long clone_flags,
 
 	nr_threads++;
 	total_forks++;
+	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	return p;

From dadac81b1b86196fcc48fb87620403c4a7174f06 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 15 Feb 2006 22:13:26 +0300
Subject: [PATCH 5/6] [PATCH] fix kill_proc_info() vs fork() theoretical race

copy_process:

	attach_pid(p, PIDTYPE_PID, p->pid);
	attach_pid(p, PIDTYPE_TGID, p->tgid);

What if kill_proc_info(p->pid) happens in between?

copy_process() holds current->sighand.siglock, so we are safe
in CLONE_THREAD case, because current->sighand == p->sighand.

Otherwise, p->sighand is unlocked, the new process is already
visible to the find_task_by_pid(), but have a copy of parent's
'struct pid' in ->pids[PIDTYPE_TGID].

This means that __group_complete_signal() may hang while doing

	do ... while (next_thread() != p)

We can solve this problem if we reverse these 2 attach_pid()s:

	attach_pid() does wmb()

	group_send_sig_info() calls spin_lock(), which
	provides a read barrier. // Yes ?

I don't think we can hit this race in practice, but still.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 3683ce10f4a9..fbea12d7a943 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1173,8 +1173,6 @@ static task_t *copy_process(unsigned long clone_flags,
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
-	attach_pid(p, PIDTYPE_PID, p->pid);
-	attach_pid(p, PIDTYPE_TGID, p->tgid);
 	if (thread_group_leader(p)) {
 		p->signal->tty = current->signal->tty;
 		p->signal->pgrp = process_group(current);
@@ -1184,6 +1182,8 @@ static task_t *copy_process(unsigned long clone_flags,
 		if (p->pid)
 			__get_cpu_var(process_counts)++;
 	}
+	attach_pid(p, PIDTYPE_TGID, p->tgid);
+	attach_pid(p, PIDTYPE_PID, p->pid);
 
 	nr_threads++;
 	total_forks++;

From 5ecfbae093f0c37311e89b29bfc0c9d586eace87 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 15 Feb 2006 22:50:10 +0300
Subject: [PATCH 6/6] [PATCH] fix zap_thread's ptrace related problems

1. The tracee can go from ptrace_stop() to do_signal_stop()
   after __ptrace_unlink(p).

2. It is unsafe to __ptrace_unlink(p) while p->parent may wait
   for tasklist_lock in ptrace_detach().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c              |  2 +-
 include/linux/ptrace.h |  1 +
 kernel/ptrace.c        | 29 +++++++++++++++++------------
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 055378d2513e..0e1c95074d42 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1403,7 +1403,7 @@ static void zap_threads (struct mm_struct *mm)
 		do_each_thread(g,p) {
 			if (mm == p->mm && p != tsk &&
 			    p->ptrace && p->parent->mm == mm) {
-				__ptrace_unlink(p);
+				__ptrace_detach(p, 0);
 			}
 		} while_each_thread(g,p);
 		write_unlock_irq(&tasklist_lock);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 9d5cd106b344..0d36750fc0f1 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -84,6 +84,7 @@ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __us
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
 extern int ptrace_attach(struct task_struct *tsk);
 extern int ptrace_detach(struct task_struct *, unsigned int);
+extern void __ptrace_detach(struct task_struct *, unsigned int);
 extern void ptrace_disable(struct task_struct *);
 extern int ptrace_check_attach(struct task_struct *task, int kill);
 extern int ptrace_request(struct task_struct *child, long request, long addr, long data);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index d2cf144d0af5..d95a72c9279d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child)
  */
 void __ptrace_unlink(task_t *child)
 {
-	if (!child->ptrace)
-		BUG();
+	BUG_ON(!child->ptrace);
+
 	child->ptrace = 0;
 	if (!list_empty(&child->ptrace_list)) {
 		list_del_init(&child->ptrace_list);
@@ -184,22 +184,27 @@ bad:
 	return retval;
 }
 
-int ptrace_detach(struct task_struct *child, unsigned int data)
+void __ptrace_detach(struct task_struct *child, unsigned int data)
 {
-	if (!valid_signal(data))
-		return	-EIO;
-
-	/* Architecture-specific hardware disable .. */
-	ptrace_disable(child);
-
-	/* .. re-parent .. */
 	child->exit_code = data;
-
-	write_lock_irq(&tasklist_lock);
+	/* .. re-parent .. */
 	__ptrace_unlink(child);
 	/* .. and wake it up. */
 	if (child->exit_state != EXIT_ZOMBIE)
 		wake_up_process(child);
+}
+
+int ptrace_detach(struct task_struct *child, unsigned int data)
+{
+	if (!valid_signal(data))
+		return -EIO;
+
+	/* Architecture-specific hardware disable .. */
+	ptrace_disable(child);
+
+	write_lock_irq(&tasklist_lock);
+	if (child->ptrace)
+		__ptrace_detach(child, data);
 	write_unlock_irq(&tasklist_lock);
 
 	return 0;