From b514d3192736563dee3f4ba4b659558b3cadc7f7 Mon Sep 17 00:00:00 2001 From: Lucas Correia Villa Real Date: Thu, 9 Feb 2006 16:47:58 +0000 Subject: [PATCH 01/27] [ARM] 3314/1: S3C2400 - adds s3c2400.h Patch from Lucas Correia Villa Real This patch adds s3c2400.h, fixing the build for the 2410/2440 platforms. Signed-off-by: Lucas Correia Villa Real Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/mach-s3c2410/s3c2400.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 arch/arm/mach-s3c2410/s3c2400.h diff --git a/arch/arm/mach-s3c2410/s3c2400.h b/arch/arm/mach-s3c2410/s3c2400.h new file mode 100644 index 000000000000..8b2394e1ed40 --- /dev/null +++ b/arch/arm/mach-s3c2410/s3c2400.h @@ -0,0 +1,31 @@ +/* arch/arm/mach-s3c2410/s3c2400.h + * + * Copyright (c) 2004 Simtec Electronics + * Ben Dooks + * + * Header file for S3C2400 cpu support + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Modifications: + * 09-Fev-2006 LCVR First version, based on s3c2410.h +*/ + +#ifdef CONFIG_CPU_S3C2400 + +extern int s3c2400_init(void); + +extern void s3c2400_map_io(struct map_desc *mach_desc, int size); + +extern void s3c2400_init_uarts(struct s3c2410_uartcfg *cfg, int no); + +extern void s3c2400_init_clocks(int xtal); + +#else +#define s3c2400_init_clocks NULL +#define s3c2400_init_uarts NULL +#define s3c2400_map_io NULL +#define s3c2400_init NULL +#endif From a70ea994a0d83fd0151a070be72b87d014ef0a7e Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Thu, 9 Feb 2006 16:40:11 -0800 Subject: [PATCH 02/27] [NETLINK]: Fix a severe bug netlink overrun was broken while improvement of netlink. Destination socket is used in the place where it was meant to be source socket, so that now overrun is never sent to user netlink sockets, when it should be, and it even can be set on kernel socket, which results in complete deadlock of rtnetlink. Suggested fix is to restore status quo passing source socket as additional argument to netlink_attachskb(). A little explanation: overrun is set on a socket, when it failed to receive some message and sender of this messages does not or even have no way to handle this error. This happens in two cases: 1. when kernel sends something. Kernel never retransmits and cannot wait for buffer space. 2. when user sends a broadcast and the message was not delivered to some recipients. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- include/linux/netlink.h | 3 ++- ipc/mqueue.c | 3 ++- net/netlink/af_netlink.c | 7 ++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6a2ccf78a356..c256ebe2a7b4 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -160,7 +160,8 @@ extern int netlink_unregister_notifier(struct notifier_block *nb); /* finegrained unicast helpers: */ struct sock *netlink_getsockbyfilp(struct file *filp); -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo); +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, + long timeo, struct sock *ssk); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 59302fc3643b..fd2e26b6f966 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1018,7 +1018,8 @@ retry: goto out; } - ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT); + ret = netlink_attachskb(sock, nc, 0, + MAX_SCHEDULE_TIMEOUT, NULL); if (ret == 1) goto retry; if (ret) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2101b45d2ec6..6b9772d95872 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -702,7 +702,8 @@ struct sock *netlink_getsockbyfilp(struct file *filp) * 0: continue * 1: repeat lookup - reference dropped while waiting for socket memory. */ -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo) +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, + long timeo, struct sock *ssk) { struct netlink_sock *nlk; @@ -712,7 +713,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long t test_bit(0, &nlk->state)) { DECLARE_WAITQUEUE(wait, current); if (!timeo) { - if (!nlk->pid) + if (!ssk || nlk_sk(ssk)->pid == 0) netlink_overrun(sk); sock_put(sk); kfree_skb(skb); @@ -797,7 +798,7 @@ retry: kfree_skb(skb); return PTR_ERR(sk); } - err = netlink_attachskb(sk, skb, nonblock, timeo); + err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); if (err == 1) goto retry; if (err) From 28633514afd68afa77ed2fa34fa53626837bf2d5 Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Thu, 9 Feb 2006 16:40:58 -0800 Subject: [PATCH 03/27] [NETLINK]: illegal use of pid in rtnetlink When a netlink message is not related to a netlink socket, it is issued by kernel socket with pid 0. Netlink "pid" has nothing to do with current->pid. I called it incorrectly, if it was named "port", the confusion would be avoided. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- net/ipv4/devinet.c | 2 +- net/ipv4/fib_semantics.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8700379685e0..eca2976abb25 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -455,7 +455,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) if (!skb) return; - if (rtnetlink_fill_ifinfo(skb, dev, type, current->pid, 0, change, 0) < 0) { + if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) { kfree_skb(skb); return; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 95b9d81ac488..3ffa60dadc0c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1135,7 +1135,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa) if (!skb) netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); - else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { + else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { kfree_skb(skb); netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); } else { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ef4724de7350..0f4145babb14 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1045,7 +1045,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, } nl->nlmsg_flags = NLM_F_REQUEST; - nl->nlmsg_pid = current->pid; + nl->nlmsg_pid = 0; nl->nlmsg_seq = 0; nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); if (cmd == SIOCDELRT) { From d93077fb0e7cb9d4f4094a649501d840c55fdc8b Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 9 Feb 2006 16:58:46 -0800 Subject: [PATCH 04/27] [IRDA]: Set proper IrLAP device address length This patch set IrDA's addr_len properly, i.e to 4 bytes, the size of the IrLAP device address. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/net/irda/irlap.h | 3 +++ net/irda/irda_device.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h index f55e86e75030..2127cae1e0a6 100644 --- a/include/net/irda/irlap.h +++ b/include/net/irda/irlap.h @@ -50,6 +50,9 @@ /* May be different when we get VFIR */ #define LAP_MAX_HEADER (LAP_ADDR_HEADER + LAP_CTRL_HEADER) +/* Each IrDA device gets a random 32 bits IRLAP device address */ +#define LAP_ALEN 4 + #define BROADCAST 0xffffffff /* Broadcast device address */ #define CBROADCAST 0xfe /* Connection broadcast address */ #define XID_FORMAT 0x01 /* Discovery XID format */ diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 890bac0d4a56..e3debbdb67f5 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -343,12 +343,12 @@ static void irda_task_timer_expired(void *data) static void irda_device_setup(struct net_device *dev) { dev->hard_header_len = 0; - dev->addr_len = 0; + dev->addr_len = LAP_ALEN; dev->type = ARPHRD_IRDA; dev->tx_queue_len = 8; /* Window size + 1 s-frame */ - memset(dev->broadcast, 0xff, 4); + memset(dev->broadcast, 0xff, LAP_ALEN); dev->mtu = 2048; dev->flags = IFF_NOARP; From 80ba250e59ced808a8c9b79560938bbe4509c0a7 Mon Sep 17 00:00:00 2001 From: David Binderman Date: Thu, 9 Feb 2006 16:59:48 -0800 Subject: [PATCH 05/27] [IRDA]: out of range array access This patch fixes an out of range array access in irnet_irda.c. Author: David Binderman Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- net/irda/irnet/irnet_irda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index 07ec326c71f5..f65c7a83bc5c 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -696,7 +696,7 @@ irnet_daddr_to_dname(irnet_socket * self) { /* Yes !!! Get it.. */ strlcpy(self->rname, discoveries[i].info, sizeof(self->rname)); - self->rname[NICKNAME_MAX_LEN + 1] = '\0'; + self->rname[sizeof(self->rname) - 1] = '\0'; DEBUG(IRDA_SERV_INFO, "Device 0x%08x is in fact ``%s''.\n", self->daddr, self->rname); kfree(discoveries); From 6fcf9412de64056238a6295f21db7aa9c37a532e Mon Sep 17 00:00:00 2001 From: John Heffner Date: Thu, 9 Feb 2006 17:06:57 -0800 Subject: [PATCH 06/27] [TCP]: rcvbuf lock when tcp_moderate_rcvbuf enabled The rcvbuf lock should probably be honored here. Signed-off-by: John Heffner Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a97ed5416c28..e9a54ae7d690 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -456,7 +456,8 @@ void tcp_rcv_space_adjust(struct sock *sk) tp->rcvq_space.space = space; - if (sysctl_tcp_moderate_rcvbuf) { + if (sysctl_tcp_moderate_rcvbuf && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int new_clamp = space; /* Receive space grows, normalize in order to From b3f1be4b5412e34647764457bec901e06b03e624 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 9 Feb 2006 17:08:52 -0800 Subject: [PATCH 07/27] [BRIDGE]: fix for RCU and deadlock on device removal Change Bridge receive path to correctly handle RCU removal of device from bridge. Also fixes deadlock between carrier_check and del_nbp. This replaces the previous deleted flag fix. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 21 +++++++++++---------- net/bridge/br_input.c | 21 +++++++++++++-------- net/bridge/br_private.h | 1 - net/bridge/br_stp_bpdu.c | 30 ++++++++++++++++++------------ 4 files changed, 42 insertions(+), 31 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index da687c8dc6ff..70b7ef917234 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -79,9 +79,14 @@ static int port_cost(struct net_device *dev) */ static void port_carrier_check(void *arg) { - struct net_bridge_port *p = arg; + struct net_device *dev = arg; + struct net_bridge_port *p; rtnl_lock(); + p = dev->br_port; + if (!p) + goto done; + if (netif_carrier_ok(p->dev)) { u32 cost = port_cost(p->dev); @@ -97,6 +102,7 @@ static void port_carrier_check(void *arg) br_stp_disable_port(p); spin_unlock_bh(&p->br->lock); } +done: rtnl_unlock(); } @@ -104,7 +110,6 @@ static void destroy_nbp(struct net_bridge_port *p) { struct net_device *dev = p->dev; - dev->br_port = NULL; p->br = NULL; p->dev = NULL; dev_put(dev); @@ -133,24 +138,20 @@ static void del_nbp(struct net_bridge_port *p) struct net_bridge *br = p->br; struct net_device *dev = p->dev; - /* Race between RTNL notify and RCU callback */ - if (p->deleted) - return; - dev_set_promiscuity(dev, -1); cancel_delayed_work(&p->carrier_check); - flush_scheduled_work(); spin_lock_bh(&br->lock); br_stp_disable_port(p); - p->deleted = 1; spin_unlock_bh(&br->lock); br_fdb_delete_by_port(br, p); list_del_rcu(&p->list); + rcu_assign_pointer(dev->br_port, NULL); + call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -254,11 +255,10 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->dev = dev; p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; - dev->br_port = p; p->port_no = index; br_init_port(p); p->state = BR_STATE_DISABLED; - INIT_WORK(&p->carrier_check, port_carrier_check, p); + INIT_WORK(&p->carrier_check, port_carrier_check, dev); kobject_init(&p->kobj); return p; @@ -397,6 +397,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) else if ((err = br_sysfs_addif(p))) del_nbp(p); else { + rcu_assign_pointer(dev->br_port, p); dev_set_promiscuity(dev, 1); list_add_rcu(&p->list, &br->port_list); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e3a73cead6b6..4eef83755315 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -45,18 +45,20 @@ static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb) int br_handle_frame_finish(struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; - struct net_bridge_port *p = skb->dev->br_port; - struct net_bridge *br = p->br; + struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); + struct net_bridge *br; struct net_bridge_fdb_entry *dst; int passedup = 0; - /* insert into forwarding database after filtering to avoid spoofing */ - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + if (!p || p->state == BR_STATE_DISABLED) + goto drop; - if (p->state == BR_STATE_LEARNING) { - kfree_skb(skb); - goto out; - } + /* insert into forwarding database after filtering to avoid spoofing */ + br = p->br; + br_fdb_update(br, p, eth_hdr(skb)->h_source); + + if (p->state == BR_STATE_LEARNING) + goto drop; if (br->dev->flags & IFF_PROMISC) { struct sk_buff *skb2; @@ -93,6 +95,9 @@ int br_handle_frame_finish(struct sk_buff *skb) out: return 0; +drop: + kfree_skb(skb); + goto out; } /* diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e330b17b6d81..c5bd631ffcd5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -68,7 +68,6 @@ struct net_bridge_port /* STP */ u8 priority; u8 state; - u8 deleted; u16 port_no; unsigned char topology_change_ack; unsigned char config_pending; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index d071f1c9ad0b..296f6a487c52 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -133,29 +133,35 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) static const unsigned char header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; -/* NO locks */ +/* NO locks, but rcu_read_lock (preempt_disabled) */ int br_stp_handle_bpdu(struct sk_buff *skb) { - struct net_bridge_port *p = skb->dev->br_port; - struct net_bridge *br = p->br; + struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); + struct net_bridge *br; unsigned char *buf; + if (!p) + goto err; + + br = p->br; + spin_lock(&br->lock); + + if (p->state == BR_STATE_DISABLED || !(br->dev->flags & IFF_UP)) + goto out; + /* insert into forwarding database after filtering to avoid spoofing */ - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br_fdb_update(br, p, eth_hdr(skb)->h_source); + + if (!br->stp_enabled) + goto out; /* need at least the 802 and STP headers */ if (!pskb_may_pull(skb, sizeof(header)+1) || memcmp(skb->data, header, sizeof(header))) - goto err; + goto out; buf = skb_pull(skb, sizeof(header)); - spin_lock_bh(&br->lock); - if (p->state == BR_STATE_DISABLED - || !(br->dev->flags & IFF_UP) - || !br->stp_enabled) - goto out; - if (buf[0] == BPDU_TYPE_CONFIG) { struct br_config_bpdu bpdu; @@ -201,7 +207,7 @@ int br_stp_handle_bpdu(struct sk_buff *skb) br_received_tcn_bpdu(p); } out: - spin_unlock_bh(&br->lock); + spin_unlock(&br->lock); err: kfree_skb(skb); return 0; From 5dce971acf2ae20c80d5e9d1f6bbf17376870911 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 9 Feb 2006 17:09:38 -0800 Subject: [PATCH 08/27] [BRIDGE]: netfilter handle RCU during removal Bridge netfilter code needs to handle the case where device is removed from bridge while packet in process. In these cases the bridge_parent can become null while processing. This should fix: http://bugzilla.kernel.org/show_bug.cgi?id=5803 Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 53 ++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 7cac3fb9f809..b5018166b0e5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -51,9 +51,6 @@ #define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->daddr) #define dnat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->daddr) -#define has_bridge_parent(device) ((device)->br_port != NULL) -#define bridge_parent(device) ((device)->br_port->br->dev) - #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables = 1; @@ -98,6 +95,12 @@ static struct rtable __fake_rtable = { .rt_flags = 0, }; +static inline struct net_device *bridge_parent(const struct net_device *dev) +{ + struct net_bridge_port *port = rcu_dereference(dev->br_port); + + return port ? port->br->dev : NULL; +} /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the @@ -189,11 +192,15 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; skb->dev = bridge_parent(skb->dev); - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - skb->nh.raw += VLAN_HLEN; + if (!skb->dev) + kfree_skb(skb); + else { + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + skb->nh.raw += VLAN_HLEN; + } + skb->dst->output(skb); } - skb->dst->output(skb); return 0; } @@ -270,7 +277,7 @@ bridged_dnat: } /* Some common code for IPv4/IPv6 */ -static void setup_pre_routing(struct sk_buff *skb) +static struct net_device *setup_pre_routing(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; @@ -282,6 +289,8 @@ static void setup_pre_routing(struct sk_buff *skb) nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->physindev = skb->dev; skb->dev = bridge_parent(skb->dev); + + return skb->dev; } /* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */ @@ -376,7 +385,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, nf_bridge_put(skb->nf_bridge); if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) return NF_DROP; - setup_pre_routing(skb); + if (!setup_pre_routing(skb)) + return NF_DROP; NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); @@ -465,7 +475,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, nf_bridge_put(skb->nf_bridge); if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) return NF_DROP; - setup_pre_routing(skb); + if (!setup_pre_routing(skb)) + return NF_DROP; store_orig_dstaddr(skb); NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, @@ -539,11 +550,16 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + struct net_device *parent; int pf; if (!skb->nf_bridge) return NF_ACCEPT; + parent = bridge_parent(out); + if (!parent) + return NF_DROP; + if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) pf = PF_INET; else @@ -564,8 +580,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; - NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), - bridge_parent(out), br_nf_forward_finish); + NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent, + br_nf_forward_finish); return NF_STOLEN; } @@ -688,6 +704,8 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, goto out; } realoutdev = bridge_parent(skb->dev); + if (!realoutdev) + return NF_DROP; #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) /* iptables should match -o br0.x */ @@ -701,9 +719,11 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, /* IP forwarded traffic has a physindev, locally * generated traffic hasn't. */ if (realindev != NULL) { - if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT) && - has_bridge_parent(realindev)) - realindev = bridge_parent(realindev); + if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT) ) { + struct net_device *parent = bridge_parent(realindev); + if (parent) + realindev = parent; + } NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev, realoutdev, br_nf_local_out_finish, @@ -743,6 +763,9 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, if (!nf_bridge) return NF_ACCEPT; + if (!realoutdev) + return NF_DROP; + if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) pf = PF_INET; else From bab1deea308afcf9200837d6ac20aefe92972efb Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 9 Feb 2006 17:10:12 -0800 Subject: [PATCH 09/27] [BRIDGE]: fix error handling for add interface to bridge Refactor how the bridge code interacts with kobject system. It should still use kobjects even if not using sysfs. Fix the error unwind handling in br_add_if. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 77 ++++++++++++++++++++++++++++------------ net/bridge/br_private.h | 5 +-- net/bridge/br_sysfs_if.c | 50 ++------------------------ 3 files changed, 59 insertions(+), 73 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 70b7ef917234..7fa3a5a9971f 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -106,6 +106,20 @@ done: rtnl_unlock(); } +static void release_nbp(struct kobject *kobj) +{ + struct net_bridge_port *p + = container_of(kobj, struct net_bridge_port, kobj); + kfree(p); +} + +static struct kobj_type brport_ktype = { +#ifdef CONFIG_SYSFS + .sysfs_ops = &brport_sysfs_ops, +#endif + .release = release_nbp, +}; + static void destroy_nbp(struct net_bridge_port *p) { struct net_device *dev = p->dev; @@ -114,7 +128,7 @@ static void destroy_nbp(struct net_bridge_port *p) p->dev = NULL; dev_put(dev); - br_sysfs_freeif(p); + kobject_put(&p->kobj); } static void destroy_nbp_rcu(struct rcu_head *head) @@ -138,6 +152,8 @@ static void del_nbp(struct net_bridge_port *p) struct net_bridge *br = p->br; struct net_device *dev = p->dev; + sysfs_remove_link(&br->ifobj, dev->name); + dev_set_promiscuity(dev, -1); cancel_delayed_work(&p->carrier_check); @@ -152,6 +168,8 @@ static void del_nbp(struct net_bridge_port *p) rcu_assign_pointer(dev->br_port, NULL); + kobject_del(&p->kobj); + call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -161,7 +179,6 @@ static void del_br(struct net_bridge *br) struct net_bridge_port *p, *n; list_for_each_entry_safe(p, n, &br->port_list, list) { - br_sysfs_removeif(p); del_nbp(p); } @@ -261,6 +278,11 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, INIT_WORK(&p->carrier_check, port_carrier_check, dev); kobject_init(&p->kobj); + kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR); + p->kobj.ktype = &brport_ktype; + p->kobj.parent = &(dev->class_dev.kobj); + p->kobj.kset = NULL; + return p; } @@ -388,31 +410,43 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (dev->br_port != NULL) return -EBUSY; - if (IS_ERR(p = new_nbp(br, dev))) + p = new_nbp(br, dev); + if (IS_ERR(p)) return PTR_ERR(p); - if ((err = br_fdb_insert(br, p, dev->dev_addr))) - destroy_nbp(p); - - else if ((err = br_sysfs_addif(p))) - del_nbp(p); - else { - rcu_assign_pointer(dev->br_port, p); - dev_set_promiscuity(dev, 1); + err = kobject_add(&p->kobj); + if (err) + goto err0; - list_add_rcu(&p->list, &br->port_list); + err = br_fdb_insert(br, p, dev->dev_addr); + if (err) + goto err1; - spin_lock_bh(&br->lock); - br_stp_recalculate_bridge_id(br); - br_features_recompute(br); - if ((br->dev->flags & IFF_UP) - && (dev->flags & IFF_UP) && netif_carrier_ok(dev)) - br_stp_enable_port(p); - spin_unlock_bh(&br->lock); + err = br_sysfs_addif(p); + if (err) + goto err2; - dev_set_mtu(br->dev, br_min_mtu(br)); - } + rcu_assign_pointer(dev->br_port, p); + dev_set_promiscuity(dev, 1); + list_add_rcu(&p->list, &br->port_list); + + spin_lock_bh(&br->lock); + br_stp_recalculate_bridge_id(br); + br_features_recompute(br); + schedule_delayed_work(&p->carrier_check, BR_PORT_DEBOUNCE); + spin_unlock_bh(&br->lock); + + dev_set_mtu(br->dev, br_min_mtu(br)); + kobject_uevent(&p->kobj, KOBJ_ADD); + + return 0; +err2: + br_fdb_delete_by_port(br, p); +err1: + kobject_del(&p->kobj); +err0: + kobject_put(&p->kobj); return err; } @@ -424,7 +458,6 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) if (!p || p->br != br) return -EINVAL; - br_sysfs_removeif(p); del_nbp(p); spin_lock_bh(&br->lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c5bd631ffcd5..8f10e09f251b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -232,9 +232,8 @@ extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ +extern struct sysfs_ops brport_sysfs_ops; extern int br_sysfs_addif(struct net_bridge_port *p); -extern void br_sysfs_removeif(struct net_bridge_port *p); -extern void br_sysfs_freeif(struct net_bridge_port *p); /* br_sysfs_br.c */ extern int br_sysfs_addbr(struct net_device *dev); @@ -243,8 +242,6 @@ extern void br_sysfs_delbr(struct net_device *dev); #else #define br_sysfs_addif(p) (0) -#define br_sysfs_removeif(p) do { } while(0) -#define br_sysfs_freeif(p) kfree(p) #define br_sysfs_addbr(dev) (0) #define br_sysfs_delbr(dev) do { } while(0) #endif /* CONFIG_SYSFS */ diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 0ac0355d16dd..c51c9e42aeb3 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -195,23 +195,11 @@ static ssize_t brport_store(struct kobject * kobj, return ret; } -/* called from kobject_put when port ref count goes to zero. */ -static void brport_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct net_bridge_port, kobj)); -} - -static struct sysfs_ops brport_sysfs_ops = { +struct sysfs_ops brport_sysfs_ops = { .show = brport_show, .store = brport_store, }; -static struct kobj_type brport_ktype = { - .sysfs_ops = &brport_sysfs_ops, - .release = brport_release, -}; - - /* * Add sysfs entries to ethernet device added to a bridge. * Creates a brport subdirectory with bridge attributes. @@ -223,17 +211,6 @@ int br_sysfs_addif(struct net_bridge_port *p) struct brport_attribute **a; int err; - ASSERT_RTNL(); - - kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR); - p->kobj.ktype = &brport_ktype; - p->kobj.parent = &(p->dev->class_dev.kobj); - p->kobj.kset = NULL; - - err = kobject_add(&p->kobj); - if(err) - goto out1; - err = sysfs_create_link(&p->kobj, &br->dev->class_dev.kobj, SYSFS_BRIDGE_PORT_LINK); if (err) @@ -245,28 +222,7 @@ int br_sysfs_addif(struct net_bridge_port *p) goto out2; } - err = sysfs_create_link(&br->ifobj, &p->kobj, p->dev->name); - if (err) - goto out2; - - kobject_uevent(&p->kobj, KOBJ_ADD); - return 0; - out2: - kobject_del(&p->kobj); - out1: + err= sysfs_create_link(&br->ifobj, &p->kobj, p->dev->name); +out2: return err; } - -void br_sysfs_removeif(struct net_bridge_port *p) -{ - pr_debug("br_sysfs_removeif\n"); - sysfs_remove_link(&p->br->ifobj, p->dev->name); - kobject_uevent(&p->kobj, KOBJ_REMOVE); - kobject_del(&p->kobj); -} - -void br_sysfs_freeif(struct net_bridge_port *p) -{ - pr_debug("br_sysfs_freeif\n"); - kobject_put(&p->kobj); -} From 8568daa49063fd84b52b9e22b4e2422417b4d483 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 10 Feb 2006 16:02:20 +1100 Subject: [PATCH 10/27] ppc: Use the system call table from arch/powerpc/kernel/systbl.S With this, new system calls only have to be wired up in one place for ARCH=ppc and ARCH=powerpc, rather than 2. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/Makefile | 6 +- arch/powerpc/kernel/systbl.S | 2 - arch/ppc/kernel/misc.S | 283 ----------------------------------- 3 files changed, 3 insertions(+), 288 deletions(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c287980b7e65..80e9fe2632b8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,10 +12,10 @@ endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ - init_task.o process.o + init_task.o process.o systbl.o obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ - signal_64.o ptrace32.o systbl.o \ + signal_64.o ptrace32.o \ paca.o cpu_setup_power4.o \ firmware.o sysfs.o idle_64.o obj-$(CONFIG_PPC64) += vdso64/ @@ -46,7 +46,7 @@ extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds obj-y += time.o prom.o traps.o setup-common.o udbg.o -obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 007b15ee36d2..55b9fc10694e 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -36,8 +36,6 @@ #ifdef CONFIG_PPC64 #define sys_sigpending sys_ni_syscall #define sys_old_getrlimit sys_ni_syscall -#else -#define ppc_rtas sys_ni_syscall #endif _GLOBAL(sys_call_table) diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index c3427eed8345..5a936566fd61 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -1048,286 +1048,3 @@ _GLOBAL(name) \ blr SYSCALL(execve) - -/* Why isn't this a) automatic, b) written in 'C'? */ - .data - .align 4 -_GLOBAL(sys_call_table) - .long sys_restart_syscall /* 0 */ - .long sys_exit - .long ppc_fork - .long sys_read - .long sys_write - .long sys_open /* 5 */ - .long sys_close - .long sys_waitpid - .long sys_creat - .long sys_link - .long sys_unlink /* 10 */ - .long sys_execve - .long sys_chdir - .long sys_time - .long sys_mknod - .long sys_chmod /* 15 */ - .long sys_lchown - .long sys_ni_syscall /* old break syscall holder */ - .long sys_stat - .long sys_lseek - .long sys_getpid /* 20 */ - .long sys_mount - .long sys_oldumount - .long sys_setuid - .long sys_getuid - .long sys_stime /* 25 */ - .long sys_ptrace - .long sys_alarm - .long sys_fstat - .long sys_pause - .long sys_utime /* 30 */ - .long sys_ni_syscall /* old stty syscall holder */ - .long sys_ni_syscall /* old gtty syscall holder */ - .long sys_access - .long sys_nice - .long sys_ni_syscall /* 35 */ /* old ftime syscall holder */ - .long sys_sync - .long sys_kill - .long sys_rename - .long sys_mkdir - .long sys_rmdir /* 40 */ - .long sys_dup - .long sys_pipe - .long sys_times - .long sys_ni_syscall /* old prof syscall holder */ - .long sys_brk /* 45 */ - .long sys_setgid - .long sys_getgid - .long sys_signal - .long sys_geteuid - .long sys_getegid /* 50 */ - .long sys_acct - .long sys_umount /* recycled never used phys() */ - .long sys_ni_syscall /* old lock syscall holder */ - .long sys_ioctl - .long sys_fcntl /* 55 */ - .long sys_ni_syscall /* old mpx syscall holder */ - .long sys_setpgid - .long sys_ni_syscall /* old ulimit syscall holder */ - .long sys_olduname - .long sys_umask /* 60 */ - .long sys_chroot - .long sys_ustat - .long sys_dup2 - .long sys_getppid - .long sys_getpgrp /* 65 */ - .long sys_setsid - .long sys_sigaction - .long sys_sgetmask - .long sys_ssetmask - .long sys_setreuid /* 70 */ - .long sys_setregid - .long sys_sigsuspend - .long sys_sigpending - .long sys_sethostname - .long sys_setrlimit /* 75 */ - .long sys_old_getrlimit - .long sys_getrusage - .long sys_gettimeofday - .long sys_settimeofday - .long sys_getgroups /* 80 */ - .long sys_setgroups - .long ppc_select - .long sys_symlink - .long sys_lstat - .long sys_readlink /* 85 */ - .long sys_uselib - .long sys_swapon - .long sys_reboot - .long old_readdir - .long sys_mmap /* 90 */ - .long sys_munmap - .long sys_truncate - .long sys_ftruncate - .long sys_fchmod - .long sys_fchown /* 95 */ - .long sys_getpriority - .long sys_setpriority - .long sys_ni_syscall /* old profil syscall holder */ - .long sys_statfs - .long sys_fstatfs /* 100 */ - .long sys_ni_syscall - .long sys_socketcall - .long sys_syslog - .long sys_setitimer - .long sys_getitimer /* 105 */ - .long sys_newstat - .long sys_newlstat - .long sys_newfstat - .long sys_uname - .long sys_ni_syscall /* 110 */ - .long sys_vhangup - .long sys_ni_syscall /* old 'idle' syscall */ - .long sys_ni_syscall - .long sys_wait4 - .long sys_swapoff /* 115 */ - .long sys_sysinfo - .long sys_ipc - .long sys_fsync - .long sys_sigreturn - .long ppc_clone /* 120 */ - .long sys_setdomainname - .long sys_newuname - .long sys_ni_syscall - .long sys_adjtimex - .long sys_mprotect /* 125 */ - .long sys_sigprocmask - .long sys_ni_syscall /* old sys_create_module */ - .long sys_init_module - .long sys_delete_module - .long sys_ni_syscall /* old sys_get_kernel_syms */ /* 130 */ - .long sys_quotactl - .long sys_getpgid - .long sys_fchdir - .long sys_bdflush - .long sys_sysfs /* 135 */ - .long sys_personality - .long sys_ni_syscall /* for afs_syscall */ - .long sys_setfsuid - .long sys_setfsgid - .long sys_llseek /* 140 */ - .long sys_getdents - .long ppc_select - .long sys_flock - .long sys_msync - .long sys_readv /* 145 */ - .long sys_writev - .long sys_getsid - .long sys_fdatasync - .long sys_sysctl - .long sys_mlock /* 150 */ - .long sys_munlock - .long sys_mlockall - .long sys_munlockall - .long sys_sched_setparam - .long sys_sched_getparam /* 155 */ - .long sys_sched_setscheduler - .long sys_sched_getscheduler - .long sys_sched_yield - .long sys_sched_get_priority_max - .long sys_sched_get_priority_min /* 160 */ - .long sys_sched_rr_get_interval - .long sys_nanosleep - .long sys_mremap - .long sys_setresuid - .long sys_getresuid /* 165 */ - .long sys_ni_syscall /* old sys_query_module */ - .long sys_poll - .long sys_nfsservctl - .long sys_setresgid - .long sys_getresgid /* 170 */ - .long sys_prctl - .long sys_rt_sigreturn - .long sys_rt_sigaction - .long sys_rt_sigprocmask - .long sys_rt_sigpending /* 175 */ - .long sys_rt_sigtimedwait - .long sys_rt_sigqueueinfo - .long sys_rt_sigsuspend - .long sys_pread64 - .long sys_pwrite64 /* 180 */ - .long sys_chown - .long sys_getcwd - .long sys_capget - .long sys_capset - .long sys_sigaltstack /* 185 */ - .long sys_sendfile - .long sys_ni_syscall /* streams1 */ - .long sys_ni_syscall /* streams2 */ - .long ppc_vfork - .long sys_getrlimit /* 190 */ - .long sys_readahead - .long sys_mmap2 - .long sys_truncate64 - .long sys_ftruncate64 - .long sys_stat64 /* 195 */ - .long sys_lstat64 - .long sys_fstat64 - .long sys_pciconfig_read - .long sys_pciconfig_write - .long sys_pciconfig_iobase /* 200 */ - .long sys_ni_syscall /* 201 - reserved - MacOnLinux - new */ - .long sys_getdents64 - .long sys_pivot_root - .long sys_fcntl64 - .long sys_madvise /* 205 */ - .long sys_mincore - .long sys_gettid - .long sys_tkill - .long sys_setxattr - .long sys_lsetxattr /* 210 */ - .long sys_fsetxattr - .long sys_getxattr - .long sys_lgetxattr - .long sys_fgetxattr - .long sys_listxattr /* 215 */ - .long sys_llistxattr - .long sys_flistxattr - .long sys_removexattr - .long sys_lremovexattr - .long sys_fremovexattr /* 220 */ - .long sys_futex - .long sys_sched_setaffinity - .long sys_sched_getaffinity - .long sys_ni_syscall - .long sys_ni_syscall /* 225 - reserved for Tux */ - .long sys_sendfile64 - .long sys_io_setup - .long sys_io_destroy - .long sys_io_getevents - .long sys_io_submit /* 230 */ - .long sys_io_cancel - .long sys_set_tid_address - .long sys_fadvise64 - .long sys_exit_group - .long sys_lookup_dcookie /* 235 */ - .long sys_epoll_create - .long sys_epoll_ctl - .long sys_epoll_wait - .long sys_remap_file_pages - .long sys_timer_create /* 240 */ - .long sys_timer_settime - .long sys_timer_gettime - .long sys_timer_getoverrun - .long sys_timer_delete - .long sys_clock_settime /* 245 */ - .long sys_clock_gettime - .long sys_clock_getres - .long sys_clock_nanosleep - .long sys_swapcontext - .long sys_tgkill /* 250 */ - .long sys_utimes - .long sys_statfs64 - .long sys_fstatfs64 - .long ppc_fadvise64_64 - .long sys_ni_syscall /* 255 - rtas (used on ppc64) */ - .long sys_debug_setcontext - .long sys_ni_syscall /* 257 reserved for vserver */ - .long sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ - .long sys_ni_syscall /* 259 reserved for new sys_mbind */ - .long sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ - .long sys_ni_syscall /* 261 reserved for new sys_set_mempolicy */ - .long sys_mq_open - .long sys_mq_unlink - .long sys_mq_timedsend - .long sys_mq_timedreceive /* 265 */ - .long sys_mq_notify - .long sys_mq_getsetattr - .long sys_kexec_load - .long sys_add_key - .long sys_request_key /* 270 */ - .long sys_keyctl - .long sys_waitid - .long sys_ioprio_set - .long sys_ioprio_get - .long sys_inotify_init /* 275 */ - .long sys_inotify_add_watch - .long sys_inotify_rm_watch From b37ce281d729181b9862c4e3e112f9b5eea74ac9 Mon Sep 17 00:00:00 2001 From: JANAK DESAI Date: Tue, 7 Feb 2006 12:59:11 -0800 Subject: [PATCH 11/27] [PATCH] powerpc: unshare system call registration Registers system call for the powerpc architecture. Signed-off-by: Janak Desai Cc: Al Viro Cc: Christoph Hellwig Cc: Michael Kerrisk Cc: Benjamin Herrenschmidt Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/systbl.S | 1 + include/asm-powerpc/unistd.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 55b9fc10694e..8a9f994ed917 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -321,3 +321,4 @@ SYSCALL(spu_run) SYSCALL(spu_create) COMPAT_SYS(pselect6) COMPAT_SYS(ppoll) +SYSCALL(unshare) diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index a40cdff21a88..35556993f066 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -300,8 +300,9 @@ #define __NR_spu_create 279 #define __NR_pselect6 280 #define __NR_ppoll 281 +#define __NR_unshare 282 -#define __NR_syscalls 282 +#define __NR_syscalls 283 #ifdef __KERNEL__ #define __NR__exit __NR_exit From ad71f123a9e9b809f6c829db1222ce0423a1153c Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Tue, 7 Feb 2006 13:44:08 -0600 Subject: [PATCH 12/27] [PATCH] powerpc: Add FSL USB node to documentation Updated the documentation to include the definition of the USB device node format for Freescale SOC devices. Signed-off-by: Becky Bruce Signed-off-by: Kumar Gala Signed-off-by: Paul Mackerras --- Documentation/powerpc/booting-without-of.txt | 60 +++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 54e5f9b1536d..d02c64953dcd 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -44,7 +44,6 @@ compiler and the textural representation of the tree that can be "compiled" by dtc. - November 21, 2005: Rev 0.5 - Additions/generalizations for 32-bit - Changed to reflect the new arch/powerpc @@ -1307,6 +1306,65 @@ platforms are moved over to use the flattened-device-tree model. }; + f) Freescale SOC USB controllers + + The device node for a USB controller that is part of a Freescale + SOC is as described in the document "Open Firmware Recommended + Practice : Universal Serial Bus" with the following modifications + and additions : + + Required properties : + - compatible : Should be "fsl-usb2-mph" for multi port host usb + controllers, or "fsl-usb2-dr" for dual role usb controllers + - phy_type : For multi port host usb controllers, should be one of + "ulpi", or "serial". For dual role usb controllers, should be + one of "ulpi", "utmi", "utmi_wide", or "serial". + - reg : Offset and length of the register set for the device + - port0 : boolean; if defined, indicates port0 is connected for + fsl-usb2-mph compatible controllers. Either this property or + "port1" (or both) must be defined for "fsl-usb2-mph" compatible + controllers. + - port1 : boolean; if defined, indicates port1 is connected for + fsl-usb2-mph compatible controllers. Either this property or + "port0" (or both) must be defined for "fsl-usb2-mph" compatible + controllers. + + Recommended properties : + - interrupts : where a is the interrupt number and b is a + field that represents an encoding of the sense and level + information for the interrupt. This should be encoded based on + the information in section 2) depending on the type of interrupt + controller you have. + - interrupt-parent : the phandle for the interrupt controller that + services interrupts for this device. + + Example multi port host usb controller device node : + usb@22000 { + device_type = "usb"; + compatible = "fsl-usb2-mph"; + reg = <22000 1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <700>; + interrupts = <27 1>; + phy_type = "ulpi"; + port0; + port1; + }; + + Example dual role usb controller device node : + usb@23000 { + device_type = "usb"; + compatible = "fsl-usb2-dr"; + reg = <23000 1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <700>; + interrupts = <26 1>; + phy = "ulpi"; + }; + + More devices will be defined as this spec matures. From 00adbf62bd16f6527e046b422349a54d783a3d86 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Mon, 16 Jan 2006 10:53:22 -0600 Subject: [PATCH 13/27] [PATCH] powerpc: Add CONFIG_DEFAULT_UIMAGE for embedded boards Embedded boards that u-boot require a kernel image in the uImage format. This allows a given board to specify it wants a uImage built by default. This also fixes a warning at config time, as this symbol is referred to in arch/powerpc/platforms/83xx/Kconfig. Signed-off-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/Kconfig | 6 ++++++ arch/powerpc/Makefile | 1 + 2 files changed, 7 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index df338c5cc910..80d114a3a837 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -83,6 +83,12 @@ config GENERIC_TBSYNC default y if PPC32 && SMP default n +config DEFAULT_UIMAGE + bool + help + Used to allow a board to specify it wants a uImage built by default + default n + menu "Processor support" choice prompt "Processor Type" diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 44dd82b791d1..15fc3e98ac5c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -142,6 +142,7 @@ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ # Default to zImage, override when needed defaultimage-y := zImage defaultimage-$(CONFIG_PPC_ISERIES) := vmlinux +defaultimage-$(CONFIG_DEFAULT_UIMAGE) := uImage KBUILD_IMAGE := $(defaultimage-y) all: $(KBUILD_IMAGE) From a2000572ad511f5f43091ed7bd2cc3b913104a1e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 10 Feb 2006 01:51:02 -0800 Subject: [PATCH 14/27] [PATCH] sched: remove smpnice I don't think the code is quite ready, which is why I asked for Peter's additions to also be merged before I acked it (although it turned out that it still isn't quite ready with his additions either). Basically I have had similar observations to Suresh in that it does not play nicely with the rest of the balancing infrastructure (and raised similar concerns in my review). The samples (group of 4) I got for "maximum recorded imbalance" on a 2x2 SMP+HT Xeon are as follows: | Following boot | hackbench 20 | hackbench 40 -----------+----------------+---------------------+--------------------- 2.6.16-rc2 | 30,37,100,112 | 5600,5530,6020,6090 | 6390,7090,8760,8470 +nosmpnice | 3, 2, 4, 2 | 28, 150, 294, 132 | 348, 348, 294, 347 Hackbench raw performance is down around 15% with smpnice (but that in itself isn't a huge deal because it is just a benchmark). However, the samples show that the imbalance passed into move_tasks is increased by about a factor of 10-30. I think this would also go some way to explaining latency blips turning up in the balancing code (though I haven't actually measured that). We'll probably have to revert this in the SUSE kernel. Cc: "Siddha, Suresh B" Acked-by: Ingo Molnar Cc: Peter Williams Cc: "Martin J. Bligh" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 137 ++++++++----------------------------------------- 1 file changed, 22 insertions(+), 115 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index bc38804e40dd..87d93be336a1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -215,7 +215,6 @@ struct runqueue { */ unsigned long nr_running; #ifdef CONFIG_SMP - unsigned long prio_bias; unsigned long cpu_load[3]; #endif unsigned long long nr_switches; @@ -669,68 +668,13 @@ static int effective_prio(task_t *p) return prio; } -#ifdef CONFIG_SMP -static inline void inc_prio_bias(runqueue_t *rq, int prio) -{ - rq->prio_bias += MAX_PRIO - prio; -} - -static inline void dec_prio_bias(runqueue_t *rq, int prio) -{ - rq->prio_bias -= MAX_PRIO - prio; -} - -static inline void inc_nr_running(task_t *p, runqueue_t *rq) -{ - rq->nr_running++; - if (rt_task(p)) { - if (p != rq->migration_thread) - /* - * The migration thread does the actual balancing. Do - * not bias by its priority as the ultra high priority - * will skew balancing adversely. - */ - inc_prio_bias(rq, p->prio); - } else - inc_prio_bias(rq, p->static_prio); -} - -static inline void dec_nr_running(task_t *p, runqueue_t *rq) -{ - rq->nr_running--; - if (rt_task(p)) { - if (p != rq->migration_thread) - dec_prio_bias(rq, p->prio); - } else - dec_prio_bias(rq, p->static_prio); -} -#else -static inline void inc_prio_bias(runqueue_t *rq, int prio) -{ -} - -static inline void dec_prio_bias(runqueue_t *rq, int prio) -{ -} - -static inline void inc_nr_running(task_t *p, runqueue_t *rq) -{ - rq->nr_running++; -} - -static inline void dec_nr_running(task_t *p, runqueue_t *rq) -{ - rq->nr_running--; -} -#endif - /* * __activate_task - move a task to the runqueue. */ static inline void __activate_task(task_t *p, runqueue_t *rq) { enqueue_task(p, rq->active); - inc_nr_running(p, rq); + rq->nr_running++; } /* @@ -739,7 +683,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) static inline void __activate_idle_task(task_t *p, runqueue_t *rq) { enqueue_task_head(p, rq->active); - inc_nr_running(p, rq); + rq->nr_running++; } static int recalc_task_prio(task_t *p, unsigned long long now) @@ -863,7 +807,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) */ static void deactivate_task(struct task_struct *p, runqueue_t *rq) { - dec_nr_running(p, rq); + rq->nr_running--; dequeue_task(p, p->array); p->array = NULL; } @@ -1007,61 +951,27 @@ void kick_process(task_t *p) * We want to under-estimate the load of migration sources, to * balance conservatively. */ -static unsigned long __source_load(int cpu, int type, enum idle_type idle) -{ - runqueue_t *rq = cpu_rq(cpu); - unsigned long running = rq->nr_running; - unsigned long source_load, cpu_load = rq->cpu_load[type-1], - load_now = running * SCHED_LOAD_SCALE; - - if (type == 0) - source_load = load_now; - else - source_load = min(cpu_load, load_now); - - if (running > 1 || (idle == NOT_IDLE && running)) - /* - * If we are busy rebalancing the load is biased by - * priority to create 'nice' support across cpus. When - * idle rebalancing we should only bias the source_load if - * there is more than one task running on that queue to - * prevent idle rebalance from trying to pull tasks from a - * queue with only one running task. - */ - source_load = source_load * rq->prio_bias / running; - - return source_load; -} - static inline unsigned long source_load(int cpu, int type) { - return __source_load(cpu, type, NOT_IDLE); + runqueue_t *rq = cpu_rq(cpu); + unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + if (type == 0) + return load_now; + + return min(rq->cpu_load[type-1], load_now); } /* * Return a high guess at the load of a migration-target cpu */ -static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) -{ - runqueue_t *rq = cpu_rq(cpu); - unsigned long running = rq->nr_running; - unsigned long target_load, cpu_load = rq->cpu_load[type-1], - load_now = running * SCHED_LOAD_SCALE; - - if (type == 0) - target_load = load_now; - else - target_load = max(cpu_load, load_now); - - if (running > 1 || (idle == NOT_IDLE && running)) - target_load = target_load * rq->prio_bias / running; - - return target_load; -} - static inline unsigned long target_load(int cpu, int type) { - return __target_load(cpu, type, NOT_IDLE); + runqueue_t *rq = cpu_rq(cpu); + unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + if (type == 0) + return load_now; + + return max(rq->cpu_load[type-1], load_now); } /* @@ -1530,7 +1440,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) list_add_tail(&p->run_list, ¤t->run_list); p->array = current->array; p->array->nr_active++; - inc_nr_running(p, rq); + rq->nr_running++; } set_need_resched(); } else @@ -1875,9 +1785,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) { dequeue_task(p, src_array); - dec_nr_running(p, src_rq); + src_rq->nr_running--; set_task_cpu(p, this_cpu); - inc_nr_running(p, this_rq); + this_rq->nr_running++; enqueue_task(p, this_array); p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) + this_rq->timestamp_last_tick; @@ -2056,9 +1966,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, /* Bias balancing toward cpus of our domain */ if (local_group) - load = __target_load(i, load_idx, idle); + load = target_load(i, load_idx); else - load = __source_load(i, load_idx, idle); + load = source_load(i, load_idx); avg_load += load; } @@ -2171,7 +2081,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group, int i; for_each_cpu_mask(i, group->cpumask) { - load = __source_load(i, 0, idle); + load = source_load(i, 0); if (load > max_load) { max_load = load; @@ -3571,10 +3481,8 @@ void set_user_nice(task_t *p, long nice) goto out_unlock; } array = p->array; - if (array) { + if (array) dequeue_task(p, array); - dec_prio_bias(rq, p->static_prio); - } old_prio = p->prio; new_prio = NICE_TO_PRIO(nice); @@ -3584,7 +3492,6 @@ void set_user_nice(task_t *p, long nice) if (array) { enqueue_task(p, array); - inc_prio_bias(rq, p->static_prio); /* * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: From afcd024183d8a6eae7e489ce50b2485c5ae4f662 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Fri, 10 Feb 2006 01:51:03 -0800 Subject: [PATCH 15/27] [PATCH] wrong firmware location in IPW2100 Kconfig entry Firmware should go into /lib/firmware, not /etc/firmware. Found by Alejandro Bonilla. Signed-off-by: Jesper Juhl Signed-off-by: Adrian Bunk Acked-by: Zhu Yi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/wireless/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 233a4f608084..ef85d76575a2 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -148,7 +148,7 @@ config IPW2100 In order to use this driver, you will need a firmware image for it. You can obtain the firmware from . Once you have the firmware image, you - will need to place it in /etc/firmware. + will need to place it in /lib/firmware. You will also very likely need the Wireless Tools in order to configure your card: From 9c15e852a524d55ab768cf48c97f5c684f876af2 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Fri, 10 Feb 2006 01:51:05 -0800 Subject: [PATCH 16/27] [PATCH] kexec: fix in free initrd when overlapped with crashkernel region It is possible that the reserved crashkernel region can be overlapped with initrd since the bootloader sets the initrd location. When the initrd region is freed, the second kernel memory will not be contiguous. The Kexec_load can cause an oops since there is no contiguous memory to write the second kernel or this memory could be used in the first kernel itself and may not be part of the dump. For example, on powerpc, the initrd is located at 36MB and the crashkernel starts at 32MB. The kexec_load caused panic since writing into non-allocated memory (after 36MB). We could see the similar issue even on other archs. One possibility is to move the initrd outside of crashkernel region. But, the initrd region will be freed anyway before the system is up. This patch fixes this issue and frees only regions that are not part of crashkernel memory in case overlaps. Signed-off-by: Haren Myneni Acked-by: "Eric W. Biederman" Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 1 + init/initramfs.c | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index a311f58c8a7c..cfb3410e32b1 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -6,6 +6,7 @@ #include #include #include +#include #include /* Verify architecture specific macros are defined */ diff --git a/init/initramfs.c b/init/initramfs.c index 0c5d9a3f951b..637344b05981 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -466,10 +466,32 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) extern char __initramfs_start[], __initramfs_end[]; #ifdef CONFIG_BLK_DEV_INITRD #include +#include static void __init free_initrd(void) { - free_initrd_mem(initrd_start, initrd_end); +#ifdef CONFIG_KEXEC + unsigned long crashk_start = (unsigned long)__va(crashk_res.start); + unsigned long crashk_end = (unsigned long)__va(crashk_res.end); + + /* + * If the initrd region is overlapped with crashkernel reserved region, + * free only memory that is not part of crashkernel region. + */ + if (initrd_start < crashk_end && initrd_end > crashk_start) { + /* + * Initialize initrd memory region since the kexec boot does + * not do. + */ + memset((void *)initrd_start, 0, initrd_end - initrd_start); + if (initrd_start < crashk_start) + free_initrd_mem(initrd_start, crashk_start); + if (initrd_end > crashk_end) + free_initrd_mem(crashk_end, initrd_end); + } else +#endif + free_initrd_mem(initrd_start, initrd_end); + initrd_start = 0; initrd_end = 0; } From 02df360bf38ca2acb78ddee9fd28262e9474153c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Feb 2006 01:51:06 -0800 Subject: [PATCH 17/27] [PATCH] remove bogus comment from init/main.c Remove bogus comment from init function which could lead to the assumption that cpu_possible_map is setup in smp_prepare_cpus(). Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/init/main.c b/init/main.c index 7c79da57d3a2..4c194c47395f 100644 --- a/init/main.c +++ b/init/main.c @@ -668,7 +668,6 @@ static int init(void * unused) */ child_reaper = current; - /* Sets up cpus_possible() */ smp_prepare_cpus(max_cpus); do_pre_smp_initcalls(); From 705672935f8a9e661264e34cd8c409e8cddcc7db Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 10 Feb 2006 01:51:07 -0800 Subject: [PATCH 18/27] [PATCH] Fix building external modules on ppc32 We are setting up sources for building external modules like this: /usr/src/linux-obj> # create a .config file /usr/src/linux-obj> make -C /usr/src/linux O=$PWD oldconfig /usr/src/linux-obj> make -C /usr/src/linux O=$PWD prepare /usr/src/linux-obj> make -C /usr/src/linux O=$PWD scripts /usr/src/linux-obj> make -C /usr/src/linux O=$PWD clean After that, external modules can be built with: /usr/src/module> make -C /usr/src/linux-obj M=$PWD This fails for ppc32 because the `make clean' removes the arch/powerpc/include directory. This should be done in archmrproper instead of in archclean. Signed-off-by: Andreas Gruenbacher Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 44dd82b791d1..efcad7601a7a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -167,6 +167,8 @@ endef archclean: $(Q)$(MAKE) $(clean)=$(boot) + +archmrproper: $(Q)rm -rf arch/$(ARCH)/include archprepare: checkbin From 7a8ef1cb774e5438d292365626f9b96616283706 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 10 Feb 2006 01:51:08 -0800 Subject: [PATCH 19/27] [PATCH] x86: don't initialise cpu_possible_map to all ones Initialising cpu_possible_map to all-ones with CONFIG_HOTPLUG_CPU means that a) All for_each_cpu() loops will iterate across all NR_CPUS CPUs, rather than over possible ones. That can be quite expensive. b) Soon we'll be allocating per-cpu areas only for possible CPUs. So with CPU_MASK_ALL, we'll be wasting memory. I also switched voyager over to not use CPU_MASK_ALL in the non-CPU-hotplug case. Should be OK.. I note that parisc is also using CPU_MASK_ALL. Suggest that it stop doing that. Cc: James Bottomley Cc: Kyle McMartin Cc: Paul Jackson Cc: Ashok Raj Cc: Zwane Mwaikambo Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smpboot.c | 4 ---- arch/i386/mach-voyager/voyager_smp.c | 2 +- include/linux/cpumask.h | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 255adb498268..fb00ab7b7612 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -87,11 +87,7 @@ EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); -#ifdef CONFIG_HOTPLUG_CPU -cpumask_t cpu_possible_map = CPU_MASK_ALL; -#else cpumask_t cpu_possible_map; -#endif EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 72a1b9cae2e4..6e4c3baef6cc 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -240,7 +240,7 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE; cpumask_t cpu_callin_map = CPU_MASK_NONE; cpumask_t cpu_callout_map = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_callout_map); -cpumask_t cpu_possible_map = CPU_MASK_ALL; +cpumask_t cpu_possible_map = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_possible_map); /* The per processor IRQ masks (these are usually kept in sync) */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 20b446f26ecd..60e56c6e03dd 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -328,7 +328,7 @@ static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp, * bitmap of size NR_CPUS. * * #ifdef CONFIG_HOTPLUG_CPU - * cpu_possible_map - all NR_CPUS bits set + * cpu_possible_map - has bit 'cpu' set iff cpu is populatable * cpu_present_map - has bit 'cpu' set iff cpu is populated * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler * #else From c22db9412736204b25aeba19d18e5ea922f7d632 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 10 Feb 2006 01:51:11 -0800 Subject: [PATCH 20/27] [PATCH] prevent recursive panic from softlockup watchdog When panic_timeout is zero, suppress triggering a nested panic due to soft lockup detection. Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/panic.c b/kernel/panic.c index c5c4ab255834..126dc43f1c74 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -130,6 +130,7 @@ NORET_TYPE void panic(const char * fmt, ...) #endif local_irq_enable(); for (i = 0;;) { + touch_softlockup_watchdog(); i += panic_blink(i); mdelay(1); i++; From 8e36709d8cea48a4d341294ce2b46678a2e77159 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 10 Feb 2006 01:51:12 -0800 Subject: [PATCH 21/27] [PATCH] shmdt cannot detach not-alined shm segment cleanly. sys_shmdt() can manage shm segments which are covered by multiple vmas. (This can happen when a user uses mprotect() after shmat().) This works well if shm is aligned to PAGE_SIZE, but if not, the last segment cannot be detached. It is because a comparison in sys_shmdt() (vma->vm_end - addr) < size addr == return address of shmat() size == shmsize, argments to shmget() size should be aligned to PAGE_SIZE before being compared with vma->vm_end, which is aligned. Signed-off-by: KAMEZAWA Hiroyuki Cc: Manfred Spraul Acked-by: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/shm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ipc/shm.c b/ipc/shm.c index 4c28d2d8e305..9162123a7b23 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -870,6 +870,7 @@ asmlinkage long sys_shmdt(char __user *shmaddr) * could possibly have landed at. Also cast things to loff_t to * prevent overflows and make comparisions vs. equal-width types. */ + size = PAGE_ALIGN(size); while (vma && (loff_t)(vma->vm_end - addr) <= size) { next = vma->vm_next; From f0188f47482efdbd2e005103bb4f0224a835dfad Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Fri, 10 Feb 2006 01:51:13 -0800 Subject: [PATCH 22/27] [PATCH] slab: Avoid deadlock at kmem_cache_create/kmem_cache_destroy Prevents deadlock situation between kmem_cache_create()/kmem_cache_destory(), and kmem_cache_create() /cpu hotplug. The locking order probably got moved over time. Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index d66c2b0d9715..add05d808a4a 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1717,6 +1717,12 @@ kmem_cache_create (const char *name, size_t size, size_t align, BUG(); } + /* + * Prevent CPUs from coming and going. + * lock_cpu_hotplug() nests outside cache_chain_mutex + */ + lock_cpu_hotplug(); + mutex_lock(&cache_chain_mutex); list_for_each(p, &cache_chain) { @@ -1918,8 +1924,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->dtor = dtor; cachep->name = name; - /* Don't let CPUs to come and go */ - lock_cpu_hotplug(); if (g_cpucache_up == FULL) { enable_cpucache(cachep); @@ -1978,12 +1982,12 @@ kmem_cache_create (const char *name, size_t size, size_t align, /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); - unlock_cpu_hotplug(); oops: if (!cachep && (flags & SLAB_PANIC)) panic("kmem_cache_create(): failed to create slab `%s'\n", name); mutex_unlock(&cache_chain_mutex); + unlock_cpu_hotplug(); return cachep; } EXPORT_SYMBOL(kmem_cache_create); From 8977d929e49021d9a6e031310aab01fa72f849c2 Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Fri, 10 Feb 2006 01:51:14 -0800 Subject: [PATCH 23/27] [PATCH] tty buffering stall fix Prevent stalled processing of received data when a driver allocates tty buffer space but does not immediately follow the allocation with more data and a call to schedule receive tty processing. (example: hvc_console) This bug was introduced by the first locking patch for the new tty buffering. Signed-off-by: Paul Fulghum Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 30 ++++++++++++++++++++++-------- include/linux/kbd_kern.h | 4 +++- include/linux/tty.h | 2 ++ include/linux/tty_flip.h | 4 +++- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 076e07c1da38..a23816d3e9a1 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -268,6 +268,8 @@ static struct tty_buffer *tty_buffer_alloc(size_t size) p->size = size; p->next = NULL; p->active = 0; + p->commit = 0; + p->read = 0; p->char_buf_ptr = (char *)(p->data); p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size; /* printk("Flip create %p\n", p); */ @@ -298,6 +300,8 @@ static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size) *tbh = t->next; t->next = NULL; t->used = 0; + t->commit = 0; + t->read = 0; /* DEBUG ONLY */ memset(t->data, '*', size); /* printk("Flip recycle %p\n", t); */ @@ -335,6 +339,7 @@ int tty_buffer_request_room(struct tty_struct *tty, size_t size) if (b != NULL) { b->next = n; b->active = 0; + b->commit = b->used; } else tty->buf.head = n; tty->buf.tail = n; @@ -2752,6 +2757,9 @@ static void flush_to_ldisc(void *private_) unsigned long flags; struct tty_ldisc *disc; struct tty_buffer *tbuf; + int count; + char *char_buf; + unsigned char *flag_buf; disc = tty_ldisc_ref(tty); if (disc == NULL) /* !TTY_LDISC */ @@ -2765,16 +2773,20 @@ static void flush_to_ldisc(void *private_) goto out; } spin_lock_irqsave(&tty->buf.lock, flags); - while((tbuf = tty->buf.head) != NULL && !tbuf->active) { + while((tbuf = tty->buf.head) != NULL) { + while ((count = tbuf->commit - tbuf->read) != 0) { + char_buf = tbuf->char_buf_ptr + tbuf->read; + flag_buf = tbuf->flag_buf_ptr + tbuf->read; + tbuf->read += count; + spin_unlock_irqrestore(&tty->buf.lock, flags); + disc->receive_buf(tty, char_buf, flag_buf, count); + spin_lock_irqsave(&tty->buf.lock, flags); + } + if (tbuf->active) + break; tty->buf.head = tbuf->next; if (tty->buf.head == NULL) tty->buf.tail = NULL; - spin_unlock_irqrestore(&tty->buf.lock, flags); - /* printk("Process buffer %p for %d\n", tbuf, tbuf->used); */ - disc->receive_buf(tty, tbuf->char_buf_ptr, - tbuf->flag_buf_ptr, - tbuf->used); - spin_lock_irqsave(&tty->buf.lock, flags); tty_buffer_free(tty, tbuf); } spin_unlock_irqrestore(&tty->buf.lock, flags); @@ -2871,8 +2883,10 @@ void tty_flip_buffer_push(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty->buf.lock, flags); - if (tty->buf.tail != NULL) + if (tty->buf.tail != NULL) { tty->buf.tail->active = 0; + tty->buf.tail->commit = tty->buf.tail->used; + } spin_unlock_irqrestore(&tty->buf.lock, flags); if (tty->low_latency) diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index 3aed37314ab8..e87c32a5c86a 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -153,8 +153,10 @@ static inline void con_schedule_flip(struct tty_struct *t) { unsigned long flags; spin_lock_irqsave(&t->buf.lock, flags); - if (t->buf.tail != NULL) + if (t->buf.tail != NULL) { t->buf.tail->active = 0; + t->buf.tail->commit = t->buf.tail->used; + } spin_unlock_irqrestore(&t->buf.lock, flags); schedule_work(&t->buf.work); } diff --git a/include/linux/tty.h b/include/linux/tty.h index a7bd3b4558d2..f45cd74e6f24 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -58,6 +58,8 @@ struct tty_buffer { int used; int size; int active; + int commit; + int read; /* Data points here */ unsigned long data[0]; }; diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index 82961eb19888..222faf97d5f9 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -29,8 +29,10 @@ _INLINE_ void tty_schedule_flip(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty->buf.lock, flags); - if (tty->buf.tail != NULL) + if (tty->buf.tail != NULL) { tty->buf.tail->active = 0; + tty->buf.tail->commit = tty->buf.tail->used; + } spin_unlock_irqrestore(&tty->buf.lock, flags); schedule_delayed_work(&tty->buf.work, 1); } From 418aade459f03318defd18ef0b11981a63bd81b0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 10 Feb 2006 01:51:15 -0800 Subject: [PATCH 24/27] [PATCH] Updates for page migration This adds some additional comments in order to help others figure out how exactly the code works. And fix a variable name. Also swap_page does need to ignore all reference bits when unmapping a page. Otherwise we may have to repeatedly unmap a frequently touched page. So change the try_to_unmap parameter to 1. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 5a610804cd06..5db32fdfaf39 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -632,7 +632,7 @@ static int swap_page(struct page *page) struct address_space *mapping = page_mapping(page); if (page_mapped(page) && mapping) - if (try_to_unmap(page, 0) != SWAP_SUCCESS) + if (try_to_unmap(page, 1) != SWAP_SUCCESS) goto unlock_retry; if (PageDirty(page)) { @@ -839,7 +839,7 @@ EXPORT_SYMBOL(migrate_page); * pages are swapped out. * * The function returns after 10 attempts or if no pages - * are movable anymore because t has become empty + * are movable anymore because to has become empty * or no retryable pages exist anymore. * * Return: Number of pages not migrated when "to" ran empty. @@ -928,12 +928,21 @@ redo: goto unlock_both; if (mapping->a_ops->migratepage) { + /* + * Most pages have a mapping and most filesystems + * should provide a migration function. Anonymous + * pages are part of swap space which also has its + * own migration function. This is the most common + * path for page migration. + */ rc = mapping->a_ops->migratepage(newpage, page); goto unlock_both; } /* - * Trigger writeout if page is dirty + * Default handling if a filesystem does not provide + * a migration function. We can only migrate clean + * pages so try to write out any dirty pages first. */ if (PageDirty(page)) { switch (pageout(page, mapping)) { @@ -949,9 +958,10 @@ redo: ; /* try to migrate the page below */ } } + /* - * If we have no buffer or can release the buffer - * then do a simple migration. + * Buffers are managed in a filesystem specific way. + * We must have no buffers or drop them. */ if (!page_has_buffers(page) || try_to_release_page(page, GFP_KERNEL)) { @@ -966,6 +976,11 @@ redo: * swap them out. */ if (pass > 4) { + /* + * Persistently unable to drop buffers..... As a + * measure of last resort we fall back to + * swap_page(). + */ unlock_page(newpage); newpage = NULL; rc = swap_page(page); From 21b4da78c941f292f6daf87abb562d6285216e51 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 10 Feb 2006 16:27:11 -0500 Subject: [PATCH 25/27] [PATCH] Fix s390 build failure. arch/s390/kernel/compat_signal.c:199: error: conflicting types for 'do_sigaction' include/linux/sched.h:1115: error: previous declaration of 'do_sigaction' was here Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds --- arch/s390/kernel/compat_signal.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index ef706694a0c1..5291b5f8788d 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -195,9 +195,6 @@ sys32_sigaction(int sig, const struct old_sigaction32 __user *act, return ret; } -int -do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact); - asmlinkage long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, struct sigaction32 __user *oact, size_t sigsetsize) From fd401aee6273e869e2711de498e28f5208184797 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 10 Feb 2006 21:50:43 +0000 Subject: [PATCH 26/27] [SERIAL] Remove incorrect code from ioc4 serial driver Serial drivers in general should not write uart_info->flags - they're private to serial_core. Serial drivers have no need to fiddle with tty->alt_speed, nor manipulate TTY_IO_ERROR in tty->flags. Fix the ioc4 serial driver for both these points by simply removing the offending code. Acked-by: pfg@sgi.com Signed-off-by: Russell King --- drivers/serial/ioc4_serial.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/serial/ioc4_serial.c b/drivers/serial/ioc4_serial.c index 1d85533d46d2..f3763d2ccb86 100644 --- a/drivers/serial/ioc4_serial.c +++ b/drivers/serial/ioc4_serial.c @@ -1717,11 +1717,9 @@ ioc4_change_speed(struct uart_port *the_port, } if (cflag & CRTSCTS) { - info->flags |= ASYNC_CTS_FLOW; port->ip_sscr |= IOC4_SSCR_HFC_EN; } else { - info->flags &= ~ASYNC_CTS_FLOW; port->ip_sscr &= ~IOC4_SSCR_HFC_EN; } writel(port->ip_sscr, &port->ip_serial_regs->sscr); @@ -1760,18 +1758,6 @@ static inline int ic4_startup_local(struct uart_port *the_port) info = the_port->info; - if (info->tty) { - set_bit(TTY_IO_ERROR, &info->tty->flags); - clear_bit(TTY_IO_ERROR, &info->tty->flags); - if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) - info->tty->alt_speed = 57600; - if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI) - info->tty->alt_speed = 115200; - if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI) - info->tty->alt_speed = 230400; - if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP) - info->tty->alt_speed = 460800; - } local_open(port); /* set the speed of the serial port */ From e19816808346cc1619733532a267a11dce8f8a12 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 10 Feb 2006 22:40:51 +0000 Subject: [PATCH 27/27] [ARM] 3326/1: H1940 - Control latches Patch from Ben Dooks Define the bits for the two board control latches that control various items on the H1940 iPAQ. Signed-off-by: Ben Dooks Signed-off-by: Arnaud Patard Signed-off-by: Russell King --- arch/arm/mach-s3c2410/mach-h1940.c | 29 +++++++++- include/asm-arm/arch-s3c2410/h1940-latch.h | 64 ++++++++++++++++++++++ 2 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 include/asm-arm/arch-s3c2410/h1940-latch.h diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c index 1c316f14ed94..646a3a5d33a5 100644 --- a/arch/arm/mach-s3c2410/mach-h1940.c +++ b/arch/arm/mach-s3c2410/mach-h1940.c @@ -46,10 +46,11 @@ #include #include -//#include + #include #include +#include #include #include @@ -59,7 +60,12 @@ #include "cpu.h" static struct map_desc h1940_iodesc[] __initdata = { - /* nothing here yet */ + [0] = { + .virtual = (unsigned long)H1940_LATCH, + .pfn = __phys_to_pfn(H1940_PA_LATCH), + .length = SZ_16K, + .type = MT_DEVICE + }, }; #define UCON S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK @@ -92,6 +98,25 @@ static struct s3c2410_uartcfg h1940_uartcfgs[] = { } }; +/* Board control latch control */ + +static unsigned int latch_state = H1940_LATCH_DEFAULT; + +void h1940_latch_control(unsigned int clear, unsigned int set) +{ + unsigned long flags; + + local_irq_save(flags); + + latch_state &= ~clear; + latch_state |= set; + + __raw_writel(latch_state, H1940_LATCH); + + local_irq_restore(flags); +} + +EXPORT_SYMBOL_GPL(h1940_latch_control); /** diff --git a/include/asm-arm/arch-s3c2410/h1940-latch.h b/include/asm-arm/arch-s3c2410/h1940-latch.h new file mode 100644 index 000000000000..c5802411f43d --- /dev/null +++ b/include/asm-arm/arch-s3c2410/h1940-latch.h @@ -0,0 +1,64 @@ +/* linux/include/asm-arm/arch-s3c2410/h1940-latch.h + * + * (c) 2005 Simtec Electronics + * http://armlinux.simtec.co.uk/ + * Ben Dooks + * + * iPAQ H1940 series - latch definitions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#ifndef __ASM_ARCH_H1940_LATCH_H +#define __ASM_ARCH_H1940_LATCH_H + + +#ifndef __ASSEMBLY__ +#define H1940_LATCH ((void __iomem *)0xF8000000) +#else +#define H1940_LATCH 0xF8000000 +#endif + +#define H1940_PA_LATCH (S3C2410_CS2) + +/* SD layer latch */ + +#define H1940_LATCH_SDQ1 (1<<16) +#define H1940_LATCH_LCD_P1 (1<<17) +#define H1940_LATCH_LCD_P2 (1<<18) +#define H1940_LATCH_LCD_P3 (1<<19) +#define H1940_LATCH_MAX1698_nSHUTDOWN (1<<20) /* LCD backlight */ +#define H1940_LATCH_LED_RED (1<<21) +#define H1940_LATCH_SDQ7 (1<<22) +#define H1940_LATCH_USB_DP (1<<23) + +/* CPU layer latch */ + +#define H1940_LATCH_UDA_POWER (1<<24) +#define H1940_LATCH_AUDIO_POWER (1<<25) +#define H1940_LATCH_SM803_ENABLE (1<<26) +#define H1940_LATCH_LCD_P4 (1<<27) +#define H1940_LATCH_CPUQ5 (1<<28) /* untraced */ +#define H1940_LATCH_BLUETOOTH_POWER (1<<29) /* active high */ +#define H1940_LATCH_LED_GREEN (1<<30) +#define H1940_LATCH_LED_FLASH (1<<31) + +/* default settings */ + +#define H1940_LATCH_DEFAULT \ + H1940_LATCH_LCD_P4 | \ + H1940_LATCH_SM803_ENABLE | \ + H1940_LATCH_SDQ1 | \ + H1940_LATCH_LCD_P1 | \ + H1940_LATCH_LCD_P2 | \ + H1940_LATCH_LCD_P3 | \ + H1940_LATCH_MAX1698_nSHUTDOWN | \ + H1940_LATCH_CPUQ5 + +/* control functions */ + +extern void h1940_latch_control(unsigned int clear, unsigned int set); + +#endif /* __ASM_ARCH_H1940_LATCH_H */