From dde655c9df02ee07ed090dfdb7ae8741bf299e14 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 29 Nov 2007 21:51:36 +1100 Subject: [PATCH 01/27] [SUNGEM]: Fix NAPI regression with reset work sungem's gem_reset_task() will unconditionally try to disable NAPI even when it's called while the interface is not operating and hence the NAPI struct isn't enabled. Make napi_disable() depend on gp->running. Also removes a superfluous test of gp->running in the same function. Signed-off-by: Johannes Berg Signed-off-by: Herbert Xu --- drivers/net/sungem.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index f6fedcc32de1..68872142530b 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -2281,14 +2281,12 @@ static void gem_reset_task(struct work_struct *work) mutex_lock(&gp->pm_mutex); - napi_disable(&gp->napi); + if (gp->opened) + napi_disable(&gp->napi); spin_lock_irq(&gp->lock); spin_lock(&gp->tx_lock); - if (gp->running == 0) - goto not_running; - if (gp->running) { netif_stop_queue(gp->dev); @@ -2298,13 +2296,14 @@ static void gem_reset_task(struct work_struct *work) gem_set_link_modes(gp); netif_wake_queue(gp->dev); } - not_running: + gp->reset_task_pending = 0; spin_unlock(&gp->tx_lock); spin_unlock_irq(&gp->lock); - napi_enable(&gp->napi); + if (gp->opened) + napi_enable(&gp->napi); mutex_unlock(&gp->pm_mutex); } From 6ab3b487db77fa98a24560f11a5a8e744b98d877 Mon Sep 17 00:00:00 2001 From: Joonwoo Park Date: Thu, 29 Nov 2007 22:16:41 +1100 Subject: [PATCH 02/27] [VLAN]: Fix nested VLAN transmit bug Fix misbehavior of vlan_dev_hard_start_xmit() for recursive encapsulations. 
Signed-off-by: Joonwoo Park Signed-off-by: Herbert Xu --- net/8021q/vlan_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 7a36878241da..4f99bb86af5c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -462,7 +462,8 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ - if (veth->h_vlan_proto != htons(ETH_P_8021Q)) { + if (veth->h_vlan_proto != htons(ETH_P_8021Q) || + VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR) { int orig_headroom = skb_headroom(skb); unsigned short veth_TCI; From 0a11225887fe6cbccd882404dc36ddc50f47daf9 Mon Sep 17 00:00:00 2001 From: Florian Zumbiehl Date: Thu, 29 Nov 2007 23:19:23 +1100 Subject: [PATCH 03/27] [UNIX]: EOF on non-blocking SOCK_SEQPACKET I am not absolutely sure whether this actually is a bug (as in: I've got no clue what the standards say or what other implementations do), but at least I was pretty surprised when I noticed that a recv() on a non-blocking unix domain socket of type SOCK_SEQPACKET (which is connection oriented, after all) where the remote end has closed the connection returned -1 (EAGAIN) rather than 0 to indicate end of file. 
This is a test case: | #include | #include | #include | #include | #include | #include | #include | | int main(){ | int sock; | struct sockaddr_un addr; | char buf[4096]; | int pfds[2]; | | pipe(pfds); | sock=socket(PF_UNIX,SOCK_SEQPACKET,0); | addr.sun_family=AF_UNIX; | strcpy(addr.sun_path,"/tmp/foobar_testsock"); | bind(sock,(struct sockaddr *)&addr,sizeof(addr)); | listen(sock,1); | if(fork()){ | close(sock); | sock=socket(PF_UNIX,SOCK_SEQPACKET,0); | connect(sock,(struct sockaddr *)&addr,sizeof(addr)); | fcntl(sock,F_SETFL,fcntl(sock,F_GETFL)|O_NONBLOCK); | close(pfds[1]); | read(pfds[0],buf,sizeof(buf)); | recv(sock,buf,sizeof(buf),0); // <-- this one | }else accept(sock,NULL,NULL); | exit(0); | } If you try it, make sure /tmp/foobar_testsock doesn't exist. The marked recv() returns -1 (EAGAIN) on 2.6.23.9. Below you find a patch that fixes that. Signed-off-by: Florian Zumbiehl Signed-off-by: Herbert Xu --- net/unix/af_unix.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e835da8fc091..060bba4567d2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1637,8 +1637,15 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, mutex_lock(&u->readlock); skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) + if (!skb) { + unix_state_lock(sk); + /* Signal EOF on disconnected non-blocking SEQPACKET socket. 
*/ + if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && + (sk->sk_shutdown & RCV_SHUTDOWN)) + err = 0; + unix_state_unlock(sk); goto out_unlock; + } wake_up_interruptible_sync(&u->peer_wait); From 17efdd45755c0eb8d1418a1368ef7c7ebbe98c6e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 29 Nov 2007 23:41:43 +1100 Subject: [PATCH 04/27] [BRIDGE]: Lost call to br_fdb_fini() in br_init() error path In case the br_netfilter_init() (or any subsequent call) fails, the br_fdb_fini() must be called to free the allocated in br_fdb_init() br_fdb_cache kmem cache. Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu --- net/bridge/br.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br.c b/net/bridge/br.c index 93867bb6cc97..a90182873120 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -39,7 +39,7 @@ static int __init br_init(void) err = br_fdb_init(); if (err) - goto err_out1; + goto err_out; err = br_netfilter_init(); if (err) @@ -65,6 +65,8 @@ err_out3: err_out2: br_netfilter_fini(); err_out1: + br_fdb_fini(); +err_out: llc_sap_put(br_stp_sap); return err; } From 82de382ce8e1c7645984616728dc7aaa057821e4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 29 Nov 2007 23:58:58 +1100 Subject: [PATCH 05/27] [BRIDGE]: Properly dereference the br_should_route_hook This hook is protected with the RCU, so simple if (br_should_route_hook) br_should_route_hook(...) is not enough on some architectures. Use the rcu_dereference/rcu_assign_pointer in this case. Fixed Stephen's comment concerning using the typeof(). 
Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu --- net/bridge/br_input.c | 7 ++++--- net/bridge/netfilter/ebtable_broute.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 3cedd4eeeed6..0ee79a726d91 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -122,6 +122,7 @@ static inline int is_link_local(const unsigned char *dest) struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; + int (*rhook)(struct sk_buff *skb); if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; @@ -147,9 +148,9 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) switch (p->state) { case BR_STATE_FORWARDING: - - if (br_should_route_hook) { - if (br_should_route_hook(skb)) + rhook = rcu_dereference(br_should_route_hook); + if (rhook != NULL) { + if (rhook(skb)) return skb; dest = eth_hdr(skb)->h_dest; } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index e44519ebf1d2..be6f18681053 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -70,13 +70,13 @@ static int __init ebtable_broute_init(void) if (ret < 0) return ret; /* see br_input.c */ - br_should_route_hook = ebt_broute; + rcu_assign_pointer(br_should_route_hook, ebt_broute); return ret; } static void __exit ebtable_broute_fini(void) { - br_should_route_hook = NULL; + rcu_assign_pointer(br_should_route_hook, NULL); synchronize_net(); ebt_unregister_table(&broute_table); } From 076931989fe96823a577259cc6bc205d7ec31754 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 30 Nov 2007 00:08:14 +1100 Subject: [PATCH 06/27] [INET]: Fix inet_diag register vs rcv race The following race is possible when one cpu unregisters the handler while other one is trying to receive a message and call this one: CPU1: CPU2: inet_diag_rcv() 
inet_diag_unregister() mutex_lock(&inet_diag_mutex); netlink_rcv_skb(skb, &inet_diag_rcv_msg); if (inet_diag_table[nlh->nlmsg_type] == NULL) /* false handler is still registered */ ... netlink_dump_start(idiagnl, skb, nlh, inet_diag_dump, NULL); cb = kzalloc(sizeof(*cb), GFP_KERNEL); /* sleep here freeing memory * or preempt * or sleep later on nlk->cb_mutex */ spin_lock(&inet_diag_register_lock); inet_diag_table[type] = NULL; ... spin_unlock(&inet_diag_register_lock); synchronize_rcu(); /* CPU1 is sleeping - RCU quiescent * state is passed */ return; /* inet_diag_dump is finally called: */ inet_diag_dump() handler = inet_diag_table[cb->nlh->nlmsg_type]; BUG_ON(handler == NULL); /* OOPS! While we slept the unregister has set * handler to NULL :( */ Grep showed, that the register/unregister functions are called from init/fini module callbacks for tcp_/dccp_diag, so it's OK to use the inet_diag_mutex to synchronize manipulations with the inet_diag_table and the access to it. Besides, as Herbert pointed out, asynchronous dumps should hold this mutex as well, and thus, we provide the mutex as cb_mutex one. 
Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu --- net/ipv4/inet_diag.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index b0170732b5e9..6b3fffb554b6 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -853,8 +853,6 @@ static void inet_diag_rcv(struct sk_buff *skb) mutex_unlock(&inet_diag_mutex); } -static DEFINE_SPINLOCK(inet_diag_register_lock); - int inet_diag_register(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; @@ -863,13 +861,13 @@ int inet_diag_register(const struct inet_diag_handler *h) if (type >= INET_DIAG_GETSOCK_MAX) goto out; - spin_lock(&inet_diag_register_lock); + mutex_lock(&inet_diag_mutex); err = -EEXIST; if (inet_diag_table[type] == NULL) { inet_diag_table[type] = h; err = 0; } - spin_unlock(&inet_diag_register_lock); + mutex_unlock(&inet_diag_mutex); out: return err; } @@ -882,11 +880,9 @@ void inet_diag_unregister(const struct inet_diag_handler *h) if (type >= INET_DIAG_GETSOCK_MAX) return; - spin_lock(&inet_diag_register_lock); + mutex_lock(&inet_diag_mutex); inet_diag_table[type] = NULL; - spin_unlock(&inet_diag_register_lock); - - synchronize_rcu(); + mutex_unlock(&inet_diag_mutex); } EXPORT_SYMBOL_GPL(inet_diag_unregister); @@ -901,7 +897,7 @@ static int __init inet_diag_init(void) goto out; idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, - inet_diag_rcv, NULL, THIS_MODULE); + inet_diag_rcv, &inet_diag_mutex, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; err = 0; From 5e5234ff17ef98932688116025b30958bd28a940 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 30 Nov 2007 00:50:31 +1100 Subject: [PATCH 07/27] [IPSEC]: Fix uninitialised dst warning in __xfrm_lookup Andrew Morton reported that __xfrm_lookup generates this warning: net/xfrm/xfrm_policy.c: In function '__xfrm_lookup': net/xfrm/xfrm_policy.c:1449: warning: 'dst' may be used uninitialized in this function This is because if 
policy->action is of an unexpected value then dst will not be initialised. Of course, in practice this should never happen since the input layer xfrm_user/af_key will filter out all illegal values. But the compiler doesn't know that of course. So this patch fixes this by taking the conservative approach and treat all unknown actions the same as a blocking action. Thanks to Andrew for finding this and providing an initial fix. Signed-off-by: Herbert Xu --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b702bd8a3893..9a4cf2e45a15 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1344,6 +1344,7 @@ restart: xfrm_nr += pols[0]->xfrm_nr; switch (policy->action) { + default: case XFRM_POLICY_BLOCK: /* Prohibit the flow */ err = -EPERM; From a357dde9df33f28611e6a3d4f88265e39bcc8880 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 30 Nov 2007 01:10:55 +1100 Subject: [PATCH 08/27] [TCP] illinois: Incorrect beta usage Lachlan Andrew observed that my TCP-Illinois implementation uses the beta value incorrectly: The parameter beta in the paper specifies the amount to decrease *by*: that is, on loss, W <- W - beta*W but in tcp_illinois_ssthresh() uses beta as the amount to decrease *to*: W <- beta*W This bug makes the Linux TCP-Illinois get less-aggressive on uncongested network, hurting performance. Note: since the base beta value is .5, it has no impact on a congested network. 
Signed-off-by: Stephen Hemminger Signed-off-by: Herbert Xu --- net/ipv4/tcp_illinois.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 64f1cbaf96e8..5aa5f5496d6d 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -298,7 +298,7 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) struct illinois *ca = inet_csk_ca(sk); /* Multiplicative decrease */ - return max((tp->snd_cwnd * ca->beta) >> BETA_SHIFT, 2U); + return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); } From fd10279bc7405c4f1e47a008686d3d9ad71d7f6d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2007 12:13:32 -0500 Subject: [PATCH 09/27] SCTP: Fix the number of HB transmissions. Our treatment of Heartbeats is special in that the initial HB chunk counts against the error count for the association, whereas for other chunks, only retransmissions or timeouts count against us. As a result, we had an off-by-1 situation with a number of Heartbeats we could send. Signed-off-by: Vlad Yasevich --- net/sctp/sm_statefuns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b8bbb960723c..5fb84778846d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -959,7 +959,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, { struct sctp_transport *transport = (struct sctp_transport *) arg; - if (asoc->overall_error_count >= asoc->max_retrans) { + if (asoc->overall_error_count > asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ From 9baffaa689a50ef9480ecd9017ffd1480c807328 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 29 Nov 2007 08:44:34 -0500 Subject: [PATCH 10/27] SCTP: Fix SCTP-AUTH to correctly add HMACS parameter. 
There was a typo that cleared the HMACS parameters when no authenticated chunks were specified. We should be clearing the chunks pointer instead of the hmacs. Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 5a9783c38de1..a139469792a3 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -243,7 +243,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, if (auth_chunks->length) chunksize += ntohs(auth_chunks->length); else - auth_hmacs = NULL; + auth_chunks = NULL; extensions[num_ext] = SCTP_CID_AUTH; num_ext += 1; From 8ee4be37e8ac28e79ae673d441e83c1f51e7ecfd Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 29 Nov 2007 08:50:35 -0500 Subject: [PATCH 11/27] SCTP: Fix the supported extensions parameter Supported extensions parameter was not coded right and ended up over-writing memory or causing skb overflows. First, remove FWD_TSN from the supported extensions list, as it shouldn't be there, and also fix the parameter encoding. Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index a139469792a3..f4876291bb5e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -77,6 +77,8 @@ static int sctp_process_param(struct sctp_association *asoc, union sctp_params param, const union sctp_addr *peer_addr, gfp_t gfp); +static void *sctp_addto_param(struct sctp_chunk *chunk, int len, + const void *data); /* What was the inbound interface for this chunk? 
*/ int sctp_chunk_iif(const struct sctp_chunk *chunk) @@ -207,11 +209,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types); chunksize += sizeof(ecap_param); - if (sctp_prsctp_enable) { - chunksize += sizeof(prsctp_param); - extensions[num_ext] = SCTP_CID_FWD_TSN; - num_ext += 1; - } + /* ADDIP: Section 4.2.7: * An implementation supporting this extension [ADDIP] MUST list * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and @@ -297,7 +295,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, htons(sizeof(sctp_supported_ext_param_t) + num_ext); sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), &ext_param); - sctp_addto_chunk(retval, num_ext, extensions); + sctp_addto_param(retval, num_ext, extensions); } if (sctp_prsctp_enable) @@ -371,20 +369,12 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, if (asoc->peer.ecn_capable) chunksize += sizeof(ecap_param); - /* Tell peer that we'll do PR-SCTP only if peer advertised. */ - if (asoc->peer.prsctp_capable) { - chunksize += sizeof(prsctp_param); - extensions[num_ext] = SCTP_CID_FWD_TSN; - num_ext += 1; - } - if (sctp_addip_enable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; num_ext += 2; } - chunksize += sizeof(ext_param) + num_ext; chunksize += sizeof(aiparam); if (asoc->peer.auth_capable) { @@ -407,6 +397,9 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, num_ext += 1; } + if (num_ext) + chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + /* Now allocate and fill out the chunk. 
*/ retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize); if (!retval) @@ -428,7 +421,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, htons(sizeof(sctp_supported_ext_param_t) + num_ext); sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), &ext_param); - sctp_addto_chunk(retval, num_ext, extensions); + sctp_addto_param(retval, num_ext, extensions); } if (asoc->peer.prsctp_capable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); From 555d3d5d2be13675490a80df0d7961551822ef1f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 29 Nov 2007 08:56:16 -0500 Subject: [PATCH 12/27] SCTP: Fix chunk acceptance when no authenticated chunks were listed. In the case where no authenticated chunks were specified, we were still trying to verify that a given chunk needs authentication and doing so incorrectly. Add a check for parameter length to make sure we don't try to use an empty auth_chunks parameter to verify against. Signed-off-by: Vlad Yasevich --- net/sctp/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 6d5fa6bb371b..6d89e35307aa 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -631,7 +631,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) int found = 0; int i; - if (!param) + if (!param || param->param_hdr.length == 0) return 0; len = ntohs(param->param_hdr.length) - sizeof(sctp_paramhdr_t); From b7e0fe9f81e19c4f2a1369b324c3c062c1738be4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 29 Nov 2007 09:53:52 -0500 Subject: [PATCH 13/27] SCTP: Fix build issues with SCTP AUTH. SCTP-AUTH requires selection of CRYPTO, HMAC and SHA1 since SHA1 is a MUST requirement for AUTH. We also support SHA256, but that's optional, so fix the code to treat it as such. 
Signed-off-by: Vlad Yasevich --- include/net/sctp/constants.h | 9 ++++++--- net/sctp/Kconfig | 6 +++--- net/sctp/auth.c | 2 ++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index f30b537d6952..05f22a6afbcd 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -441,11 +441,14 @@ enum { SCTP_AUTH_HMAC_ID_RESERVED_0, SCTP_AUTH_HMAC_ID_SHA1, SCTP_AUTH_HMAC_ID_RESERVED_2, - SCTP_AUTH_HMAC_ID_SHA256 +#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE) + SCTP_AUTH_HMAC_ID_SHA256, +#endif + __SCTP_AUTH_HMAC_MAX }; -#define SCTP_AUTH_HMAC_ID_MAX SCTP_AUTH_HMAC_ID_SHA256 -#define SCTP_AUTH_NUM_HMACS (SCTP_AUTH_HMAC_ID_SHA256 + 1) +#define SCTP_AUTH_HMAC_ID_MAX __SCTP_AUTH_HMAC_MAX - 1 +#define SCTP_AUTH_NUM_HMACS __SCTP_AUTH_HMAC_MAX #define SCTP_SHA1_SIG_SIZE 20 #define SCTP_SHA256_SIG_SIZE 32 diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 8210f549c492..5390bc792159 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -6,9 +6,9 @@ menuconfig IP_SCTP tristate "The SCTP Protocol (EXPERIMENTAL)" depends on INET && EXPERIMENTAL depends on IPV6 || IPV6=n - select CRYPTO if SCTP_HMAC_SHA1 || SCTP_HMAC_MD5 - select CRYPTO_HMAC if SCTP_HMAC_SHA1 || SCTP_HMAC_MD5 - select CRYPTO_SHA1 if SCTP_HMAC_SHA1 + select CRYPTO + select CRYPTO_HMAC + select CRYPTO_SHA1 select CRYPTO_MD5 if SCTP_HMAC_MD5 ---help--- Stream Control Transmission Protocol diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 6d89e35307aa..97e6ebd14500 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -54,11 +54,13 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { /* id 2 is reserved as well */ .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2, }, +#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE) { .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, .hmac_name="hmac(sha256)", .hmac_len = SCTP_SHA256_SIG_SIZE, } +#endif }; From 
a10605e599a7873417043fe2bb559abe719f8a1a Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 22 Nov 2007 11:10:22 +0800 Subject: [PATCH 14/27] mac80211: free ifsta->extra_ie and clear IEEE80211_STA_PRIVACY_INVOKED I'm not sure if this is best choice, someone might have better solutions. But this patch fixed the connection problem when switching from a WPA enabled AP (using wpa_supplicant) to an open AP (using iwconfig). The root cause is when we connect to a WPA enabled AP, wpa_supplicant sets the ifsta->extra_ie thru SIOCSIWGENIE. But if we stop wpa_supplicant and connect to an open AP with iwconfig, there is no way to clear the extra_ie so that mac80211 keeps connecting with that. Someone could argue wpa_supplicant should clear the extra_ie during its shutdown. But mac80211 should also handle the unexpected shutdown case (ie. killall -9 wpa_supplicant). On Wed, 2007-11-21 at 16:19 +0100, Johannes Berg wrote: > Yeah. Can you amend the patch to also clear the > IEEE80211_STA_PRIVACY_INVOKED flag? Signed-off-by: Zhu Yi Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ieee80211.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index e0ee65a969bc..8f11c97f13d2 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -334,6 +334,11 @@ static int ieee80211_stop(struct net_device *dev) cancel_delayed_work(&local->scan_work); } flush_workqueue(local->hw.workqueue); + + sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; + kfree(sdata->u.sta.extra_ie); + sdata->u.sta.extra_ie = NULL; + sdata->u.sta.extra_ie_len = 0; /* fall through */ default: conf.if_id = dev->ifindex; From 01e1f045e65b683fe7203b1e16a915bbb94c15fe Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 22 Nov 2007 22:03:42 +0000 Subject: [PATCH 15/27] ieee80211: fix unaligned access in ieee80211_copy_snap There is no guarantee that data+SNAP_SIZE will reside on an even numbered address, so doing a 16 bit read will cause an unaligned access in some situations. Based on a patch from Jun Sun. Signed-off-by: Daniel Drake Signed-off-by: John W. Linville --- net/ieee80211/ieee80211_tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index a4c3c51140a3..6d06f1385e28 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -144,7 +144,8 @@ static int ieee80211_copy_snap(u8 * data, u16 h_proto) snap->oui[1] = oui[1]; snap->oui[2] = oui[2]; - *(u16 *) (data + SNAP_SIZE) = htons(h_proto); + h_proto = htons(h_proto); + memcpy(data + SNAP_SIZE, &h_proto, sizeof(u16)); return SNAP_SIZE + sizeof(u16); } From 8b393f1dc76acbe65a97a4e51f8144f4a65fa1c9 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Wed, 28 Nov 2007 01:57:08 -0500 Subject: [PATCH 16/27] mac80211: Fix behavior of ieee80211_open and ieee80211_close This patch fixes: - Incorrect calls to ieee80211_hw_config when the radiotap flag is set. - Failure to actually unset the radiotap flag when all monitors are down. 
- Failure to call ieee80211_hw_config after successful interface start. Signed-off-by: Michael Wu Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 8f11c97f13d2..0dc114c4ef2b 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -216,6 +216,7 @@ static int ieee80211_open(struct net_device *dev) res = local->ops->start(local_to_hw(local)); if (res) return res; + ieee80211_hw_config(local); } switch (sdata->type) { @@ -232,7 +233,6 @@ static int ieee80211_open(struct net_device *dev) netif_tx_unlock_bh(local->mdev); local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - ieee80211_hw_config(local); } break; case IEEE80211_IF_TYPE_STA: @@ -311,8 +311,7 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_configure_filter(local); netif_tx_unlock_bh(local->mdev); - local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - ieee80211_hw_config(local); + local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; } break; case IEEE80211_IF_TYPE_STA: From 8312512e81ab16d845b49d1ec695fad1c72f19f6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Nov 2007 11:07:57 +0100 Subject: [PATCH 17/27] mac80211: drop unencrypted frames if encryption is expected This patch fixes a regression I (most likely) introduced, namely that unencrypted frames are right now accepted even if we have a key for that specific sender. That has very bad security implications. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 428a9fcf57d6..00f908d9275e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -997,7 +997,7 @@ ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx) if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && - rx->sdata->drop_unencrypted && + (rx->key || rx->sdata->drop_unencrypted) && (rx->sdata->eapol == 0 || !ieee80211_is_eapol(rx->skb)))) { if (net_ratelimit()) printk(KERN_DEBUG "%s: RX non-WEP frame, but expected " From 7f4c534178722ac9ffb4feae3a4d54e3fbe3f22c Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 28 Nov 2007 17:49:34 +0100 Subject: [PATCH 18/27] rfkill: fix double-mutex-locking rfkill_toggle_radio is called from functions where rfkill->mutex is already acquired. Remove the lock from rfkill_toggle_radio() and add it to the only calling function that calls it without the lock held. Signed-off-by: Michael Buesch Acked-by: Ivo van Doorn Signed-off-by: John W. 
Linville --- net/rfkill/rfkill.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 73d60a307129..4469a7be006c 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -60,11 +60,7 @@ static void rfkill_led_trigger(struct rfkill *rfkill, static int rfkill_toggle_radio(struct rfkill *rfkill, enum rfkill_state state) { - int retval; - - retval = mutex_lock_interruptible(&rfkill->mutex); - if (retval) - return retval; + int retval = 0; if (state != rfkill->state) { retval = rfkill->toggle_radio(rfkill->data, state); @@ -74,7 +70,6 @@ static int rfkill_toggle_radio(struct rfkill *rfkill, } } - mutex_unlock(&rfkill->mutex); return retval; } @@ -158,12 +153,13 @@ static ssize_t rfkill_state_store(struct device *dev, if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (mutex_lock_interruptible(&rfkill->mutex)) + return -ERESTARTSYS; error = rfkill_toggle_radio(rfkill, state ? RFKILL_STATE_ON : RFKILL_STATE_OFF); - if (error) - return error; + mutex_unlock(&rfkill->mutex); - return count; + return error ? error : count; } static ssize_t rfkill_claim_show(struct device *dev, From 53cb4791c156908ae634de31949f7f25f8de002b Mon Sep 17 00:00:00 2001 From: Adel Gadllah Date: Thu, 29 Nov 2007 17:09:41 +0100 Subject: [PATCH 19/27] mac80211: rate limit wep decrypt failed messages The attached patch rate limits "WEP decrypt failed (ICV)" to avoid flooding the logfiles. Signed-off-by: Adel Gadllah Signed-off-by: John W. 
Linville --- net/mac80211/wep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 9bf0e1cc530a..b5f3413403bd 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -265,7 +265,8 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, skb->data + hdrlen + WEP_IV_LEN, len)) { - printk(KERN_DEBUG "WEP decrypt failed (ICV)\n"); + if (net_ratelimit()) + printk(KERN_DEBUG "WEP decrypt failed (ICV)\n"); ret = -1; } From d5a784b3719ae364f49ecff12a0248f6e4252720 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Nov 2007 23:06:40 +1100 Subject: [PATCH 20/27] [RXRPC]: Add missing select on CRYPTO AF_RXRPC uses the crypto services, so should depend on or select CRYPTO. Signed-off-by: David Howells Signed-off-by: Herbert Xu --- net/rxrpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index e662f1d07664..0d3103c4f11c 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -5,6 +5,7 @@ config AF_RXRPC tristate "RxRPC session sockets" depends on INET && EXPERIMENTAL + select CRYPTO select KEYS help Say Y or M here to include support for RxRPC session sockets (just From d31c7b8fa303eb81311f27b80595b8d2cbeef950 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Fri, 30 Nov 2007 23:36:08 +1100 Subject: [PATCH 21/27] [IPV6]: Restore IPv6 when MTU is big enough Avaid provided test application, so bug got fixed. IPv6 addrconf removes ipv6 inner device from netdev each time mtu changes and new value is less than IPV6_MIN_MTU (1280 bytes). When mtu is changed and new value is greater than IPV6_MIN_MTU, it does not add ipv6 addresses and inner device back. This patch fixes that. Tested with Avaid's application, which works ok now. 
Signed-off-by: Evgeniy Polyakov Signed-off-by: Herbert Xu --- net/ipv6/addrconf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 567664eac463..e8c347579da9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2293,6 +2293,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; } + if (!idev && dev->mtu >= IPV6_MIN_MTU) + idev = ipv6_add_dev(dev); + if (idev) idev->if_flags |= IF_READY; } else { @@ -2357,12 +2360,18 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGEMTU: - if ( idev && dev->mtu >= IPV6_MIN_MTU) { + if (idev && dev->mtu >= IPV6_MIN_MTU) { rt6_mtu_change(dev, dev->mtu); idev->cnf.mtu6 = dev->mtu; break; } + if (!idev && dev->mtu >= IPV6_MIN_MTU) { + idev = ipv6_add_dev(dev); + if (idev) + break; + } + /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ case NETDEV_DOWN: From 3ccd86241b277249d5ac08e91eddfade47184520 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 30 Nov 2007 23:43:31 +1100 Subject: [PATCH 22/27] [DECNET]: dn_nl_deladdr() almost always returns no error As far as I see from the err variable initialization the dn_nl_deladdr() routine was designed to report errors like "EADDRNOTAVAIL" and probably "ENODEV". But the code sets this err to 0 after the first nlmsg_parse and goes on, returning this 0 in any case. 
Signed-off-by: Pavel Emelyanov Acked-by: Steven Whitehouse Signed-off-by: Herbert Xu --- net/decnet/dn_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 66e266fb5908..3bc82dc83b38 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -651,16 +651,18 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct dn_dev *dn_db; struct ifaddrmsg *ifm; struct dn_ifaddr *ifa, **ifap; - int err = -EADDRNOTAVAIL; + int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); if (err < 0) goto errout; + err = -ENODEV; ifm = nlmsg_data(nlh); if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL) goto errout; + err = -EADDRNOTAVAIL; for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2)) From 9dc0564e862b1b9a4677dec2c736b12169e03e99 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 30 Nov 2007 23:58:03 +1100 Subject: [PATCH 23/27] [NETFILTER]: xt_TCPMSS: remove network triggerable WARN_ON ipv6_skip_exthdr() returns -1 for invalid packets. don't WARN_ON that. 
Signed-off-by: Patrick McHardy Signed-off-by: Herbert Xu --- net/netfilter/xt_TCPMSS.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 07435a602b11..8e76d1f52fbe 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -174,10 +174,8 @@ xt_tcpmss_target6(struct sk_buff *skb, nexthdr = ipv6h->nexthdr; tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); - if (tcphoff < 0) { - WARN_ON(1); + if (tcphoff < 0) return NF_DROP; - } ret = tcpmss_mangle_packet(skb, targinfo, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) From 67b4af297033f5f65999885542f95ba7b562848a Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 1 Dec 2007 00:01:50 +1100 Subject: [PATCH 24/27] [NETFILTER]: fix forgotten module release in xt_CONNMARK and xt_CONNSECMARK Fix forgotten module release in xt_CONNMARK and xt_CONNSECMARK When xt_CONNMARK is used outside the mangle table and the user specified "--restore-mark", the connmark_tg_check() function will (correctly) error out, but (incorrectly) forgets to release the L3 conntrack module. Same for xt_CONNSECMARK. Fix is to move the call to acquire the L3 module after the basic constraint checks. 
Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: Herbert Xu --- net/netfilter/xt_CONNMARK.c | 10 +++++----- net/netfilter/xt_CONNSECMARK.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 856793e8db7a..0621ca7de3b0 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -86,11 +86,6 @@ checkentry(const char *tablename, { const struct xt_connmark_target_info *matchinfo = targinfo; - if (nf_ct_l3proto_try_module_get(target->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%d\n", target->family); - return false; - } if (matchinfo->mode == XT_CONNMARK_RESTORE) { if (strcmp(tablename, "mangle") != 0) { printk(KERN_WARNING "CONNMARK: restore can only be " @@ -103,6 +98,11 @@ checkentry(const char *tablename, printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); return false; } + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return false; + } return true; } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 021b5c8d20e2..d8feba9bdb48 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -90,11 +90,6 @@ static bool checkentry(const char *tablename, const void *entry, { const struct xt_connsecmark_target_info *info = targinfo; - if (nf_ct_l3proto_try_module_get(target->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%d\n", target->family); - return false; - } switch (info->mode) { case CONNSECMARK_SAVE: case CONNSECMARK_RESTORE: @@ -105,6 +100,11 @@ static bool checkentry(const char *tablename, const void *entry, return false; } + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return false; + } 
return true; } From e03ba84adb62fbc6049325a5bc00ef6932fa5e39 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 1 Dec 2007 00:03:52 +1100 Subject: [PATCH 25/27] [TEXTSEARCH]: Do not allow zero length patterns in the textsearch infrastructure If a zero length pattern is passed then return EINVAL. Avoids infinite loops (bm) or invalid memory accesses (kmp). Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: Herbert Xu --- lib/textsearch.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/textsearch.c b/lib/textsearch.c index 88c98a2ec8d9..be8bda3862f5 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. * * Authors: Thomas Graf - * Pablo Neira Ayuso + * Pablo Neira Ayuso * * ========================================================================== * @@ -250,7 +250,8 @@ unsigned int textsearch_find_continuous(struct ts_config *conf, * the various search algorithms. * * Returns a new textsearch configuration according to the specified - * parameters or a ERR_PTR(). + * parameters or a ERR_PTR(). If a zero length pattern is passed, this + * function returns EINVAL. */ struct ts_config *textsearch_prepare(const char *algo, const void *pattern, unsigned int len, gfp_t gfp_mask, int flags) @@ -259,6 +260,9 @@ struct ts_config *textsearch_prepare(const char *algo, const void *pattern, struct ts_config *conf; struct ts_ops *ops; + if (len == 0) + return ERR_PTR(-EINVAL); + ops = lookup_ts_algo(algo); #ifdef CONFIG_KMOD /* From 2b1e300a9dfc3196ccddf6f1d74b91b7af55e416 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 2 Dec 2007 00:33:17 +1100 Subject: [PATCH 26/27] [NETNS]: Fix /proc/net breakage Well I clearly goofed when I added the initial network namespace support for /proc/net. Currently things work but there are odd details visible to user space, even when we have a single network namespace. 
Since we do not cache proc_dir_entry dentries at the moment we can just modify ->lookup to return a different directory inode depending on the network namespace of the process looking at /proc/net, replacing the current technique of using a magic and fragile follow_link method. To accomplish that this patch: - introduces a shadow_proc method to allow different dentries to be returned from proc_lookup. - Removes the old /proc/net follow_link magic - Fixes a weakness in our not caching of proc generic dentries. As shadow_proc uses a task struct to decide which dentry to return we can go back later and fix the proc generic caching without modifying any code that uses the shadow_proc method. Signed-off-by: Eric W. Biederman Cc: "Rafael J. Wysocki" Cc: Pavel Machek Cc: Pavel Emelyanov Cc: "David S. Miller" Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Herbert Xu --- fs/proc/generic.c | 12 +++++- fs/proc/proc_net.c | 86 +++-------------------------------------- include/linux/proc_fs.h | 3 ++ 3 files changed, 19 insertions(+), 82 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index a9806bc21ec3..c2b752341f89 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -374,9 +374,16 @@ static int proc_delete_dentry(struct dentry * dentry) return 1; } +static int proc_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) +{ + d_drop(dentry); + return 0; +} + static struct dentry_operations proc_dentry_operations = { .d_delete = proc_delete_dentry, + .d_revalidate = proc_revalidate_dentry, }; /* @@ -397,8 +404,11 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam if (de->namelen != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { - unsigned int ino = de->low_ino; + unsigned int ino; + if (de->shadow_proc) + de = de->shadow_proc(current, de); + ino = de->low_ino; de_get(de); spin_unlock(&proc_subdir_lock); error = -EINVAL; diff --git a/fs/proc/proc_net.c 
b/fs/proc/proc_net.c index 131f9c68be5f..0afe21ee0607 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -50,89 +50,14 @@ struct net *get_proc_net(const struct inode *inode) } EXPORT_SYMBOL_GPL(get_proc_net); -static struct proc_dir_entry *proc_net_shadow; +static struct proc_dir_entry *shadow_pde; -static struct dentry *proc_net_shadow_dentry(struct dentry *parent, +static struct proc_dir_entry *proc_net_shadow(struct task_struct *task, struct proc_dir_entry *de) { - struct dentry *shadow = NULL; - struct inode *inode; - if (!de) - goto out; - de_get(de); - inode = proc_get_inode(parent->d_inode->i_sb, de->low_ino, de); - if (!inode) - goto out_de_put; - shadow = d_alloc_name(parent, de->name); - if (!shadow) - goto out_iput; - shadow->d_op = parent->d_op; /* proc_dentry_operations */ - d_instantiate(shadow, inode); -out: - return shadow; -out_iput: - iput(inode); -out_de_put: - de_put(de); - goto out; + return task->nsproxy->net_ns->proc_net; } -static void *proc_net_follow_link(struct dentry *parent, struct nameidata *nd) -{ - struct net *net = current->nsproxy->net_ns; - struct dentry *shadow; - shadow = proc_net_shadow_dentry(parent, net->proc_net); - if (!shadow) - return ERR_PTR(-ENOENT); - - dput(nd->dentry); - /* My dentry count is 1 and that should be enough as the - * shadow dentry is thrown away immediately. 
- */ - nd->dentry = shadow; - return NULL; -} - -static struct dentry *proc_net_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct net *net = current->nsproxy->net_ns; - struct dentry *shadow; - - shadow = proc_net_shadow_dentry(nd->dentry, net->proc_net); - if (!shadow) - return ERR_PTR(-ENOENT); - - dput(nd->dentry); - nd->dentry = shadow; - - return shadow->d_inode->i_op->lookup(shadow->d_inode, dentry, nd); -} - -static int proc_net_setattr(struct dentry *dentry, struct iattr *iattr) -{ - struct net *net = current->nsproxy->net_ns; - struct dentry *shadow; - int ret; - - shadow = proc_net_shadow_dentry(dentry->d_parent, net->proc_net); - if (!shadow) - return -ENOENT; - ret = shadow->d_inode->i_op->setattr(shadow, iattr); - dput(shadow); - return ret; -} - -static const struct file_operations proc_net_dir_operations = { - .read = generic_read_dir, -}; - -static struct inode_operations proc_net_dir_inode_operations = { - .follow_link = proc_net_follow_link, - .lookup = proc_net_lookup, - .setattr = proc_net_setattr, -}; - static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *root, *netd, *net_statd; @@ -185,9 +110,8 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = { int __init proc_net_init(void) { - proc_net_shadow = proc_mkdir("net", NULL); - proc_net_shadow->proc_iops = &proc_net_dir_inode_operations; - proc_net_shadow->proc_fops = &proc_net_dir_operations; + shadow_pde = proc_mkdir("net", NULL); + shadow_pde->shadow_proc = proc_net_shadow; return register_pernet_subsys(&proc_net_ns_ops); } diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 1273c6ec535c..523528d237b0 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -48,6 +48,8 @@ typedef int (read_proc_t)(char *page, char **start, off_t off, typedef int (write_proc_t)(struct file *file, const char __user *buffer, unsigned long count, void *data); typedef int (get_info_t)(char *, char **, 
off_t, int); +typedef struct proc_dir_entry *(shadow_proc_t)(struct task_struct *task, + struct proc_dir_entry *pde); struct proc_dir_entry { unsigned int low_ino; @@ -79,6 +81,7 @@ struct proc_dir_entry { int pde_users; /* number of callers into module in progress */ spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ struct completion *pde_unload_completion; + shadow_proc_t *shadow_proc; }; struct kcore_list { From d523a328fb0271e1a763e985a21f2488fd816e7e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Dec 2007 15:51:25 +1100 Subject: [PATCH 27/27] [INET]: Fix inet_diag dead-lock regression The inet_diag register fix broke inet_diag module loading because the loaded module had to take the same mutex that's already held by the loader in order to register the new handler. This patch fixes it by introducing a separate mutex to protect the handling of handlers. Signed-off-by: Herbert Xu --- net/ipv4/inet_diag.c | 67 ++++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 6b3fffb554b6..e468e7a7aac4 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -51,6 +51,29 @@ static struct sock *idiagnl; #define INET_DIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) +static DEFINE_MUTEX(inet_diag_table_mutex); + +static const struct inet_diag_handler *inet_diag_lock_handler(int type) +{ +#ifdef CONFIG_KMOD + if (!inet_diag_table[type]) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_INET_DIAG, type); +#endif + + mutex_lock(&inet_diag_table_mutex); + if (!inet_diag_table[type]) + return ERR_PTR(-ENOENT); + + return inet_diag_table[type]; +} + +static inline void inet_diag_unlock_handler( + const struct inet_diag_handler *handler) +{ + mutex_unlock(&inet_diag_table_mutex); +} + static int inet_csk_diag_fill(struct sock *sk, struct sk_buff *skb, int ext, u32 pid, u32 seq, u16 nlmsg_flags, @@ 
-235,9 +258,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, struct inet_hashinfo *hashinfo; const struct inet_diag_handler *handler; - handler = inet_diag_table[nlh->nlmsg_type]; - BUG_ON(handler == NULL); + handler = inet_diag_lock_handler(nlh->nlmsg_type); + if (!handler) + return -ENOENT; + hashinfo = handler->idiag_hashinfo; + err = -EINVAL; if (req->idiag_family == AF_INET) { sk = inet_lookup(hashinfo, req->id.idiag_dst[0], @@ -255,11 +281,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, } #endif else { - return -EINVAL; + goto unlock; } + err = -ENOENT; if (sk == NULL) - return -ENOENT; + goto unlock; err = -ESTALE; if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE || @@ -296,6 +323,8 @@ out: else sock_put(sk); } +unlock: + inet_diag_unlock_handler(handler); return err; } @@ -678,8 +707,10 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) const struct inet_diag_handler *handler; struct inet_hashinfo *hashinfo; - handler = inet_diag_table[cb->nlh->nlmsg_type]; - BUG_ON(handler == NULL); + handler = inet_diag_lock_handler(cb->nlh->nlmsg_type); + if (!handler) + goto no_handler; + hashinfo = handler->idiag_hashinfo; s_i = cb->args[1]; @@ -743,7 +774,7 @@ skip_listen_ht: } if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) - return skb->len; + goto unlock; for (i = s_i; i < hashinfo->ehash_size; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; @@ -805,6 +836,9 @@ next_dying: done: cb->args[1] = i; cb->args[2] = num; +unlock: + inet_diag_unlock_handler(handler); +no_handler: return skb->len; } @@ -816,15 +850,6 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) nlmsg_len(nlh) < hdrlen) return -EINVAL; -#ifdef CONFIG_KMOD - if (inet_diag_table[nlh->nlmsg_type] == NULL) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_INET_DIAG, nlh->nlmsg_type); -#endif - - if (inet_diag_table[nlh->nlmsg_type] == NULL) - return -ENOENT; - if (nlh->nlmsg_flags & 
NLM_F_DUMP) { if (nlmsg_attrlen(nlh, hdrlen)) { struct nlattr *attr; @@ -861,13 +886,13 @@ int inet_diag_register(const struct inet_diag_handler *h) if (type >= INET_DIAG_GETSOCK_MAX) goto out; - mutex_lock(&inet_diag_mutex); + mutex_lock(&inet_diag_table_mutex); err = -EEXIST; if (inet_diag_table[type] == NULL) { inet_diag_table[type] = h; err = 0; } - mutex_unlock(&inet_diag_mutex); + mutex_unlock(&inet_diag_table_mutex); out: return err; } @@ -880,9 +905,9 @@ void inet_diag_unregister(const struct inet_diag_handler *h) if (type >= INET_DIAG_GETSOCK_MAX) return; - mutex_lock(&inet_diag_mutex); + mutex_lock(&inet_diag_table_mutex); inet_diag_table[type] = NULL; - mutex_unlock(&inet_diag_mutex); + mutex_unlock(&inet_diag_table_mutex); } EXPORT_SYMBOL_GPL(inet_diag_unregister); @@ -897,7 +922,7 @@ static int __init inet_diag_init(void) goto out; idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, - inet_diag_rcv, &inet_diag_mutex, THIS_MODULE); + inet_diag_rcv, NULL, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; err = 0;