From 14869c388673e8db3348ab3706fa6485d0f0cf95 Mon Sep 17 00:00:00 2001
From: Dmitry Yusupov
Date: Tue, 23 Aug 2005 10:09:27 -0700
Subject: [PATCH 01/11] [TCP]: Do TSO deferral even if tail SKB can go out now.

If the tail SKB fits into the window, it is still beneficial to defer
until the goal percentage of the window is available. This gives the
application time to feed more data into the send queue and thus results
in larger TSO frames going out.

Patch from Dmitry Yusupov .

Signed-off-by: David S. Miller
---
 net/ipv4/tcp_output.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 566045e58437..dd30dd137b74 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -925,10 +925,6 @@ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_
 
         limit = min(send_win, cong_win);
 
-        /* If sk_send_head can be sent fully now, just do it. */
-        if (skb->len <= limit)
-                return 0;
-
         if (sysctl_tcp_tso_win_divisor) {
                 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
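
A minimal stand-alone sketch of the deferral decision this patch leaves in
place, with the removed early exit shown as a comment. The function name,
parameters, and the idea of a "goal" fraction of the window are illustrative
assumptions, not the kernel's exact code:

        static int tso_should_defer(unsigned int send_win, unsigned int cong_win,
                                    unsigned int window, unsigned int win_divisor)
        {
                unsigned int limit = send_win < cong_win ? send_win : cong_win;
                unsigned int goal = window / win_divisor; /* e.g. 1/3 of the window */

                /* Removed by this patch:
                 *     if (skb_len <= limit) return 0;   -- tail SKB sent at once
                 * Keeping the deferral even when the tail SKB fits lets the
                 * application queue more data, so the eventual TSO frame is larger.
                 */
                return limit < goal; /* nonzero: defer and wait for more data */
        }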

From c3a20692ca5c8eb8cf5d0f489d4fc839ce7593d1 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 23 Aug 2005 10:09:53 -0700
Subject: [PATCH 02/11] [RPC]: Kill bogus kmap in krb5

While I was going through the crypto users recently, I noticed this
bogus kmap in sunrpc. It's totally unnecessary since the crypto layer
will do its own kmap before touching the data. Besides, the kmap is
throwing the return value away.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 24c21f2a33a7..5a7265aeaf83 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -185,9 +185,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
                 sg->page = body->pages[i];
                 sg->offset = offset;
                 sg->length = thislen;
-                kmap(sg->page); /* XXX kmap_atomic? */
                 crypto_digest_update(tfm, sg, 1);
-                kunmap(sg->page);
                 len -= thislen;
                 i++;
                 offset = 0;

From 1344a41637114485fac7afa1505bce2ff862807a Mon Sep 17 00:00:00 2001
From: Dave Johnson
Date: Tue, 23 Aug 2005 10:10:15 -0700
Subject: [PATCH 03/11] [IPV4]: Fix negative timer loop with lots of ipv4 peers.

From: Dave Johnson

Found this bug while doing some scaling testing that created 500K inet
peers.

peer_check_expire() in net/ipv4/inetpeer.c isn't using
inet_peer_gc_mintime correctly and will end up creating an expire timer
with less than the minimum duration, and even zero/negative if enough
active peers are present.

If >65K peers, the timer will be less than inet_peer_gc_mintime, and
with >70K peers, the timer duration will reach zero and go negative.

The timer handler will continue to schedule another zero/negative timer
in a loop until peers can be aged. This can continue for at least a few
minutes or even longer if the peers remain active due to arriving
packets while the loop is occurring.

Bug is present in both 2.4 and 2.6. Same patch will apply to both just
fine.

Signed-off-by: Andrew Morton
Signed-off-by: David S. Miller
---
 net/ipv4/inetpeer.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 95473953c406..ab18a853d7ce 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -450,10 +450,13 @@ static void peer_check_expire(unsigned long dummy)
         /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
          * interval depending on the total number of entries (more entries,
          * less interval). */
-        peer_periodic_timer.expires = jiffies
-                + inet_peer_gc_maxtime
-                - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
-                        peer_total / inet_peer_threshold * HZ;
+        if (peer_total >= inet_peer_threshold)
+                peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
+        else
+                peer_periodic_timer.expires = jiffies
+                        + inet_peer_gc_maxtime
+                        - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
+                                peer_total / inet_peer_threshold * HZ;
         add_timer(&peer_periodic_timer);
 }
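
A stand-alone arithmetic check of the old formula makes the failure easy to
see. The constants below assume the stock defaults (inet_peer_threshold =
65664, gc_mintime = 10 s, gc_maxtime = 120 s, HZ = 1000); with them the
computed interval crosses zero at roughly 71.6K peers, matching the numbers
in the commit message:

        #include <stdio.h>

        int main(void)
        {
                const long HZ = 1000;
                const long threshold = 65664;  /* inet_peer_threshold (assumed) */
                const long mintime = 10 * HZ;  /* inet_peer_gc_mintime */
                const long maxtime = 120 * HZ; /* inet_peer_gc_maxtime */

                for (long peers = 60000; peers <= 80000; peers += 5000) {
                        /* the pre-patch expression, with no clamp at mintime */
                        long ticks = maxtime
                                - (maxtime - mintime) / HZ * peers / threshold * HZ;
                        printf("%6ld peers -> next timer in %+ld jiffies\n",
                               peers, ticks);
                }
                return 0;
        }

With these defaults the program prints +20000 jiffies at 60000 peers, only
+3000 at 70000 (already below gc_mintime), and -5000 at 75000: the timer
expires in the past and refires immediately, which is the loop the patch
breaks by clamping at inet_peer_gc_mintime once peer_total reaches the
threshold.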

From 66a79a19a7c582efd99bb143c3a59fbda006eb39 Mon Sep 17 00:00:00 2001
From: Patrick McHardy
Date: Tue, 23 Aug 2005 10:10:35 -0700
Subject: [PATCH 04/11] [NETFILTER]: Fix HW checksum handling in ip_queue/ip6_queue

The checksum needs to be filled in on output, and after mangling a
packet ip_summed needs to be reset.

Signed-off-by: Patrick McHardy
Signed-off-by: David S. Miller
---
 net/ipv4/netfilter/ip_queue.c  | 7 +++++++
 net/ipv6/netfilter/ip6_queue.c | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index eda1fba431a4..c6baa8174389 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -214,6 +214,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
                 break;
 
         case IPQ_COPY_PACKET:
+                if (entry->skb->ip_summed == CHECKSUM_HW &&
+                    (*errp = skb_checksum_help(entry->skb,
+                                               entry->info->outdev == NULL))) {
+                        read_unlock_bh(&queue_lock);
+                        return NULL;
+                }
                 if (copy_range == 0 || copy_range > entry->skb->len)
                         data_len = entry->skb->len;
                 else
@@ -385,6 +391,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
         if (!skb_ip_make_writable(&e->skb, v->data_len))
                 return -ENOMEM;
         memcpy(e->skb->data, v->payload, v->data_len);
+        e->skb->ip_summed = CHECKSUM_NONE;
         e->skb->nfcache |= NFC_ALTERED;
 
         /*
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 5493180f0d44..a16df5b27c84 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -211,6 +211,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
                 break;
 
         case IPQ_COPY_PACKET:
+                if (entry->skb->ip_summed == CHECKSUM_HW &&
+                    (*errp = skb_checksum_help(entry->skb,
+                                               entry->info->outdev == NULL))) {
+                        read_unlock_bh(&queue_lock);
+                        return NULL;
+                }
                 if (copy_range == 0 || copy_range > entry->skb->len)
                         data_len = entry->skb->len;
                 else
@@ -381,6 +387,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
         if (!skb_ip_make_writable(&e->skb, v->data_len))
                 return -ENOMEM;
         memcpy(e->skb->data, v->payload, v->data_len);
+        e->skb->ip_summed = CHECKSUM_NONE;
         e->skb->nfcache |= NFC_ALTERED;
 
         /*

From 53b924b31fa53ac3007df3fef6870d5074a9adf8 Mon Sep 17 00:00:00 2001
From: Ralf Baechle
Date: Tue, 23 Aug 2005 10:11:30 -0700
Subject: [PATCH 05/11] [NET]: Fix socket bitop damage

The socket flag cleanups that went into 2.6.12-rc1 are basically oring
the flags of an old socket into the socket just being created.
Unfortunately that one was just initialized by sock_init_data(), so it
already has SOCK_ZAPPED set. As a result zapped sockets are created and
all incoming connections will fail due to this bug, which again was
carefully replicated to at least AX.25, NET/ROM and ROSE.

In order to keep the abstraction alive I've introduced
sock_copy_flags() to copy the socket flags from one socket to another
and used that instead of the bitwise copy thing. Anyway, the idea here
has probably been to copy all flags, so sock_copy_flags() should be the
right thing. With this the ham radio protocols are usable again, so I
hope this will make it into 2.6.13.

Signed-off-by: Ralf Baechle DL5RB
Signed-off-by: David S. Miller
---
 include/net/sock.h     | 5 +++++
 net/ax25/af_ax25.c     | 7 +------
 net/netrom/af_netrom.c | 7 +------
 net/rose/af_rose.c     | 7 +------
 4 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index a1042d08becd..e9b1dbab90d0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -384,6 +384,11 @@ enum sock_flags {
         SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
 };
 
+static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
+{
+        nsk->sk_flags = osk->sk_flags;
+}
+
 static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
 {
         __set_bit(flag, &sk->sk_flags);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 707097deac3d..7d8ecadba668 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -875,12 +875,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
         sk->sk_sndbuf = osk->sk_sndbuf;
         sk->sk_state  = TCP_ESTABLISHED;
         sk->sk_sleep  = osk->sk_sleep;
-
-        if (sock_flag(osk, SOCK_DBG))
-                sock_set_flag(sk, SOCK_DBG);
-
-        if (sock_flag(osk, SOCK_ZAPPED))
-                sock_set_flag(sk, SOCK_ZAPPED);
+        sock_copy_flags(sk, osk);
 
         oax25 = ax25_sk(osk);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 31ed4a9a1d06..5385835e9267 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -459,12 +459,7 @@ static struct sock *nr_make_new(struct sock *osk)
         sk->sk_sndbuf = osk->sk_sndbuf;
         sk->sk_state  = TCP_ESTABLISHED;
         sk->sk_sleep  = osk->sk_sleep;
-
-        if (sock_flag(osk, SOCK_ZAPPED))
-                sock_set_flag(sk, SOCK_ZAPPED);
-
-        if (sock_flag(osk, SOCK_DBG))
-                sock_set_flag(sk, SOCK_DBG);
+        sock_copy_flags(sk, osk);
 
         skb_queue_head_init(&nr->ack_queue);
         skb_queue_head_init(&nr->reseq_queue);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 7eb6a5bf93ea..3fe7e562125a 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -556,12 +556,7 @@ static struct sock *rose_make_new(struct sock *osk)
         sk->sk_sndbuf = osk->sk_sndbuf;
         sk->sk_state  = TCP_ESTABLISHED;
         sk->sk_sleep  = osk->sk_sleep;
-
-        if (sock_flag(osk, SOCK_ZAPPED))
-                sock_set_flag(sk, SOCK_ZAPPED);
-
-        if (sock_flag(osk, SOCK_DBG))
-                sock_set_flag(sk, SOCK_DBG);
+        sock_copy_flags(sk, osk);
 
         init_timer(&rose->timer);
         init_timer(&rose->idletimer);
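
The failure mode is easy to demonstrate outside the kernel. A minimal sketch
(the flag bit positions are made up for illustration): a set-only, per-flag
copy can never clear the SOCK_ZAPPED bit that sock_init_data() has already
set on the new socket, while the whole-word assignment that sock_copy_flags()
performs does:

        #include <stdio.h>

        #define SOCK_ZAPPED (1UL << 2) /* illustrative bit positions */
        #define SOCK_DBG    (1UL << 3)

        int main(void)
        {
                unsigned long osk = SOCK_DBG;    /* listener: ZAPPED already cleared */
                unsigned long nsk = SOCK_ZAPPED; /* fresh sock_init_data() state */

                /* the old per-flag copy: sets bits, never clears stale ones */
                if (osk & SOCK_DBG)
                        nsk |= SOCK_DBG;
                if (osk & SOCK_ZAPPED)
                        nsk |= SOCK_ZAPPED;
                printf("per-flag copy: zapped=%d\n", (nsk & SOCK_ZAPPED) != 0);

                nsk = osk; /* sock_copy_flags(): assignment clears ZAPPED too */
                printf("assignment:    zapped=%d\n", (nsk & SOCK_ZAPPED) != 0);
                return 0;
        }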

From 01d7dd0e9f8c5f1888619d2649c7da389232b408 Mon Sep 17 00:00:00 2001
From: Ralf Baechle
Date: Tue, 23 Aug 2005 10:11:45 -0700
Subject: [PATCH 06/11] [AX25]: UID fixes

 o Brown paperbag bug - ax25_findbyuid() was always returning a NULL
   pointer as the result. Breaks ROSE completely, and AX.25 if the UID
   policy is set to deny.
 o While the list structure of AX.25's UID to callsign mapping table
   was properly protected by a spinlock, its elements were not
   refcounted, resulting in a race between removal and usage of an
   element.

Signed-off-by: Ralf Baechle DL5RB
Signed-off-by: David S. Miller
---
 include/net/ax25.h     | 18 ++++++++-
 net/ax25/af_ax25.c     | 20 ++++++----
 net/ax25/ax25_route.c  | 12 +++---
 net/ax25/ax25_uid.c    | 83 ++++++++++++++++++------------------
 net/netrom/af_netrom.c | 24 ++++++++----
 net/rose/af_rose.c     | 20 ++++++----
 6 files changed, 100 insertions(+), 77 deletions(-)

diff --git a/include/net/ax25.h b/include/net/ax25.h
index 828a3a93dda1..3696f988a9f1 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -139,11 +139,25 @@ enum {
 #define AX25_DEF_DS_TIMEOUT     (3 * 60 * HZ)   /* DAMA timeout 3 minutes */
 
 typedef struct ax25_uid_assoc {
-        struct ax25_uid_assoc   *next;
+        struct hlist_node       uid_node;
+        atomic_t                refcount;
         uid_t                   uid;
         ax25_address            call;
 } ax25_uid_assoc;
 
+#define ax25_uid_for_each(__ax25, node, list) \
+        hlist_for_each_entry(__ax25, node, list, uid_node)
+
+#define ax25_uid_hold(ax25) \
+        atomic_inc(&((ax25)->refcount))
+
+static inline void ax25_uid_put(ax25_uid_assoc *assoc)
+{
+        if (atomic_dec_and_test(&assoc->refcount)) {
+                kfree(assoc);
+        }
+}
+
 typedef struct {
         ax25_address            calls[AX25_MAX_DIGIS];
         unsigned char           repeated[AX25_MAX_DIGIS];
@@ -376,7 +390,7 @@ extern unsigned long ax25_display_timer(struct timer_list *);
 
 /* ax25_uid.c */
 extern int  ax25_uid_policy;
-extern ax25_address *ax25_findbyuid(uid_t);
+extern ax25_uid_assoc *ax25_findbyuid(uid_t);
 extern int  ax25_uid_ioctl(int, struct sockaddr_ax25 *);
 extern struct file_operations ax25_uid_fops;
 extern void ax25_uid_free(void);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 7d8ecadba668..a5c94f11547c 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1002,7 +1002,8 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         struct sock *sk = sock->sk;
         struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr;
         ax25_dev *ax25_dev = NULL;
-        ax25_address *call;
+        ax25_uid_assoc *user;
+        ax25_address call;
         ax25_cb *ax25;
         int err = 0;
@@ -1021,9 +1022,15 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         if (addr->fsa_ax25.sax25_family != AF_AX25)
                 return -EINVAL;
 
-        call = ax25_findbyuid(current->euid);
-        if (call == NULL && ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
-                return -EACCES;
+        user = ax25_findbyuid(current->euid);
+        if (user) {
+                call = user->call;
+                ax25_uid_put(user);
+        } else {
+                if (ax25_uid_policy && !capable(CAP_NET_ADMIN))
+                        return -EACCES;
+
+                call = addr->fsa_ax25.sax25_call;
         }
 
         lock_sock(sk);
@@ -1034,10 +1041,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                 goto out;
         }
 
-        if (call == NULL)
-                ax25->source_addr = addr->fsa_ax25.sax25_call;
-        else
-                ax25->source_addr = *call;
+        ax25->source_addr = call;
 
         /*
          * User already set interface with SO_BINDTODEVICE
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 44b99b1ff9f8..c288526da4ce 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -422,8 +422,8 @@ static inline void ax25_adjust_path(ax25_address *addr, ax25_digi *digipeat)
  */
 int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
 {
+        ax25_uid_assoc *user;
         ax25_route *ax25_rt;
-        ax25_address *call;
         int err;
 
         if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
@@ -434,16 +434,18 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
                 goto put;
         }
 
-        if ((call = ax25_findbyuid(current->euid)) == NULL) {
+        user = ax25_findbyuid(current->euid);
+        if (user) {
+                ax25->source_addr = user->call;
+                ax25_uid_put(user);
+        } else {
                 if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
                         err = -EPERM;
                         goto put;
                 }
-                call = (ax25_address *)ax25->ax25_dev->dev->dev_addr;
+                ax25->source_addr = *(ax25_address *)ax25->ax25_dev->dev->dev_addr;
         }
 
-        ax25->source_addr = *call;
-
         if (ax25_rt->digipeat != NULL) {
                 if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
                         err = -ENOMEM;
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index cea6b7d19729..a8b3822f3ee4 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -41,38 +42,41 @@
  * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines.
  */
 
-static ax25_uid_assoc *ax25_uid_list;
+HLIST_HEAD(ax25_uid_list);
 static DEFINE_RWLOCK(ax25_uid_lock);
 
 int ax25_uid_policy = 0;
 
-ax25_address *ax25_findbyuid(uid_t uid)
+ax25_uid_assoc *ax25_findbyuid(uid_t uid)
 {
-        ax25_uid_assoc *ax25_uid;
-        ax25_address *res = NULL;
+        ax25_uid_assoc *ax25_uid, *res = NULL;
+        struct hlist_node *node;
 
         read_lock(&ax25_uid_lock);
-        for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) {
+        ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
                 if (ax25_uid->uid == uid) {
-                        res = &ax25_uid->call;
+                        ax25_uid_hold(ax25_uid);
+                        res = ax25_uid;
                         break;
                 }
         }
         read_unlock(&ax25_uid_lock);
 
-        return NULL;
+        return res;
 }
 
 int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 {
-        ax25_uid_assoc *s, *ax25_uid;
+        ax25_uid_assoc *ax25_uid;
+        struct hlist_node *node;
+        ax25_uid_assoc *user;
         unsigned long res;
 
         switch (cmd) {
         case SIOCAX25GETUID:
                 res = -ENOENT;
                 read_lock(&ax25_uid_lock);
-                for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) {
+                ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
                         if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
                                 res = ax25_uid->uid;
                                 break;
@@ -85,19 +89,22 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
         case SIOCAX25ADDUID:
                 if (!capable(CAP_NET_ADMIN))
                         return -EPERM;
-                if (ax25_findbyuid(sax->sax25_uid))
+                user = ax25_findbyuid(sax->sax25_uid);
+                if (user) {
+                        ax25_uid_put(user);
                         return -EEXIST;
+                }
                 if (sax->sax25_uid == 0)
                         return -EINVAL;
                 if ((ax25_uid = kmalloc(sizeof(*ax25_uid), GFP_KERNEL)) == NULL)
                         return -ENOMEM;
 
+                atomic_set(&ax25_uid->refcount, 1);
                 ax25_uid->uid  = sax->sax25_uid;
                 ax25_uid->call = sax->sax25_call;
 
                 write_lock(&ax25_uid_lock);
-                ax25_uid->next = ax25_uid_list;
-                ax25_uid_list  = ax25_uid;
+                hlist_add_head(&ax25_uid->uid_node, &ax25_uid_list);
                 write_unlock(&ax25_uid_lock);
 
                 return 0;
@@ -106,34 +113,21 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
                 if (!capable(CAP_NET_ADMIN))
                         return -EPERM;
 
+                ax25_uid = NULL;
                 write_lock(&ax25_uid_lock);
-                for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) {
-                        if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
+                ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+                        if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0)
                                 break;
-                        }
                 }
                 if (ax25_uid == NULL) {
                         write_unlock(&ax25_uid_lock);
                         return -ENOENT;
                 }
-                if ((s = ax25_uid_list) == ax25_uid) {
-                        ax25_uid_list = s->next;
-                        write_unlock(&ax25_uid_lock);
-                        kfree(ax25_uid);
-                        return 0;
-                }
-                while (s != NULL && s->next != NULL) {
-                        if (s->next == ax25_uid) {
-                                s->next = ax25_uid->next;
-                                write_unlock(&ax25_uid_lock);
-                                kfree(ax25_uid);
-                                return 0;
-                        }
-                        s = s->next;
-                }
+                hlist_del_init(&ax25_uid->uid_node);
+                ax25_uid_put(ax25_uid);
                 write_unlock(&ax25_uid_lock);
 
-                return -ENOENT;
+                return 0;
 
         default:
                 return -EINVAL;
@@ -147,13 +141,11 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
 {
         struct ax25_uid_assoc *pt;
-        int i = 1;
+        struct hlist_node *node;
+        int i = 0;
 
         read_lock(&ax25_uid_lock);
-        if (*pos == 0)
-                return SEQ_START_TOKEN;
-
-        for (pt = ax25_uid_list; pt != NULL; pt = pt->next) {
+        ax25_uid_for_each(pt, node, &ax25_uid_list) {
                 if (i == *pos)
                         return pt;
                 ++i;
         }
@@ -164,8 +156,9 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
 static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
         ++*pos;
-        return (v == SEQ_START_TOKEN) ? ax25_uid_list :
-                ((struct ax25_uid_assoc *) v)->next;
+
+        return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next,
+                           ax25_uid_assoc, uid_node);
 }
 
 static void ax25_uid_seq_stop(struct seq_file *seq, void *v)
@@ -179,7 +172,6 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v)
                 seq_printf(seq, "Policy: %d\n", ax25_uid_policy);
         else {
                 struct ax25_uid_assoc *pt = v;
-
                 seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(&pt->call));
         }
 
@@ -213,16 +205,13 @@ struct file_operations ax25_uid_fops = {
  */
 void __exit ax25_uid_free(void)
 {
-        ax25_uid_assoc *s, *ax25_uid;
+        ax25_uid_assoc *ax25_uid;
+        struct hlist_node *node;
 
         write_lock(&ax25_uid_lock);
-        ax25_uid = ax25_uid_list;
-        while (ax25_uid != NULL) {
-                s = ax25_uid;
-                ax25_uid = ax25_uid->next;
-
-                kfree(s);
+        ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+                hlist_del_init(&ax25_uid->uid_node);
+                ax25_uid_put(ax25_uid);
         }
-        ax25_uid_list = NULL;
         write_unlock(&ax25_uid_lock);
 }
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 5385835e9267..162a85fed150 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -536,7 +536,8 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         struct nr_sock *nr = nr_sk(sk);
         struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr;
         struct net_device *dev;
-        ax25_address *user, *source;
+        ax25_uid_assoc *user;
+        ax25_address *source;
 
         lock_sock(sk);
         if (!sock_flag(sk, SOCK_ZAPPED)) {
@@ -575,16 +576,19 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         } else {
                 source = &addr->fsa_ax25.sax25_call;
 
-                if ((user = ax25_findbyuid(current->euid)) == NULL) {
+                user = ax25_findbyuid(current->euid);
+                if (user) {
+                        nr->user_addr   = user->call;
+                        ax25_uid_put(user);
+                } else {
                         if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
                                 release_sock(sk);
                                 dev_put(dev);
                                 return -EPERM;
                         }
-                        user = source;
+                        nr->user_addr   = *source;
                 }
 
-                nr->user_addr   = *user;
                 nr->source_addr = *source;
         }
 
@@ -604,7 +608,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
         struct sock *sk = sock->sk;
         struct nr_sock *nr = nr_sk(sk);
         struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr;
-        ax25_address *user, *source = NULL;
+        ax25_address *source = NULL;
+        ax25_uid_assoc *user;
         struct net_device *dev;
 
         lock_sock(sk);
@@ -645,16 +650,19 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
         }
         source = (ax25_address *)dev->dev_addr;
 
-        if ((user = ax25_findbyuid(current->euid)) == NULL) {
+        user = ax25_findbyuid(current->euid);
+        if (user) {
+                nr->user_addr   = user->call;
+                ax25_uid_put(user);
+        } else {
                 if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
                         dev_put(dev);
                         release_sock(sk);
                         return -EPERM;
                 }
-                user = source;
+                nr->user_addr   = *source;
         }
 
-        nr->user_addr   = *user;
         nr->source_addr = *source;
         nr->device      = dev;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 3fe7e562125a..5480caf8ccc2 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -626,7 +626,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         struct rose_sock *rose = rose_sk(sk);
         struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
         struct net_device *dev;
-        ax25_address *user, *source;
+        ax25_address *source;
+        ax25_uid_assoc *user;
         int n;
 
         if (!sock_flag(sk, SOCK_ZAPPED))
@@ -651,14 +652,17 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
         source = &addr->srose_call;
 
-        if ((user = ax25_findbyuid(current->euid)) == NULL) {
+        user = ax25_findbyuid(current->euid);
+        if (user) {
+                rose->source_call = user->call;
+                ax25_uid_put(user);
+        } else {
                 if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE))
                         return -EACCES;
-                user = source;
+                rose->source_call = *source;
         }
 
         rose->source_addr   = addr->srose_addr;
-        rose->source_call   = *user;
         rose->device        = dev;
         rose->source_ndigis = addr->srose_ndigis;
 
@@ -685,8 +689,8 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
         struct rose_sock *rose = rose_sk(sk);
         struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
         unsigned char cause, diagnostic;
-        ax25_address *user;
         struct net_device *dev;
+        ax25_uid_assoc *user;
         int n;
 
         if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
@@ -736,12 +740,14 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
         if ((dev = rose_dev_first()) == NULL)
                 return -ENETUNREACH;
 
-        if ((user = ax25_findbyuid(current->euid)) == NULL)
+        user = ax25_findbyuid(current->euid);
+        if (!user)
                 return -EINVAL;
 
         memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN);
-        rose->source_call = *user;
+        rose->source_call = user->call;
         rose->device      = dev;
+        ax25_uid_put(user);
 
         rose_insert_socket(sk); /* Finish the bind */
 }
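
The calling convention this patch establishes, restated as a stand-alone
sketch for clarity: every lookup now returns a held reference, so the caller
pairs it with ax25_uid_put() once it has copied what it needs, and a
concurrent SIOCAX25DELUID can no longer free an entry out from under a
reader:

        ax25_uid_assoc *user;
        ax25_address call;

        user = ax25_findbyuid(current->euid); /* returns with refcount held */
        if (user) {
                call = user->call;            /* safe while we hold the ref */
                ax25_uid_put(user);           /* may kfree() on the last put */
        } else {
                /* fall back to the device or socket address, per caller */
        }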

From d2287f844187158e5eddd0d5de8e95bd607abcb7 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich
Date: Tue, 23 Aug 2005 10:12:04 -0700
Subject: [PATCH 07/11] [SCTP]: Add SENTINEL to SCTP MIB stats

Add SNMP_MIB_SENTINEL to the definition of the sctp_snmp_list so that
the output routine in proc correctly terminates. This was causing some
problems running on ia64 systems.

Signed-off-by: Vlad Yasevich
Signed-off-by: David S. Miller
---
 net/sctp/proc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 98d49ec9b74b..b74f7772b576 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -57,6 +57,7 @@ static struct snmp_mib sctp_snmp_list[] = {
         SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS),
         SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS),
         SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS),
+        SNMP_MIB_SENTINEL
 };
 
 /* Return the current value of a particular entry in the mib by adding its
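
Why the sentinel matters, as a stand-alone sketch (the struct layout and the
NULL-name convention are assumptions modelled on the kernel's snmp_mib
tables): the proc output loop stops at the first entry whose name is NULL,
so a table without the sentinel walks past its own end into whatever happens
to follow it in memory:

        #include <stdio.h>

        struct snmp_mib { const char *name; int entry; };

        static struct snmp_mib sctp_list[] = {
                { "SctpOutSCTPPacks", 1 },
                { "SctpInSCTPPacks",  2 },
                { NULL, 0 } /* SNMP_MIB_SENTINEL: terminates the walk */
        };

        int main(void)
        {
                for (int i = 0; sctp_list[i].name != NULL; i++)
                        printf("%-20s %d\n", sctp_list[i].name, sctp_list[i].entry);
                return 0;
        }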

From 0fbbeb1ba43bd04f0f1d4f161b7f72437a1c8a03 Mon Sep 17 00:00:00 2001
From: Thomas Graf
Date: Tue, 23 Aug 2005 10:12:44 -0700
Subject: [PATCH 08/11] [PKT_SCHED]: Fix missing qdisc_destroy() in qdisc_create_dflt()

qdisc_create_dflt() fails to destroy the newly allocated default qdisc
if the initialization fails, resulting in leaks of all kinds. The only
caller in mainline which may trigger this bug is sch_tbf.c in
tbf_create_dflt_qdisc().

Note: qdisc_create_dflt() doesn't fulfill the official locking
requirements of qdisc_destroy(), but since the qdisc could never be
seen by the outside world this doesn't matter, and it can stay as-is
until the locking of pkt_sched is cleaned up.

Signed-off-by: Thomas Graf
Signed-off-by: David S. Miller
---
 net/sched/sch_generic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8edefd5d095d..0d066c965342 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -438,6 +438,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
         if (!ops->init || ops->init(sch, NULL) == 0)
                 return sch;
 
+        qdisc_destroy(sch);
 errout:
         return NULL;
 }

From 89ebd197eb2cd31d6187db344d5117064e19fdde Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Tue, 23 Aug 2005 10:13:06 -0700
Subject: [PATCH 09/11] [TCP]: Unconditionally clear TCP_NAGLE_PUSH in skb_entail().

Intention of this bit is to force pushing of the existing send queue
when TCP_CORK or TCP_NODELAY state changes via setsockopt().

But it's easy to create a situation where the bit never clears. For
example, if the send queue starts empty:

1) set TCP_NODELAY
2) clear TCP_NODELAY
3) set TCP_CORK
4) do small write()

The current code will leave TCP_NAGLE_PUSH set after that sequence.
Unconditionally clearing the bit when new data is added via
skb_entail() solves the problem.

Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ddb6ce4ecff2..69b1fcf70077 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -584,7 +584,7 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
         sk_charge_skb(sk, skb);
         if (!sk->sk_send_head)
                 sk->sk_send_head = skb;
-        else if (tp->nonagle&TCP_NAGLE_PUSH)
+        if (tp->nonagle & TCP_NAGLE_PUSH)
                 tp->nonagle &= ~TCP_NAGLE_PUSH;
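
The four-step sequence from the commit message, written out as the userspace
calls involved (a sketch; error handling omitted, and fd is assumed to be a
connected TCP socket):

        #include <netinet/in.h>
        #include <netinet/tcp.h>
        #include <sys/socket.h>
        #include <unistd.h>

        void trigger_stuck_push(int fd)
        {
                int on = 1, off = 0;
                char byte = 'x';

                setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on,  sizeof(on));  /* 1 */
                setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &off, sizeof(off)); /* 2 */
                setsockopt(fd, IPPROTO_TCP, TCP_CORK,    &on,  sizeof(on));  /* 3 */
                write(fd, &byte, 1);                                         /* 4 */

                /* Before the fix, the push bit set in step 1 was still set at
                 * step 4 and defeated the cork; the patched skb_entail()
                 * clears it whenever new data is queued. */
        }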

From d5d283751ef3c05b6766501a46800cbee84959d6 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Tue, 23 Aug 2005 10:49:54 -0700
Subject: [PATCH 10/11] [TCP]: Document non-trivial locking path in tcp_v{4,6}_get_port().

This trips up a lot of folks reading this code. Put an unlikely()
around the port-exhaustion test for good measure.

Signed-off-by: David S. Miller
---
 net/ipv4/tcp_ipv4.c | 9 +++++++--
 net/ipv6/tcp_ipv6.c | 9 +++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5d91213d34c0..67c670886c1f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -242,9 +242,14 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
                 tcp_port_rover = rover;
                 spin_unlock(&tcp_portalloc_lock);
 
-                /* Exhausted local port range during search? */
+                /* Exhausted local port range during search?  It is not
+                 * possible for us to be holding one of the bind hash
+                 * locks if this test triggers, because if 'remaining'
+                 * drops to zero, we broke out of the do/while loop at
+                 * the top level, not from the 'break;' statement.
+                 */
                 ret = 1;
-                if (remaining <= 0)
+                if (unlikely(remaining <= 0))
                         goto fail;
 
                 /* OK, here is the one we will use.  HEAD is
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f6e288dc116e..ef29cfd936d3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -158,9 +158,14 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
                 tcp_port_rover = rover;
                 spin_unlock(&tcp_portalloc_lock);
 
-                /* Exhausted local port range during search? */
+                /* Exhausted local port range during search?  It is not
+                 * possible for us to be holding one of the bind hash
+                 * locks if this test triggers, because if 'remaining'
+                 * drops to zero, we broke out of the do/while loop at
+                 * the top level, not from the 'break;' statement.
+                 */
                 ret = 1;
-                if (remaining <= 0)
+                if (unlikely(remaining <= 0))
                         goto fail;
 
                 /* OK, here is the one we will use. */

From dc16aaf29d64b8c5e0b88f49a4d541edf5b61e42 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Tue, 23 Aug 2005 10:50:09 -0700
Subject: [PATCH 11/11] [ROSE]: Fix missing unlocks in rose_route_frame()

Noticed by Coverity checker.

Signed-off-by: David S. Miller
---
 net/rose/rose_route.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index ff73ebb912b8..46b23217a353 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -994,8 +994,10 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
          *      1. The frame isn't for us,
          *      2. It isn't "owned" by any existing route.
          */
-        if (frametype != ROSE_CALL_REQUEST)     /* XXX */
-                return 0;
+        if (frametype != ROSE_CALL_REQUEST) {   /* XXX */
+                ret = 0;
+                goto out;
+        }
 
         len  = (((skb->data[3] >> 4) & 0x0F) + 1) / 2;
         len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2;