diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f44bb5c77a70..cdc08c190638 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -82,6 +82,7 @@ struct inet_bind_bucket { #endif unsigned short port; signed short fastreuse; + int num_owners; struct hlist_node node; struct hlist_head owners; }; @@ -133,7 +134,7 @@ struct inet_hashinfo { struct inet_bind_hashbucket *bhash; unsigned int bhash_size; - /* Note : 4 bytes padding on 64 bit arches */ + int bsockets; struct kmem_cache *bind_bucket_cachep; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f26ab38680de..df8e72f07478 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -93,24 +93,40 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) struct inet_bind_hashbucket *head; struct hlist_node *node; struct inet_bind_bucket *tb; - int ret; + int ret, attempts = 5; struct net *net = sock_net(sk); + int smallest_size = -1, smallest_rover; local_bh_disable(); if (!snum) { int remaining, rover, low, high; +again: inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; - rover = net_random() % remaining + low; + smallest_rover = rover = net_random() % remaining + low; + smallest_size = -1; do { head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (ib_net(tb) == net && tb->port == rover) + if (ib_net(tb) == net && tb->port == rover) { + if (tb->fastreuse > 0 && + sk->sk_reuse && + sk->sk_state != TCP_LISTEN && + (tb->num_owners < smallest_size || smallest_size == -1)) { + smallest_size = tb->num_owners; + smallest_rover = rover; + if (hashinfo->bsockets > (high - low) + 1) { + spin_unlock(&head->lock); + snum = smallest_rover; + goto have_snum; + } + } goto next; + } break; next: spin_unlock(&head->lock); @@ -125,14 +141,19 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) * the top level, not from the 'break;' statement. */ ret = 1; - if (remaining <= 0) + if (remaining <= 0) { + if (smallest_size != -1) { + snum = smallest_rover; + goto have_snum; + } goto fail; - + } /* OK, here is the one we will use. HEAD is * non-NULL and we hold it's mutex. */ snum = rover; } else { +have_snum: head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)]; spin_lock(&head->lock); @@ -145,12 +166,18 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) tb_found: if (!hlist_empty(&tb->owners)) { if (tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN) { + sk->sk_reuse && sk->sk_state != TCP_LISTEN && + smallest_size == -1) { goto success; } else { ret = 1; - if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) + if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { + if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && --attempts >= 0) { + spin_unlock(&head->lock); + goto again; + } goto fail_unlock; + } } } tb_not_found: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 6a1045da48d2..d7b6178bf48b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -38,6 +38,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; + tb->num_owners = 0; INIT_HLIST_HEAD(&tb->owners); hlist_add_head(&tb->node, &head->chain); } @@ -59,8 +60,13 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum) { + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + + hashinfo->bsockets++; + inet_sk(sk)->num = snum; sk_add_bind_node(sk, &tb->owners); + tb->num_owners++; inet_csk(sk)->icsk_bind_hash = tb; } @@ -75,9 +81,12 @@ static void __inet_put_port(struct sock *sk) struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; + hashinfo->bsockets--; + spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; __sk_del_bind_node(sk); + tb->num_owners--; inet_csk(sk)->icsk_bind_hash = NULL; inet_sk(sk)->num = 0; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); @@ -444,9 +453,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, */ inet_bind_bucket_for_each(tb, node, &head->chain) { if (ib_net(tb) == net && tb->port == port) { - WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; + WARN_ON(hlist_empty(&tb->owners)); if (!check_established(death_row, sk, port, &tw)) goto ok;