diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index d9112de85261..43f4a818d88f 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -21,7 +21,8 @@ struct sock_reuseport { unsigned int synq_overflow_ts; /* ID stays the same even after the size of socks[] grows. */ unsigned int reuseport_id; - bool bind_inany; + unsigned int bind_inany:1; + unsigned int has_conns:1; struct bpf_prog __rcu *prog; /* optional BPF sock selector */ struct sock *socks[0]; /* array of sock pointers */ }; @@ -37,6 +38,23 @@ extern struct sock *reuseport_select_sock(struct sock *sk, extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); extern int reuseport_detach_prog(struct sock *sk); +static inline bool reuseport_has_conns(struct sock *sk, bool set) +{ + struct sock_reuseport *reuse; + bool ret = false; + + rcu_read_lock(); + reuse = rcu_dereference(sk->sk_reuseport_cb); + if (reuse) { + if (set) + reuse->has_conns = 1; + ret = reuse->has_conns; + } + rcu_read_unlock(); + + return ret; +} + int reuseport_get_id(struct sock_reuseport *reuse); #endif /* _SOCK_REUSEPORT_H */ diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 9408f9264d05..f3ceec93f392 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk, select_by_hash: /* no bpf or invalid bpf result: fall back to hash usage */ - if (!sk2) - sk2 = reuse->socks[reciprocal_scale(hash, socks)]; + if (!sk2) { + int i, j; + + i = j = reciprocal_scale(hash, socks); + while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { + i++; + if (i >= reuse->num_socks) + i = 0; + if (i == j) + goto out; + } + sk2 = reuse->socks[i]; + } } out: diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 7bd29e694603..9a0fe0c2fa02 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -15,6 +15,7 @@ #include #include #include +#include int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len } inet->inet_daddr = fl4->daddr; inet->inet_dport = usin->sin_port; + reuseport_has_conns(sk, true); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); inet->inet_id = jiffies; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d88821c794fb..16486c8b708b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport) { + if (sk->sk_reuseport && + sk->sk_state != TCP_ESTABLISHED) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (result) + if (result && !reuseport_has_conns(sk, false)) return result; } badness = score; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9ab897ded4df..96f939248d2f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -254,6 +255,7 @@ ipv4_connected: goto out; } + reuseport_has_conns(sk, true); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); out: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 827fe7385078..5995fdc99d3f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport) { + if (sk->sk_reuseport && + sk->sk_state != TCP_ESTABLISHED) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (result) + if (result && !reuseport_has_conns(sk, false)) return result; } result = sk;