diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 10e216c6e05e..f35bfe43bf7a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1132,6 +1132,15 @@ flowlabel_consistency - BOOLEAN FALSE: disabled Default: TRUE +auto_flowlabels - BOOLEAN + Automatically generate flow labels based based on a flow hash + of the packet. This allows intermediate devices, such as routers, + to idenfify packet flows for mechanisms like Equal Cost Multipath + Routing (see RFC 6438). + TRUE: enabled + FALSE: disabled + Default: false + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5dc68c3ebcbd..ff560537dd61 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -199,7 +199,8 @@ struct ipv6_pinfo { * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1; + dontfrag:1, + autoflowlabel:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2aa86e1135a1..4308f2ada8b3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -699,6 +699,26 @@ static inline void ip6_set_txhash(struct sock *sk) sk->sk_txhash = flow_hash_from_keys(&keys); } +static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, + __be32 flowlabel, bool autolabel) +{ + if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { + __be32 hash; + + hash = skb_get_hash(skb); + + /* Since this is being sent on the wire obfuscate hash a bit + * to minimize possbility that any useful information to an + * attacker is leaked. Only lower 20 bits are relevant. + */ + hash ^= hash >> 12; + + flowlabel = hash & IPV6_FLOWLABEL_MASK; + } + + return flowlabel; +} + /* * Header manipulation */ diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 19d3446e59d2..eade27adecf3 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_mtu_expires; int ip6_rt_min_advmss; int flowlabel_consistency; + int auto_flowlabels; int icmpv6_time; int anycast_src_echo_reply; int fwmark_reflect; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 0d8e0f0342dc..22b7a69619d8 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -233,6 +233,7 @@ struct in6_flowlabel_req { #if 0 /* not yet */ #define IPV6_USE_MIN_MTU 63 #endif +#define IPV6_AUTOFLOWLABEL 64 /* * Netfilter (1) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a426cd7099bb..2daa3a133e49 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; + net->ipv6.sysctl.auto_flowlabels = 0; atomic_set(&net->ipv6.rt_genid, 0); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3873181ed856..365b2b6f3942 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -723,7 +723,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, * Push down and install the IP header. */ ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1174,7 +1175,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); + ip6_flow_hdr(ipv6h, 0, + ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cb9df0eb4023..fa83bdd4c3dd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - ip6_flow_hdr(hdr, tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1569,7 +1570,9 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, np->cork.tclass, + ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index afa082458360..51a1eb185ea7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1046,7 +1046,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index cc34f65179e4..b50b9e54cf53 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -834,6 +834,10 @@ pref_skip_coa: np->dontfrag = valbool; retv = 0; break; + case IPV6_AUTOFLOWLABEL: + np->autoflowlabel = valbool; + retv = 0; + break; } release_sock(sk); @@ -1273,6 +1277,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->dontfrag; break; + case IPV6_AUTOFLOWLABEL: + val = np->autoflowlabel; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 058f3eca2e53..5bf7b61f8ae8 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "auto_flowlabels", + .data = &init_net.ipv6.sysctl.auto_flowlabels, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "fwmark_reflect", .data = &init_net.ipv6.sysctl.fwmark_reflect, @@ -74,6 +81,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; + ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table)