diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 1510ce9a4e4e..4562579797d1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -524,8 +524,37 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, false, false) < 0) return PACKET_REJECT; - tunnel->parms.index = ntohl(index); - ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); + if (tunnel->parms.collect_md) { + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + struct erspan_metadata *md; + __be64 tun_id; + __be16 flags; + + tpi->flags |= TUNNEL_KEY; + flags = tpi->flags; + tun_id = key32_to_tunnel_id(tpi->key); + + tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, + sizeof(*md)); + if (!tun_dst) + return PACKET_REJECT; + + info = &tun_dst->u.tun_info; + md = ip_tunnel_info_opts(info); + if (!md) + return PACKET_REJECT; + + md->index = index; + info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + info->options_len = sizeof(*md); + + ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); + + } else { + tunnel->parms.index = ntohl(index); + ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); + } return PACKET_RCVD; } @@ -857,42 +886,73 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, if (gre_handle_offloads(skb, false)) goto tx_err; - switch (skb->protocol) { - case htons(ETH_P_IP): - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, - &dsfield, &encap_limit); - break; - case htons(ETH_P_IPV6): - if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) - goto tx_err; - if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, - &dsfield, &encap_limit)) - goto tx_err; - break; - default: - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - break; - } - if (skb->len > dev->mtu + dev->hard_header_len) { pskb_trim(skb, dev->mtu + dev->hard_header_len); truncate = true; } - erspan_build_header(skb, t->parms.o_key, t->parms.index, - truncate, false); t->parms.o_flags &= ~TUNNEL_KEY; - IPCB(skb)->flags = 0; - fl6.daddr = t->parms.raddr; + + /* For collect_md mode, derive fl6 from the tunnel key, + * for native mode, call prepare_ip6gre_xmit_{ipv4,ipv6}. + */ + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + struct erspan_metadata *md; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || + !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -EINVAL; + + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_GRE; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + + dsfield = key->tos; + md = ip_tunnel_info_opts(tun_info); + if (!md) + goto tx_err; + + erspan_build_header(skb, tunnel_id_to_key32(key->tun_id), + ntohl(md->index), truncate, false); + + } else { + switch (skb->protocol) { + case htons(ETH_P_IP): + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, + &dsfield, &encap_limit); + break; + case htons(ETH_P_IPV6): + if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + goto tx_err; + if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, + &dsfield, &encap_limit)) + goto tx_err; + break; + default: + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + break; + } + + erspan_build_header(skb, t->parms.o_key, t->parms.index, + truncate, false); + fl6.daddr = t->parms.raddr; + } /* Push GRE header. */ gre_build_header(skb, 8, TUNNEL_SEQ, htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++)); /* TooBig packet may have updated dst->dev's mtu */ - if (dst && dst_mtu(dst) > dst->dev->mtu) + if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index 15a469220e19..79ad061079dd 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -181,6 +181,64 @@ int _erspan_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("ip4ip6erspan_set_tunnel") +int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + struct erspan_metadata md; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = _htonl(0x11); + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + md.index = htonl(123); + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip4ip6erspan_get_tunnel") +int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip6 ::%x erspan index 0x%x\n"; + struct bpf_tunnel_key key; + struct erspan_metadata md; + u32 index; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + index = bpf_ntohl(md.index); + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv6[0], index); + + return TC_ACT_OK; +} + SEC("vxlan_set_tunnel") int _vxlan_set_tunnel(struct __sk_buff *skb) { diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh index 226f45381b76..f53efb62f699 100755 --- a/samples/bpf/test_tunnel_bpf.sh +++ b/samples/bpf/test_tunnel_bpf.sh @@ -70,6 +70,28 @@ function add_erspan_tunnel { ip addr add dev $DEV 10.1.1.200/24 } +function add_ip6erspan_tunnel { + + # assign ipv6 address + ip netns exec at_ns0 ip addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 erspan 123 \ + local ::11 remote ::22 + + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # out of namespace + ip link add dev $DEV type $TYPE external + ip addr add dev $DEV 10.1.1.200/24 + ip link set dev $DEV up +} + function add_vxlan_tunnel { # Set static ARP entry here because iptables set-mark works # on L3 packet, as a result not applying to ARP packets, @@ -184,6 +206,18 @@ function test_erspan { cleanup } +function test_ip6erspan { + TYPE=ip6erspan + DEV_NS=ip6erspan00 + DEV=ip6erspan11 + config_device + add_ip6erspan_tunnel + attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel + ping6 -c 3 ::11 + ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup +} + function test_vxlan { TYPE=vxlan DEV_NS=vxlan00 @@ -239,6 +273,7 @@ function cleanup { ip link del vxlan11 ip link del geneve11 ip link del erspan11 + ip link del ip6erspan11 pkill tcpdump pkill cat set -ex @@ -254,6 +289,8 @@ echo "Testing IP6GRETAP tunnel..." test_ip6gretap echo "Testing ERSPAN tunnel..." test_erspan +echo "Testing IP6ERSPAN tunnel..." +test_ip6erspan echo "Testing VXLAN tunnel..." test_vxlan echo "Testing GENEVE tunnel..."