Merge branch 'net-improve-the-process-of-redirect-and-toobig-for-ipv6-tunnels'

Xin Long says:

====================
net: improve the process of redirect and toobig for ipv6 tunnels

There are three kinds of ICMP packets that tunnels need to process:
toobig (needfrag), redirect, and others. They should be handled as follows:

 - toobig(needfrag)
   update the lower dst's pmtu via the route cache, and also update the
   sk dst's pmtu if possible; otherwise it is fine for the sk dst's pmtu
   to be updated on the tx path.

 - redirect
   update the lower dst's gw via the route cache and return; there is no
   need to send this redirect packet to the user sk.

 - others
   send the packet to the user's sk; alternatively, it is also fine to
   use err_count to count it and report a failed link on the tx path.

All ipv4 tunnels basically follow this, while some ipv6 tunnels behave
differently: ip6gre and ip6_tunnels update the tnl dev's mtu instead of
updating the lower dst's pmtu, and they have no redirect processing in
their err_handlers, which doesn't make any sense and even causes
performance problems.

This patchset improves the processing of redirect and toobig for ip6gre,
ip4ip6 and ip6ip6 tunnels, bringing it in line with the ipv4 tunnels.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-11-13 10:44:06 +09:00
commit ede372dcae
2 changed files with 34 additions and 50 deletions

View File

@ -369,6 +369,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
const struct gre_base_hdr *greh;
const struct ipv6hdr *ipv6h;
int grehlen = sizeof(*greh);
@ -402,9 +403,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
switch (type) {
__u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu;
__u32 teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@ -435,12 +435,11 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
return;
case ICMPV6_PKT_TOOBIG:
mtu = be32_to_cpu(info) - offset - t->tun_hlen;
if (t->dev->type == ARPHRD_ETHER)
mtu -= ETH_HLEN;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
return;
case NDISC_REDIRECT:
ip6_redirect(skb, net, skb->dev->ifindex, 0,
sock_net_uid(net, NULL));
return;
}
@ -503,7 +502,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
struct dst_entry *dst = skb_dst(skb);
__be16 protocol;
if (dev->type == ARPHRD_ETHER)
@ -522,10 +520,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
/* TooBig packet may have updated dst->dev's mtu */
if (dst && dst_mtu(dst) > dst->dev->mtu)
dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
}

View File

@ -471,15 +471,16 @@ static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
u8 *type, u8 *code, int *msg, __u32 *info, int offset)
{
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
struct ip6_tnl *t;
int rel_msg = 0;
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
struct net *net = dev_net(skb->dev);
u8 rel_type = ICMPV6_DEST_UNREACH;
u8 rel_code = ICMPV6_ADDR_UNREACH;
u8 tproto;
__u32 rel_info = 0;
__u16 len;
struct ip6_tnl *t;
int err = -ENOENT;
int rel_msg = 0;
u8 tproto;
__u16 len;
/* If the packet doesn't contain the original IPv6 header we are
in trouble since we might need the source address for further
@ -497,9 +498,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
err = 0;
switch (*type) {
__u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu;
__u32 mtu, teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@ -530,11 +530,11 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
}
break;
case ICMPV6_PKT_TOOBIG:
ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
sock_net_uid(net, NULL));
mtu = *info - offset;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
if (len > mtu) {
rel_type = ICMPV6_PKT_TOOBIG;
@ -543,6 +543,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
case NDISC_REDIRECT:
ip6_redirect(skb, net, skb->dev->ifindex, 0,
sock_net_uid(net, NULL));
break;
}
*type = rel_type;
@ -559,13 +563,12 @@ static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
int rel_msg = 0;
__u32 rel_info = ntohl(info);
const struct iphdr *eiph;
struct sk_buff *skb2;
int err, rel_msg = 0;
u8 rel_type = type;
u8 rel_code = code;
__u32 rel_info = ntohl(info);
int err;
struct sk_buff *skb2;
const struct iphdr *eiph;
struct rtable *rt;
struct flowi4 fl4;
@ -590,10 +593,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_FRAG_NEEDED;
break;
case NDISC_REDIRECT:
rel_type = ICMP_REDIRECT;
rel_code = ICMP_REDIR_HOST;
/* fall through */
default:
return 0;
}
@ -612,33 +611,26 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
eiph = ip_hdr(skb2);
/* Try to guess incoming interface */
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->saddr, 0,
0, 0,
IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
if (IS_ERR(rt))
goto out;
skb2->dev = rt->dst.dev;
ip_rt_put(rt);
/* route "incoming" packet */
if (rt->rt_flags & RTCF_LOCAL) {
ip_rt_put(rt);
rt = NULL;
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->daddr, eiph->saddr,
0, 0,
IPPROTO_IPIP,
RT_TOS(eiph->tos), 0);
if (IS_ERR(rt) ||
rt->dst.dev->type != ARPHRD_TUNNEL) {
eiph->daddr, eiph->saddr, 0, 0,
IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
if (!IS_ERR(rt))
ip_rt_put(rt);
goto out;
}
skb_dst_set(skb2, &rt->dst);
} else {
ip_rt_put(rt);
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
skb2->dev) ||
skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
@ -650,10 +642,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
rel_info);
}
if (rel_type == ICMP_REDIRECT)
skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@ -666,11 +657,10 @@ static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
int rel_msg = 0;
__u32 rel_info = ntohl(info);
int err, rel_msg = 0;
u8 rel_type = type;
u8 rel_code = code;
__u32 rel_info = ntohl(info);
int err;
err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
&rel_msg, &rel_info, offset);