From 333fad5364d6b457c8d837f7d05802d2aaf8a961 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 8 Sep 2005 09:59:17 +0900 Subject: [PATCH 01/13] [IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542). Support several new socket options / ancillary data: IPV6_RECVPKTINFO, IPV6_PKTINFO, IPV6_RECVHOPOPTS, IPV6_HOPOPTS, IPV6_RECVDSTOPTS, IPV6_DSTOPTS, IPV6_RTHDRDSTOPTS, IPV6_RECVRTHDR, IPV6_RTHDR, IPV6_RECVHOPOPTS, IPV6_HOPOPTS Old semantics are preserved as IPV6_2292xxxx so that we can maintain backward compatibility. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/in6.h | 38 +++++++-- include/linux/ipv6.h | 10 ++- include/net/ipv6.h | 4 + net/ipv6/datagram.c | 110 ++++++++++++++++++++++++-- net/ipv6/exthdrs.c | 107 ++++++++++++++++++++++++- net/ipv6/ip6_flowlabel.c | 12 ++- net/ipv6/ipv6_sockglue.c | 164 ++++++++++++++++++++++++++++++++++++--- net/ipv6/raw.c | 3 +- net/ipv6/tcp_ipv6.c | 21 +++-- net/ipv6/udp.c | 3 +- 10 files changed, 423 insertions(+), 49 deletions(-) diff --git a/include/linux/in6.h b/include/linux/in6.h index dcf5720ffcbb..c11022f2f2ac 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -148,13 +148,13 @@ struct in6_flowlabel_req */ #define IPV6_ADDRFORM 1 -#define IPV6_PKTINFO 2 -#define IPV6_HOPOPTS 3 -#define IPV6_DSTOPTS 4 -#define IPV6_RTHDR 5 -#define IPV6_PKTOPTIONS 6 +#define IPV6_2292PKTINFO 2 +#define IPV6_2292HOPOPTS 3 +#define IPV6_2292DSTOPTS 4 +#define IPV6_2292RTHDR 5 +#define IPV6_2292PKTOPTIONS 6 #define IPV6_CHECKSUM 7 -#define IPV6_HOPLIMIT 8 +#define IPV6_2292HOPLIMIT 8 #define IPV6_NEXTHOP 9 #define IPV6_AUTHHDR 10 /* obsolete */ #define IPV6_FLOWINFO 11 @@ -198,4 +198,30 @@ struct in6_flowlabel_req * MCAST_MSFILTER 48 */ +/* RFC3542 advanced socket options (50-67) */ +#define IPV6_RECVPKTINFO 50 +#define IPV6_PKTINFO 51 +#if 0 +#define IPV6_RECVPATHMTU 52 +#define IPV6_PATHMTU 53 +#define IPV6_DONTFRAG 54 +#define IPV6_USE_MIN_MTU 55 +#endif +#define IPV6_RECVHOPOPTS 56 +#define IPV6_HOPOPTS 57 +#if 0 +#define IPV6_RECVRTHDRDSTOPTS 58 /* Unused, see net/ipv6/datagram.c */ +#endif +#define IPV6_RTHDRDSTOPTS 59 +#define IPV6_RECVRTHDR 60 +#define IPV6_RTHDR 61 +#define IPV6_RECVDSTOPTS 62 +#define IPV6_DSTOPTS 63 +#define IPV6_RECVHOPLIMIT 64 +#define IPV6_HOPLIMIT 65 +#if 0 +#define IPV6_RECVTCLASS 66 +#define IPV6_TCLASS 67 +#endif + #endif diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 3c7dbc6a0a70..2581f1c94df5 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -189,6 +189,7 @@ struct inet6_skb_parm { __u16 dst0; __u16 srcrt; __u16 dst1; + __u16 lastopt; }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) @@ -234,14 +235,19 @@ struct ipv6_pinfo { /* pktoption flags */ union { struct { - __u8 srcrt:2, + __u16 srcrt:2, + osrcrt:2, rxinfo:1, + rxoinfo:1, rxhlim:1, + rxohlim:1, hopopts:1, + ohopopts:1, dstopts:1, + odstopts:1, rxflow:1; } bits; - __u8 all; + __u16 all; } rxopt; /* sockopt flags */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3203eaff4bd4..8a9fe9434e94 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -233,6 +233,10 @@ extern int ip6_ra_control(struct sock *sk, int sel, extern int ipv6_parse_hopopts(struct sk_buff *skb, int); extern struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); +extern struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, + int newtype, + struct ipv6_opt_hdr __user *newopt, + int newoptlen); extern int ip6_frag_nqueues; extern atomic_t ip6_frag_mem; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 01468fab3d3d..832476bbc5cb 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -394,21 +394,85 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK; put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } + + /* HbH is allowed only once */ if (np->rxopt.bits.hopopts && opt->hop) { u8 *ptr = skb->nh.raw + opt->hop; put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); } - if (np->rxopt.bits.dstopts && opt->dst0) { + + if (opt->lastopt && + (np->rxopt.bits.dstopts || np->rxopt.bits.srcrt)) { + /* + * Silly enough, but we need to reparse in order to + * report extension headers (except for HbH) + * in order. + * + * Also note that IPV6_RECVRTHDRDSTOPTS is NOT + * (and WILL NOT be) defined because + * IPV6_RECVDSTOPTS is more generic. --yoshfuji + */ + unsigned int off = sizeof(struct ipv6hdr); + u8 nexthdr = skb->nh.ipv6h->nexthdr; + + while (off <= opt->lastopt) { + unsigned len; + u8 *ptr = skb->nh.raw + off; + + switch(nexthdr) { + case IPPROTO_DSTOPTS: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + if (np->rxopt.bits.dstopts) + put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, len, ptr); + break; + case IPPROTO_ROUTING: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + if (np->rxopt.bits.srcrt) + put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, len, ptr); + break; + case IPPROTO_AH: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 2; + break; + default: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + break; + } + + off += len; + } + } + + /* socket options in old style */ + if (np->rxopt.bits.rxoinfo) { + struct in6_pktinfo src_info; + + src_info.ipi6_ifindex = opt->iif; + ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr); + put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); + } + if (np->rxopt.bits.rxohlim) { + int hlim = skb->nh.ipv6h->hop_limit; + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); + } + if (np->rxopt.bits.ohopopts && opt->hop) { + u8 *ptr = skb->nh.raw + opt->hop; + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr); + } + if (np->rxopt.bits.odstopts && opt->dst0) { u8 *ptr = skb->nh.raw + opt->dst0; - put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr); + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); } - if (np->rxopt.bits.srcrt && opt->srcrt) { + if (np->rxopt.bits.osrcrt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt); - put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, (rthdr->hdrlen+1) << 3, rthdr); + put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr); } - if (np->rxopt.bits.dstopts && opt->dst1) { + if (np->rxopt.bits.odstopts && opt->dst1) { u8 *ptr = skb->nh.raw + opt->dst1; - put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr); + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); } return 0; } @@ -438,6 +502,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, switch (cmsg->cmsg_type) { case IPV6_PKTINFO: + case IPV6_2292PKTINFO: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; @@ -492,6 +557,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg); break; + case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { err = -EINVAL; @@ -512,7 +578,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->hopopt = hdr; break; - case IPV6_DSTOPTS: + case IPV6_2292DSTOPTS: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { err = -EINVAL; goto exit_f; @@ -536,6 +602,33 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->dst1opt = hdr; break; + case IPV6_DSTOPTS: + case IPV6_RTHDRDSTOPTS: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { + err = -EINVAL; + goto exit_f; + } + + hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg); + len = ((hdr->hdrlen + 1) << 3); + if (cmsg->cmsg_len < CMSG_LEN(len)) { + err = -EINVAL; + goto exit_f; + } + if (!capable(CAP_NET_RAW)) { + err = -EPERM; + goto exit_f; + } + if (cmsg->cmsg_type == IPV6_DSTOPTS) { + opt->opt_flen += len; + opt->dst1opt = hdr; + } else { + opt->opt_nflen += len; + opt->dst0opt = hdr; + } + break; + + case IPV6_2292RTHDR: case IPV6_RTHDR: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) { err = -EINVAL; @@ -568,7 +661,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->opt_nflen += len; opt->srcrt = rthdr; - if (opt->dst1opt) { + if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) { int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3); opt->opt_nflen += dsthdrlen; @@ -579,6 +672,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, break; + case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { err = -EINVAL; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5be6da2584ee..ffcda45e2c1e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -164,6 +164,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) return -1; } + opt->lastopt = skb->h.raw - skb->nh.raw; opt->dst1 = skb->h.raw - skb->nh.raw; if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { @@ -243,6 +244,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp) looped_back: if (hdr->segments_left == 0) { + opt->lastopt = skb->h.raw - skb->nh.raw; opt->srcrt = skb->h.raw - skb->nh.raw; skb->h.raw += (hdr->hdrlen + 1) << 3; opt->dst0 = opt->dst1; @@ -539,10 +541,15 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, struct in6_addr **daddr) { - if (opt->srcrt) + if (opt->srcrt) { ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); - if (opt->dst0opt) - ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); + /* + * IPV6_RTHDRDSTOPTS is ignored + * unless IPV6_RTHDR is set (RFC3542). + */ + if (opt->dst0opt) + ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); + } if (opt->hopopt) ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); } @@ -573,3 +580,97 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) } return opt2; } + +static int ipv6_renew_option(void *ohdr, + struct ipv6_opt_hdr __user *newopt, int newoptlen, + int inherit, + struct ipv6_opt_hdr **hdr, + char **p) +{ + if (inherit) { + if (ohdr) { + memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr)); + *hdr = (struct ipv6_opt_hdr *)*p; + *p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr)); + } + } else { + if (newopt) { + if (copy_from_user(*p, newopt, newoptlen)) + return -EFAULT; + *hdr = (struct ipv6_opt_hdr *)*p; + if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen) + return -EINVAL; + *p += CMSG_ALIGN(newoptlen); + } + } + return 0; +} + +struct ipv6_txoptions * +ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, + int newtype, + struct ipv6_opt_hdr __user *newopt, int newoptlen) +{ + int tot_len = 0; + char *p; + struct ipv6_txoptions *opt2; + int err; + + if (newtype != IPV6_HOPOPTS && opt->hopopt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt)); + if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt)); + if (newtype != IPV6_RTHDR && opt->srcrt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt)); + if (newtype != IPV6_DSTOPTS && opt->dst1opt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt)); + if (newopt && newoptlen) + tot_len += CMSG_ALIGN(newoptlen); + + if (!tot_len) + return NULL; + + opt2 = sock_kmalloc(sk, tot_len, GFP_ATOMIC); + if (!opt2) + return ERR_PTR(-ENOBUFS); + + memset(opt2, 0, tot_len); + + opt2->tot_len = tot_len; + p = (char *)(opt2 + 1); + + err = ipv6_renew_option(opt->hopopt, newopt, newoptlen, + newtype != IPV6_HOPOPTS, + &opt2->hopopt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->dst0opt, newopt, newoptlen, + newtype != IPV6_RTHDRDSTOPTS, + &opt2->dst0opt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->srcrt, newopt, newoptlen, + newtype != IPV6_RTHDR, + (struct ipv6_opt_hdr **)opt2->srcrt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->dst1opt, newopt, newoptlen, + newtype != IPV6_DSTOPTS, + &opt2->dst1opt, &p); + if (err) + goto out; + + opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) + + (opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) + + (opt2->srcrt ? ipv6_optlen(opt2->srcrt) : 0); + opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0); + + return opt2; +out: + sock_kfree_s(sk, p, tot_len); + return ERR_PTR(err); +} + diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b6c73da5ff35..2d5ce376c265 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -225,16 +225,20 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space, struct ip6_flowlabel * fl, struct ipv6_txoptions * fopt) { - struct ipv6_txoptions * fl_opt = fl->opt; + struct ipv6_txoptions * fl_opt = fl ? fl->opt : NULL; - if (fopt == NULL || fopt->opt_flen == 0) - return fl_opt; + if (fopt == NULL || fopt->opt_flen == 0) { + if (!fl_opt || !fl_opt->dst0opt || fl_opt->srcrt) + return fl_opt; + } if (fl_opt != NULL) { opt_space->hopopt = fl_opt->hopopt; - opt_space->dst0opt = fl_opt->dst0opt; + opt_space->dst0opt = fl_opt->srcrt ? fl_opt->dst0opt : NULL; opt_space->srcrt = fl_opt->srcrt; opt_space->opt_nflen = fl_opt->opt_nflen; + if (fl_opt->dst0opt && !fl_opt->srcrt) + opt_space->opt_nflen -= ipv6_optlen(fl_opt->dst0opt); } else { if (fopt->opt_nflen == 0) return fopt; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 76466af8331e..dc1d9914bf7d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -210,39 +210,127 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; - case IPV6_PKTINFO: + case IPV6_RECVPKTINFO: np->rxopt.bits.rxinfo = valbool; retv = 0; break; + + case IPV6_2292PKTINFO: + np->rxopt.bits.rxoinfo = valbool; + retv = 0; + break; - case IPV6_HOPLIMIT: + case IPV6_RECVHOPLIMIT: np->rxopt.bits.rxhlim = valbool; retv = 0; break; - case IPV6_RTHDR: + case IPV6_2292HOPLIMIT: + np->rxopt.bits.rxohlim = valbool; + retv = 0; + break; + + case IPV6_RECVRTHDR: if (val < 0 || val > 2) goto e_inval; np->rxopt.bits.srcrt = val; retv = 0; break; - case IPV6_HOPOPTS: + case IPV6_2292RTHDR: + if (val < 0 || val > 2) + goto e_inval; + np->rxopt.bits.osrcrt = val; + retv = 0; + break; + + case IPV6_RECVHOPOPTS: np->rxopt.bits.hopopts = valbool; retv = 0; break; - case IPV6_DSTOPTS: + case IPV6_2292HOPOPTS: + np->rxopt.bits.ohopopts = valbool; + retv = 0; + break; + + case IPV6_RECVDSTOPTS: np->rxopt.bits.dstopts = valbool; retv = 0; break; + case IPV6_2292DSTOPTS: + np->rxopt.bits.odstopts = valbool; + retv = 0; + break; + case IPV6_FLOWINFO: np->rxopt.bits.rxflow = valbool; retv = 0; break; - case IPV6_PKTOPTIONS: + case IPV6_HOPOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_RTHDR: + case IPV6_DSTOPTS: + { + struct ipv6_txoptions *opt; + if (optlen == 0) + optval = 0; + + /* hop-by-hop / destination options are privileged option */ + retv = -EPERM; + if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) + break; + + retv = -EINVAL; + if (optlen & 0x7 || optlen > 8 * 255) + break; + + opt = ipv6_renew_options(sk, np->opt, optname, + (struct ipv6_opt_hdr __user *)optval, + optlen); + if (IS_ERR(opt)) { + retv = PTR_ERR(opt); + break; + } + + /* routing header option needs extra check */ + if (optname == IPV6_RTHDR && opt->srcrt) { + struct ipv6_rt_hdr *rthdr = opt->srcrt; + if (rthdr->type) + goto sticky_done; + if ((rthdr->hdrlen & 1) || + (rthdr->hdrlen >> 1) != rthdr->segments_left) + goto sticky_done; + } + + retv = 0; + if (sk->sk_type == SOCK_STREAM) { + if (opt) { + struct tcp_sock *tp = tcp_sk(sk); + if (!((1 << sk->sk_state) & + (TCPF_LISTEN | TCPF_CLOSE)) + && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { + tp->ext_header_len = opt->opt_flen + opt->opt_nflen; + tcp_sync_mss(sk, tp->pmtu_cookie); + } + } + opt = xchg(&np->opt, opt); + sk_dst_reset(sk); + } else { + write_lock(&sk->sk_dst_lock); + opt = xchg(&np->opt, opt); + write_unlock(&sk->sk_dst_lock); + sk_dst_reset(sk); + } +sticky_done: + if (opt) + sock_kfree_s(sk, opt, opt->tot_len); + break; + } + + case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; @@ -529,6 +617,17 @@ e_inval: return -EINVAL; } +int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_opt_hdr *hdr, + char __user *optval, int len) +{ + if (!hdr) + return 0; + len = min_t(int, len, ipv6_optlen(hdr)); + if (copy_to_user(optval, hdr, ipv6_optlen(hdr))) + return -EFAULT; + return len; +} + int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -567,7 +666,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, return err; } - case IPV6_PKTOPTIONS: + case IPV6_2292PKTOPTIONS: { struct msghdr msg; struct sk_buff *skb; @@ -601,6 +700,16 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxoinfo) { + struct in6_pktinfo src_info; + src_info.ipi6_ifindex = np->mcast_oif; + ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); + put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); + } + if (np->rxopt.bits.rxohlim) { + int hlim = np->mcast_hops; + put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); + } } len -= msg.msg_controllen; return put_user(len, optlen); @@ -625,26 +734,59 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->ipv6only; break; - case IPV6_PKTINFO: + case IPV6_RECVPKTINFO: val = np->rxopt.bits.rxinfo; break; - case IPV6_HOPLIMIT: + case IPV6_2292PKTINFO: + val = np->rxopt.bits.rxoinfo; + break; + + case IPV6_RECVHOPLIMIT: val = np->rxopt.bits.rxhlim; break; - case IPV6_RTHDR: + case IPV6_2292HOPLIMIT: + val = np->rxopt.bits.rxohlim; + break; + + case IPV6_RECVRTHDR: val = np->rxopt.bits.srcrt; break; + case IPV6_2292RTHDR: + val = np->rxopt.bits.osrcrt; + break; + case IPV6_HOPOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_RTHDR: + case IPV6_DSTOPTS: + { + + lock_sock(sk); + len = ipv6_getsockopt_sticky(sk, np->opt->hopopt, + optval, len); + release_sock(sk); + return put_user(len, optlen); + } + + case IPV6_RECVHOPOPTS: val = np->rxopt.bits.hopopts; break; - case IPV6_DSTOPTS: + case IPV6_2292HOPOPTS: + val = np->rxopt.bits.ohopopts; + break; + + case IPV6_RECVDSTOPTS: val = np->rxopt.bits.dstopts; break; + case IPV6_2292DSTOPTS: + val = np->rxopt.bits.odstopts; + break; + case IPV6_FLOWINFO: val = np->rxopt.bits.rxflow; break; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ed3a76b30fd9..e527a1652d7c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -755,8 +755,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, } if (opt == NULL) opt = np->opt; - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); + opt = fl6_merge_options(&opt_space, flowlabel, opt); fl.proto = proto; rawv6_probe_proto_opt(&fl, msg); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 794734f1d230..246414b27d0e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -849,7 +849,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, if (dst == NULL) { opt = np->opt; if (opt == NULL && - np->rxopt.bits.srcrt == 2 && + np->rxopt.bits.osrcrt == 2 && treq->pktopts) { struct sk_buff *pktopts = treq->pktopts; struct inet6_skb_parm *rxopt = IP6CB(pktopts); @@ -915,11 +915,10 @@ static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); if (np->rxopt.all) { - if ((opt->hop && np->rxopt.bits.hopopts) || - ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) && - np->rxopt.bits.rxflow) || - (opt->srcrt && np->rxopt.bits.srcrt) || - ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts)) + if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || + ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) || + (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || + ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) return 1; } return 0; @@ -1190,8 +1189,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, skb->h.th); treq->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || - np->rxopt.bits.rxhlim) { + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); treq->pktopts = skb; } @@ -1288,7 +1287,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (np->rxopt.bits.srcrt == 2 && + if (np->rxopt.bits.osrcrt == 2 && opt == NULL && treq->pktopts) { struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts); if (rxopt->srcrt) @@ -1544,9 +1543,9 @@ ipv6_pktoptions: tp = tcp_sk(sk); if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - if (np->rxopt.bits.rxinfo) + if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = inet6_iif(opt_skb); - if (np->rxopt.bits.rxhlim) + if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 390d750449ce..aa6eaf3f18a6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -773,8 +773,7 @@ do_udp_sendmsg: } if (opt == NULL) opt = np->opt; - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); + opt = fl6_merge_options(&opt_space, flowlabel, opt); fl->proto = IPPROTO_UDP; ipv6_addr_copy(&fl->fl6_dst, daddr); From 41a1f8ea4fbfcdc4232f023732584aae2220de31 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 8 Sep 2005 10:19:03 +0900 Subject: [PATCH 02/13] [IPV6]: Support IPV6_{RECV,}TCLASS socket options / ancillary data. Based on patch from David L Stevens Signed-off-by: David L Stevens Signed-off-by: YOSHIFUJI Hideaki --- include/linux/in6.h | 2 -- include/linux/ipv6.h | 5 ++++- include/net/ipv6.h | 1 + include/net/transp_v6.h | 2 +- net/ipv6/datagram.c | 25 ++++++++++++++++++++++++- net/ipv6/icmp.c | 15 ++++++++++++--- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/ip6_output.c | 24 +++++++++++++++++------- net/ipv6/ipv6_sockglue.c | 22 +++++++++++++++++++++- net/ipv6/raw.c | 14 +++++++++++--- net/ipv6/udp.c | 16 ++++++++++++---- 11 files changed, 104 insertions(+), 24 deletions(-) diff --git a/include/linux/in6.h b/include/linux/in6.h index c11022f2f2ac..bd32b79d6295 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -219,9 +219,7 @@ struct in6_flowlabel_req #define IPV6_DSTOPTS 63 #define IPV6_RECVHOPLIMIT 64 #define IPV6_HOPLIMIT 65 -#if 0 #define IPV6_RECVTCLASS 66 #define IPV6_TCLASS 67 -#endif #endif diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2581f1c94df5..6c5f7b39a4b0 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -245,7 +245,8 @@ struct ipv6_pinfo { ohopopts:1, dstopts:1, odstopts:1, - rxflow:1; + rxflow:1, + rxtclass:1; } bits; __u16 all; } rxopt; @@ -256,6 +257,7 @@ struct ipv6_pinfo { sndflow:1, pmtudisc:2, ipv6only:1; + __u8 tclass; __u32 dst_cookie; @@ -269,6 +271,7 @@ struct ipv6_pinfo { struct ipv6_txoptions *opt; struct rt6_info *rt; int hop_limit; + int tclass; } cork; }; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8a9fe9434e94..65ec86678a08 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -377,6 +377,7 @@ extern int ip6_append_data(struct sock *sk, int length, int transhdrlen, int hlimit, + int tclass, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 8b075ab7a26c..4e86f2de6638 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -37,7 +37,7 @@ extern int datagram_recv_ctl(struct sock *sk, extern int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, - int *hlimit); + int *hlimit, int *tclass); #define LOOPBACK4_IPV6 __constant_htonl(0x7f000006) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 832476bbc5cb..157cec648032 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -390,6 +390,11 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxtclass) { + int tclass = (ntohl(*(u32 *)skb->nh.ipv6h) >> 20) & 0xff; + put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); + } + if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) { u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK; put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); @@ -479,7 +484,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, - int *hlimit) + int *hlimit, int *tclass) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -682,6 +687,24 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, *hlimit = *(int *)CMSG_DATA(cmsg); break; + case IPV6_TCLASS: + { + int tc; + + err = -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { + goto exit_f; + } + + tc = *(int *)CMSG_DATA(cmsg); + if (tc < 0 || tc > 0xff) + goto exit_f; + + err = 0; + *tclass = tc; + + break; + } default: LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fa8f1bb0aa52..34e99c55e856 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -287,7 +287,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, int iif = 0; int addr_type = 0; int len; - int hlimit; + int hlimit, tclass; int err = 0; if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail) @@ -385,6 +385,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + msg.skb = skb; msg.offset = skb->nh.raw - skb->data; @@ -400,7 +404,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), - hlimit, NULL, &fl, (struct rt6_info*)dst, + hlimit, tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT); if (err) { ip6_flush_pending_frames(sk); @@ -434,6 +438,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; int hlimit; + int tclass; saddr = &skb->nh.ipv6h->daddr; @@ -475,13 +480,17 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + idev = in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, NULL, &fl, + sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT); if (err) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 2d5ce376c265..a7db762de14a 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -314,7 +314,7 @@ fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int * msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk); + err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 01ef94f7c7f1..2f589f24c093 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -166,7 +166,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct ipv6hdr *hdr; u8 proto = fl->proto; int seg_len = skb->len; - int hlimit; + int hlimit, tclass; u32 mtu; if (opt) { @@ -202,7 +202,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, * Fill in the IPv6 header */ - *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel; hlimit = -1; if (np) hlimit = np->hop_limit; @@ -211,6 +210,14 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = -1; + if (np) + tclass = np->tclass; + if (tclass < 0) + tclass = 0; + + *(u32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; + hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; @@ -762,10 +769,11 @@ out_err_release: return err; } -int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, - unsigned int flags) +int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, + int offset, int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, + struct rt6_info *rt, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -803,6 +811,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse np->cork.rt = rt; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; + np->cork.tclass = tclass; inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); if (dst_allfrag(rt->u.dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; @@ -1084,7 +1093,8 @@ int ip6_push_pending_frames(struct sock *sk) skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr)); - *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000); + *(u32*)hdr = fl->fl6_flowlabel | + htonl(0x60000000 | ((int)np->cork.tclass << 20)); if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index dc1d9914bf7d..8567873d0dd8 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -264,6 +264,18 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; + case IPV6_TCLASS: + if (val < 0 || val > 0xff) + goto e_inval; + np->tclass = val; + retv = 0; + break; + + case IPV6_RECVTCLASS: + np->rxopt.bits.rxtclass = valbool; + retv = 0; + break; + case IPV6_FLOWINFO: np->rxopt.bits.rxflow = valbool; retv = 0; @@ -364,7 +376,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(&msg, &fl, opt, &junk); + retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); if (retv) goto done; update: @@ -787,6 +799,14 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->rxopt.bits.odstopts; break; + case IPV6_TCLASS: + val = np->tclass; + break; + + case IPV6_RECVTCLASS: + val = np->rxopt.bits.rxtclass; + break; + case IPV6_FLOWINFO: val = np->rxopt.bits.rxflow; break; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e527a1652d7c..2ad37893334a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -655,6 +655,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct flowi fl; int addr_len = msg->msg_namelen; int hlimit = -1; + int tclass = -1; u16 proto; int err; @@ -740,7 +741,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(msg, &fl, opt, &hlimit); + err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -797,6 +798,12 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, hlimit = ipv6_get_hoplimit(dst->dev); } + if (tclass < 0) { + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + } + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -805,8 +812,9 @@ back_from_confirm: err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); } else { lock_sock(sk); - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, - hlimit, opt, &fl, (struct rt6_info*)dst, msg->msg_flags); + err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, + len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, + msg->msg_flags); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index aa6eaf3f18a6..dbd18a9d1669 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -637,6 +637,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int addr_len = msg->msg_namelen; int ulen = len; int hlimit = -1; + int tclass = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; @@ -758,7 +759,7 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, fl, opt, &hlimit); + err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -814,6 +815,12 @@ do_udp_sendmsg: hlimit = ipv6_get_hoplimit(dst->dev); } + if (tclass < 0) { + tclass = np->tclass; + if (tclass < 0) + tclass = 0; + } + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -833,9 +840,10 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), - hlimit, opt, fl, (struct rt6_info*)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), hlimit, tclass, opt, fl, + (struct rt6_info*)dst, + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) From 0e4e4220f10bf8f58a8606f0cb28538088c64b1a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 8 Sep 2005 12:32:03 -0700 Subject: [PATCH 03/13] [NET]: Optimize pskb_trim_rcsum() Since packets almost never contain extra garbage at the end, it is worthwhile to optimize for that case. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index da7da9c0ed1b..2741c0c55e83 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1167,7 +1167,7 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { - if (len >= skb->len) + if (likely(len >= skb->len)) return 0; if (skb->ip_summed == CHECKSUM_HW) skb->ip_summed = CHECKSUM_NONE; From e308e25c97f06cf704e65eeb773412f5460a3b93 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 8 Sep 2005 12:32:21 -0700 Subject: [PATCH 04/13] [IPV4] udp: trim forgets about CHECKSUM_HW A UDP packet may contain extra data that needs to be trimmed off. But when doing so, UDP forgets to fixup the skb checksum if CHECKSUM_HW is being used. I think this explains the case of a NFS receive using skge driver causing 'udp hw checksum failures' when interacting with a crufty settop box. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e5beca7de86c..e0bd1013cb0d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,7 +1141,7 @@ int udp_rcv(struct sk_buff *skb) if (ulen > len || ulen < sizeof(*uh)) goto short_packet; - if (pskb_trim(skb, ulen)) + if (pskb_trim_rcsum(skb, ulen)) goto short_packet; if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0) From e50ef933e649a2b43aa10c8a60c491543b8b4c02 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 8 Sep 2005 12:32:46 -0700 Subject: [PATCH 05/13] [NET]: Need struct sock forward decl in net/compat.h Else we get build failures like: CC arch/sparc64/kernel/sparc64_ksyms.o In file included from arch/sparc64/kernel/sparc64_ksyms.c:28: include/net/compat.h:37: warning: "struct sock" declared inside parameter list include/net/compat.h:37: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: David S. Miller --- include/net/compat.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/compat.h b/include/net/compat.h index 482eb820f13a..290bab46d457 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -33,7 +33,8 @@ extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsi extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *); extern int put_cmsg_compat(struct msghdr*, int, int, int, void *); -extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, unsigned char *, - int); + +struct sock; +extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, unsigned char *, int); #endif /* NET_COMPAT_H */ From 42ca89c18b75e1c4c3b02aa5589ad3aa916909a8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 8 Sep 2005 12:57:43 -0700 Subject: [PATCH 06/13] [IPV6]: Need to use pskb_trim_rcsum(). Fix pskb_trim usage in ipv6. Only the udp one is really a bug, other places are just doing equivalent code. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 9 ++++----- net/ipv6/reassembly.c | 9 +++------ net/ipv6/udp.c | 2 +- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5be6da2584ee..3b9fa900a4bf 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -459,11 +459,10 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } - if (pkt_len + sizeof(struct ipv6hdr) < skb->len) { - __pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr)); - if (skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_NONE; - } + + if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) + goto drop; + return 1; drop: diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 9d9e04344c77..e4fe9ee484dd 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -479,12 +479,9 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) goto err; - if (end-offset < skb->len) { - if (pskb_trim(skb, end - offset)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } + + if (pskb_trim_rcsum(skb, end - offset)) + goto err; /* Find out which fragments are in front and at the back of us * in the chain of fragments so far. We must know where to put diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 390d750449ce..7cbcaa30cf5e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -483,7 +483,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) } if (ulen < skb->len) { - if (__pskb_trim(skb, ulen)) + if (pskb_trim_rcsum(skb, ulen)) goto discard; saddr = &skb->nh.ipv6h->saddr; daddr = &skb->nh.ipv6h->daddr; From c921e4c4dbb043f9435414c4e661e7f0a783053d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 8 Sep 2005 13:15:32 -0700 Subject: [PATCH 07/13] [BNX2]: Fix bug in irq handler and add prefetch Fix bug in bnx2_interrupt() that caused an unnecessary register read. The BNX2_PCICFG_MISC_STATUS should only be read when the status tag has not changed. Add prefetch of the status block in bnx2_msi() similar to tg3_msi(). The status block is not touched in bnx2_msi() and prefetching it will speed up bnx2_poll() that will run on the same CPU that received the MSI. Update version. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 7 ++++--- drivers/net/bnx2.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 55a72c7ad001..83598e32179c 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -14,8 +14,8 @@ #define DRV_MODULE_NAME "bnx2" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "1.2.20" -#define DRV_MODULE_RELDATE "August 22, 2005" +#define DRV_MODULE_VERSION "1.2.21" +#define DRV_MODULE_RELDATE "September 7, 2005" #define RUN_AT(x) (jiffies + (x)) @@ -1533,6 +1533,7 @@ bnx2_msi(int irq, void *dev_instance, struct pt_regs *regs) struct net_device *dev = dev_instance; struct bnx2 *bp = dev->priv; + prefetch(bp->status_blk); REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, BNX2_PCICFG_INT_ACK_CMD_USE_INT_HC_PARAM | BNX2_PCICFG_INT_ACK_CMD_MASK_INT); @@ -1558,7 +1559,7 @@ bnx2_interrupt(int irq, void *dev_instance, struct pt_regs *regs) * When using MSI, the MSI message will always complete after * the status block write. */ - if ((bp->status_blk->status_idx == bp->last_status_idx) || + if ((bp->status_blk->status_idx == bp->last_status_idx) && (REG_RD(bp, BNX2_PCICFG_MISC_STATUS) & BNX2_PCICFG_MISC_STATUS_INTA_VALUE)) return IRQ_NONE; diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index 9ad3f5740cd8..62857b6a6ee4 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -50,6 +50,7 @@ #endif #include #include +#include /* Hardware data structures and register definitions automatically * generated from RTL code. Do not modify. From ce723d8e048ef98ea64d12379e3921c933f5b3e0 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 8 Sep 2005 13:34:47 -0700 Subject: [PATCH 08/13] [IPV4]: Fix refcount damaging in net/ipv4/route.c One such place that can damage the dst refcnts is route.c with CONFIG_IP_ROUTE_MULTIPATH_CACHED enabled, i don't see the user's .config. In this new code i see that rt_intern_hash is called before dst->refcnt is set to 1, dst is the 2nd arg to rt_intern_hash. Arg 2 of rt_intern_hash must come with refcnt 1 as it is added to table or dropped depending on error/add/update. One such example is ip_mkroute_input where __mkroute_input return rth with refcnt 0 which is provided to rt_intern_hash. ip_mkroute_output looks like a 2nd such place. Appending untested patch for comments and review. The idea is to put previous reference as we are going to return next result/error. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/route.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8c0b14e3beec..8549f26e2495 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1760,6 +1760,7 @@ static inline int __mkroute_input(struct sk_buff *skb, goto cleanup; } + atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED if (res->fi->fib_nhs > 1) @@ -1820,7 +1821,6 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); if (err) return err; - atomic_set(&rth->u.dst.__refcnt, 1); /* put it into the cache */ hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); @@ -1834,8 +1834,8 @@ static inline int ip_mkroute_input(struct sk_buff *skb, u32 daddr, u32 saddr, u32 tos) { #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct rtable* rth = NULL; - unsigned char hop, hopcount, lasthop; + struct rtable* rth = NULL, *rtres; + unsigned char hop, hopcount; int err = -EINVAL; unsigned int hash; @@ -1844,8 +1844,6 @@ static inline int ip_mkroute_input(struct sk_buff *skb, else hopcount = 1; - lasthop = hopcount - 1; - /* distinguish between multipath and singlepath */ if (hopcount < 2) return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, @@ -1855,6 +1853,10 @@ static inline int ip_mkroute_input(struct sk_buff *skb, for (hop = 0; hop < hopcount; hop++) { res->nh_sel = hop; + /* put reference to previous result */ + if (hop) + ip_rt_put(rtres); + /* create a routing cache entry */ err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); @@ -1863,7 +1865,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); - err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); + err = rt_intern_hash(hash, rth, &rtres); if (err) return err; @@ -1873,13 +1875,8 @@ static inline int ip_mkroute_input(struct sk_buff *skb, FIB_RES_NETMASK(*res), res->prefixlen, &FIB_RES_NH(*res)); - - /* only for the last hop the reference count is handled - * outside - */ - if (hop == lasthop) - atomic_set(&(skb->dst->__refcnt), 1); } + skb->dst = &rtres->u.dst; return err; #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); @@ -2208,6 +2205,7 @@ static inline int __mkroute_output(struct rtable **result, goto cleanup; } + atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED if (res->fi) { @@ -2290,8 +2288,6 @@ static inline int ip_mkroute_output_def(struct rtable **rp, if (err == 0) { u32 tos = RT_FL_TOS(oldflp); - atomic_set(&rth->u.dst.__refcnt, 1); - hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src ^ (oldflp->oif << 5), tos); err = rt_intern_hash(hash, rth, rp); @@ -2326,6 +2322,10 @@ static inline int ip_mkroute_output(struct rtable** rp, dev2nexthop = FIB_RES_DEV(*res); dev_hold(dev2nexthop); + /* put reference to previous result */ + if (hop) + ip_rt_put(*rp); + err = __mkroute_output(&rth, res, fl, oldflp, dev2nexthop, flags); @@ -2350,7 +2350,6 @@ static inline int ip_mkroute_output(struct rtable** rp, if (err != 0) return err; } - atomic_set(&(*rp)->u.dst.__refcnt, 1); return err; } else { return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, From 3a93481589dc80d9ff9082731f35031b0345442e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 8 Sep 2005 13:36:34 -0700 Subject: [PATCH 09/13] [NETFILTER]: ip_conntrack_netbios_ns.c gcc-2.95.x build fix gcc-2.95.x can't do this sort of initialisation Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 24 ++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 2b5cf9c51309..bb7246683b74 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -104,12 +104,28 @@ out: static struct ip_conntrack_helper helper = { .name = "netbios-ns", .tuple = { - .src.u.udp.port = __constant_htons(137), - .dst.protonum = IPPROTO_UDP, + .src = { + .u = { + .udp = { + .port = __constant_htons(137), + } + } + }, + .dst = { + .protonum = IPPROTO_UDP, + }, }, .mask = { - .src.u.udp.port = 0xFFFF, - .dst.protonum = 0xFF, + .src = { + .u = { + .udp = { + .port = 0xFFFF, + } + } + }, + .dst = { + .protonum = 0xFF, + }, }, .max_expected = 1, .me = THIS_MODULE, From baed16a7ff5194487764db300c2753ac7409c4c5 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 8 Sep 2005 13:40:41 -0700 Subject: [PATCH 10/13] [AX.25]: Make asc2ax() thread-proof Asc2ax was still using a static buffer for all invocations which isn't exactly SMP-safe. Change asc2ax to take an additional result buffer as the argument. Change all callers to provide such a buffer. This one only really is a fix for ROSE and as per recent discussions there's still much more to fix in ROSE ... Signed-off-by: Ralf Baechle DL5RB Signed-off-by: David S. Miller --- include/net/ax25.h | 2 +- net/ax25/ax25_addr.c | 27 ++++++++++++--------------- net/rose/rose_subr.c | 4 ++-- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index 364b046e9f47..227d3378decd 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -258,7 +258,7 @@ extern struct sock *ax25_make_new(struct sock *, struct ax25_dev *); /* ax25_addr.c */ extern ax25_address null_ax25_address; extern char *ax2asc(char *buf, ax25_address *); -extern ax25_address *asc2ax(char *); +extern void asc2ax(ax25_address *addr, char *callsign); extern int ax25cmp(ax25_address *, ax25_address *); extern int ax25digicmp(ax25_digi *, ax25_digi *); extern unsigned char *ax25_addr_parse(unsigned char *, int, ax25_address *, ax25_address *, ax25_digi *, int *, int *); diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c index dca179daf415..0164a155b8c4 100644 --- a/net/ax25/ax25_addr.c +++ b/net/ax25/ax25_addr.c @@ -67,37 +67,34 @@ char *ax2asc(char *buf, ax25_address *a) /* * ascii -> ax25 conversion */ -ax25_address *asc2ax(char *callsign) +void asc2ax(ax25_address *addr, char *callsign) { - static ax25_address addr; char *s; int n; for (s = callsign, n = 0; n < 6; n++) { if (*s != '\0' && *s != '-') - addr.ax25_call[n] = *s++; + addr->ax25_call[n] = *s++; else - addr.ax25_call[n] = ' '; - addr.ax25_call[n] <<= 1; - addr.ax25_call[n] &= 0xFE; + addr->ax25_call[n] = ' '; + addr->ax25_call[n] <<= 1; + addr->ax25_call[n] &= 0xFE; } if (*s++ == '\0') { - addr.ax25_call[6] = 0x00; - return &addr; + addr->ax25_call[6] = 0x00; + return; } - addr.ax25_call[6] = *s++ - '0'; + addr->ax25_call[6] = *s++ - '0'; if (*s != '\0') { - addr.ax25_call[6] *= 10; - addr.ax25_call[6] += *s++ - '0'; + addr->ax25_call[6] *= 10; + addr->ax25_call[6] += *s++ - '0'; } - addr.ax25_call[6] <<= 1; - addr.ax25_call[6] &= 0x1E; - - return &addr; + addr->ax25_call[6] <<= 1; + addr->ax25_call[6] &= 0x1E; } /* diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 02891ce2db37..36a77944622b 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -337,13 +337,13 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN); memcpy(callsign, p + 12, l - 10); callsign[l - 10] = '\0'; - facilities->source_call = *asc2ax(callsign); + asc2ax(&facilities->source_call, callsign); } if (*p == FAC_CCITT_SRC_NSAP) { memcpy(&facilities->dest_addr, p + 7, ROSE_ADDR_LEN); memcpy(callsign, p + 12, l - 10); callsign[l - 10] = '\0'; - facilities->dest_call = *asc2ax(callsign); + asc2ax(&facilities->dest_call, callsign); } p += l + 2; n += l + 2; From a57ebc90f1350296edded12d33d7c278831bc3bf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 8 Sep 2005 14:27:47 -0700 Subject: [PATCH 11/13] [IPV6]: Don't redo xfrm_lookup for cached dst entries The xfrm lookup is already done when the dst entry is looked up first and stored in the cache. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 09613729404c..cf94372d1af3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -673,11 +673,12 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); - else + else { dst = ip6_route_output(NULL, &fl); - if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0) - goto tx_err_link_failure; + if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0) + goto tx_err_link_failure; + } tdev = dst->dev; From cf0b450cd5176b68ac7d5bbe68aeae6bb6a5a4b8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 8 Sep 2005 15:10:52 -0700 Subject: [PATCH 12/13] [TCP]: Fix off by one in tcp_fragment() "already sent" test. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6094db5e11be..15e1134da1b2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -499,7 +499,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss /* If this packet has been sent out already, we must * adjust the various packet counters. */ - if (after(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) { + if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) { int diff = old_factor - tcp_skb_pcount(skb) - tcp_skb_pcount(buff); From e104411b82f5c4d19752c335492036abdbf5880d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 8 Sep 2005 15:11:55 -0700 Subject: [PATCH 13/13] [XFRM]: Always release dst_entry on error in xfrm_lookup Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_REJECT.c | 5 +---- net/ipv6/datagram.c | 4 +--- net/ipv6/icmp.c | 5 ++--- net/ipv6/ndisc.c | 16 ++++------------ net/ipv6/netfilter/ip6t_REJECT.c | 5 +---- net/ipv6/raw.c | 4 +--- net/ipv6/tcp_ipv6.c | 15 +++------------ net/ipv6/udp.c | 4 +--- net/xfrm/xfrm_policy.c | 8 ++++---- 9 files changed, 18 insertions(+), 48 deletions(-) diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index f115a84a4ac6..f057025a719e 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -92,10 +92,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, fl.fl_ip_sport = tcph->dest; fl.fl_ip_dport = tcph->source; - if (xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0)) { - dst_release(&rt->u.dst); - rt = NULL; - } + xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); return rt; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 157cec648032..cc518405b3e1 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -175,10 +175,8 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out; - } /* source address lookup done in ip6_dst_lookup */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 34e99c55e856..b7185fb3377c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -374,7 +374,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (err) goto out; if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) - goto out_dst_release; + goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; @@ -469,7 +469,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (err) goto out; if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) - goto out_dst_release; + goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; @@ -505,7 +505,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) out_put: if (likely(idev != NULL)) in6_dev_put(idev); -out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a7eae30f4554..555a31347eda 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -447,10 +447,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) { - dst_release(dst); + if (err < 0) return; - } if (inc_opt) { if (dev->addr_len) @@ -539,10 +537,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) { - dst_release(dst); + if (err < 0) return; - } len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); send_llinfo = dev->addr_len && !ipv6_addr_any(saddr); @@ -616,10 +612,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) { - dst_release(dst); + if (err < 0) return; - } len = sizeof(struct icmp6hdr); if (dev->addr_len) @@ -1353,10 +1347,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err) { - dst_release(dst); + if (err) return; - } rt = (struct rt6_info *) dst; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 14316c3ebde4..b03e87adca93 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -100,11 +100,8 @@ static void send_reset(struct sk_buff *oldskb) dst = ip6_route_output(NULL, &fl); if (dst == NULL) return; - if (dst->error || - xfrm_lookup(&dst, &fl, NULL, 0)) { - dst_release(dst); + if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) return; - } hh_len = (dst->dev->hard_header_len + 15)&~15; nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2ad37893334a..5aa3691c578d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -782,10 +782,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out; - } if (hlimit < 0) { if (ipv6_addr_is_multicast(&fl.fl6_dst)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 246414b27d0e..80643e6b346b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -632,10 +632,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto failure; - } if (saddr == NULL) { saddr = &fl.fl6_src; @@ -888,7 +886,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, } done: - dst_release(dst); if (opt && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); return err; @@ -1000,10 +997,8 @@ static void tcp_v6_send_reset(struct sk_buff *skb) /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { - if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) { - dst_release(buff->dst); + if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) return; - } ip6_xmit(NULL, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); @@ -1067,10 +1062,8 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 fl.fl_ip_sport = t1->source; if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { - if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) { - dst_release(buff->dst); + if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) return; - } ip6_xmit(NULL, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); return; @@ -1733,7 +1726,6 @@ static int tcp_v6_rebuild_header(struct sock *sk) if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; - dst_release(dst); return err; } @@ -1786,7 +1778,6 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { sk->sk_route_caps = 0; - dst_release(dst); return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f5ae14810a70..69b146843a20 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -799,10 +799,8 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl->fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) goto out; - } if (hlimit < 0) { if (ipv6_addr_is_multicast(&fl->fl6_dst)) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 83c8135e1764..fda737d77edc 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -765,8 +765,8 @@ restart: switch (policy->action) { case XFRM_POLICY_BLOCK: /* Prohibit the flow */ - xfrm_pol_put(policy); - return -EPERM; + err = -EPERM; + goto error; case XFRM_POLICY_ALLOW: if (policy->xfrm_nr == 0) { @@ -782,8 +782,8 @@ restart: */ dst = xfrm_find_bundle(fl, policy, family); if (IS_ERR(dst)) { - xfrm_pol_put(policy); - return PTR_ERR(dst); + err = PTR_ERR(dst); + goto error; } if (dst)