2010-02-18 12:31:05 +01:00
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/ip.h>
|
|
|
|
#include <linux/sctp.h>
|
|
|
|
#include <net/ip.h>
|
|
|
|
#include <net/ip6_checksum.h>
|
|
|
|
#include <linux/netfilter.h>
|
|
|
|
#include <linux/netfilter_ipv4.h>
|
|
|
|
#include <net/sctp/checksum.h>
|
|
|
|
#include <net/ip_vs.h>
|
|
|
|
|
|
|
|
static int
|
2011-01-03 14:44:51 +01:00
|
|
|
sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have choosen, not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
int *verdict, struct ip_vs_conn **cpp,
|
|
|
|
struct ip_vs_iphdr *iph)
|
2010-02-18 12:31:05 +01:00
|
|
|
{
|
2011-01-03 14:44:43 +01:00
|
|
|
struct net *net;
|
2010-02-18 12:31:05 +01:00
|
|
|
struct ip_vs_service *svc;
|
2013-06-13 09:56:15 +02:00
|
|
|
struct netns_ipvs *ipvs;
|
2010-02-18 12:31:05 +01:00
|
|
|
sctp_chunkhdr_t _schunkh, *sch;
|
|
|
|
sctp_sctphdr_t *sh, _sctph;
|
|
|
|
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have choosen, not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
|
2013-10-25 11:05:04 +02:00
|
|
|
if (sh == NULL) {
|
|
|
|
*verdict = NF_DROP;
|
2010-02-18 12:31:05 +01:00
|
|
|
return 0;
|
2013-10-25 11:05:04 +02:00
|
|
|
}
|
2010-02-18 12:31:05 +01:00
|
|
|
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have choosen, not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
|
2010-02-18 12:31:05 +01:00
|
|
|
sizeof(_schunkh), &_schunkh);
|
2013-10-25 11:05:04 +02:00
|
|
|
if (sch == NULL) {
|
|
|
|
*verdict = NF_DROP;
|
2010-02-18 12:31:05 +01:00
|
|
|
return 0;
|
2013-10-25 11:05:04 +02:00
|
|
|
}
|
|
|
|
|
2011-01-03 14:44:43 +01:00
|
|
|
net = skb_net(skb);
|
2013-06-13 09:56:15 +02:00
|
|
|
ipvs = net_ipvs(net);
|
2013-03-22 10:46:53 +01:00
|
|
|
rcu_read_lock();
|
2013-06-13 09:56:15 +02:00
|
|
|
if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) &&
|
2013-03-22 10:46:53 +01:00
|
|
|
(svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
|
|
|
|
&iph->daddr, sh->dest))) {
|
2010-10-17 15:24:37 +02:00
|
|
|
int ignored;
|
|
|
|
|
2013-06-13 09:56:15 +02:00
|
|
|
if (ip_vs_todrop(ipvs)) {
|
2010-02-18 12:31:05 +01:00
|
|
|
/*
|
|
|
|
* It seems that we are very loaded.
|
|
|
|
* We have to drop this packet :(
|
|
|
|
*/
|
2013-03-22 10:46:53 +01:00
|
|
|
rcu_read_unlock();
|
2010-02-18 12:31:05 +01:00
|
|
|
*verdict = NF_DROP;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Let the virtual server select a real server for the
|
|
|
|
* incoming connection, and create a connection entry.
|
|
|
|
*/
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have choosen, not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
*cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
|
2010-11-19 14:25:10 +01:00
|
|
|
if (!*cpp && ignored <= 0) {
|
|
|
|
if (!ignored)
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have choosen, not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
*verdict = ip_vs_leave(svc, skb, pd, iph);
|
2013-03-22 10:46:53 +01:00
|
|
|
else
|
2010-11-19 14:25:10 +01:00
|
|
|
*verdict = NF_DROP;
|
2013-03-22 10:46:53 +01:00
|
|
|
rcu_read_unlock();
|
2010-02-18 12:31:05 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
2013-03-22 10:46:53 +01:00
|
|
|
rcu_read_unlock();
|
2010-11-19 14:25:10 +01:00
|
|
|
/* NF_ACCEPT */
|
2010-02-18 12:31:05 +01:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2013-02-05 17:21:31 +01:00
|
|
|
/*
 * Recompute the SCTP checksum of @skb starting at offset @sctphoff,
 * store it in @sctph->checksum and mark the skb CHECKSUM_UNNECESSARY.
 */
static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
			  unsigned int sctphoff)
{
	__le32 csum = sctp_compute_cksum(skb, sctphoff);

	sctph->checksum = csum;
	skb->ip_summed = CHECKSUM_UNNECESSARY;
}
|
|
|
|
|
2010-02-18 12:31:05 +01:00
|
|
|
static int
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have choosen, not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
|
|
struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
|
2010-02-18 12:31:05 +01:00
|
|
|
{
|
|
|
|
sctp_sctphdr_t *sctph;
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have choosen, not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
unsigned int sctphoff = iph->len;
|
net: ipvs: sctp: do not recalc sctp csum when ports didn't change
Unlike UDP or TCP, we do not take the pseudo-header into
account in SCTP checksums. So in case port mapping is the
very same, we do not need to recalculate the whole SCTP
checksum in software, which is very expensive.
Also, similarly as in TCP, take into account when a private
helper mangled the packet. In that case, we also need to
recalculate the checksum even if ports might be same.
Thanks for feedback regarding skb->ip_summed checks from
Julian Anastasov; here's a discussion on these checks for
snat and dnat:
* For snat_handler(), we can see CHECKSUM_PARTIAL from
virtual devices, and from LOCAL_OUT, otherwise it
should be CHECKSUM_UNNECESSARY. In general, in snat it
is more complex. skb contains the original route and
ip_vs_route_me_harder() can change the route after
snat_handler. So, for locally generated replies from
local server we can not preserve the CHECKSUM_PARTIAL
mode. It is an chicken or egg dilemma: snat_handler
needs the device after rerouting (to check for
NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants
the snat_handler() to put the new saddr for proper
rerouting.
* For dnat_handler(), we should not see CHECKSUM_COMPLETE
for SCTP, in fact the small set of drivers that support
SCTP offloading return CHECKSUM_UNNECESSARY on correctly
received SCTP csum. We can see CHECKSUM_PARTIAL from
local stack or received from virtual drivers. The idea is
that SCTP decides to avoid csum calculation if hardware
supports offloading. IPVS can change the device after
rerouting to real server but we can preserve the
CHECKSUM_PARTIAL mode if the new device supports
offloading too. This works because skb dst is changed
before dnat_handler and we see the new device. So, checks
in the 'if' part will decide whether it is ok to keep
CHECKSUM_PARTIAL for the output. If the packet was with
CHECKSUM_NONE, hence we deal with unknown checksum. As we
recalculate the sum for IP header in all cases, it should
be safe to use CHECKSUM_UNNECESSARY. We can forward wrong
checksum in this case (without cp->app). In case of
CHECKSUM_UNNECESSARY, the csum was valid on receive.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-10-28 10:56:20 +01:00
|
|
|
bool payload_csum = false;
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have choosen, not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
if (cp->af == AF_INET6 && iph->fragoffs)
|
ipvs: Fix faulty IPv6 extension header handling in IPVS
IPv6 packets can contain extension headers, thus its wrong to assume
that the transport/upper-layer header, starts right after (struct
ipv6hdr) the IPv6 header. IPVS uses this false assumption, and will
write SNAT & DNAT modifications at a fixed pos which will corrupt the
message.
To fix this, proper header position must be found before modifying
packets. Introducing ip_vs_fill_iph_skb(), which uses ipv6_find_hdr()
to skip the exthdrs. It finds (1) the transport header offset, (2) the
protocol, and (3) detects if the packet is a fragment.
Note, that fragments in IPv6 is represented via an exthdr. Thus, this
is detected while skipping through the exthdrs.
This patch depends on commit 84018f55a:
"netfilter: ip6_tables: add flags parameter to ipv6_find_hdr()"
This also adds a dependency to ip6_tables.
Originally based on patch from: Hans Schillstrom
kABI notes:
Changing struct ip_vs_iphdr is a potential minor kABI breaker,
because external modules can be compiled with another version of
this struct. This should not matter, as they would most-likely
be using a compiled-in version of ip_vs_fill_iphdr(). When
recompiled, they will notice ip_vs_fill_iphdr() no longer exists,
and they have to used ip_vs_fill_iph_skb() instead.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:06:41 +02:00
|
|
|
return 1;
|
2010-02-18 12:31:05 +01:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* csum_check requires unshared skb */
|
|
|
|
if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (unlikely(cp->app != NULL)) {
|
net: ipvs: sctp: do not recalc sctp csum when ports didn't change
Unlike UDP or TCP, we do not take the pseudo-header into
account in SCTP checksums. So in case port mapping is the
very same, we do not need to recalculate the whole SCTP
checksum in software, which is very expensive.
Also, similarly as in TCP, take into account when a private
helper mangled the packet. In that case, we also need to
recalculate the checksum even if ports might be same.
Thanks for feedback regarding skb->ip_summed checks from
Julian Anastasov; here's a discussion on these checks for
snat and dnat:
* For snat_handler(), we can see CHECKSUM_PARTIAL from
virtual devices, and from LOCAL_OUT, otherwise it
should be CHECKSUM_UNNECESSARY. In general, in snat it
is more complex. skb contains the original route and
ip_vs_route_me_harder() can change the route after
snat_handler. So, for locally generated replies from
local server we can not preserve the CHECKSUM_PARTIAL
mode. It is an chicken or egg dilemma: snat_handler
needs the device after rerouting (to check for
NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants
the snat_handler() to put the new saddr for proper
rerouting.
* For dnat_handler(), we should not see CHECKSUM_COMPLETE
for SCTP, in fact the small set of drivers that support
SCTP offloading return CHECKSUM_UNNECESSARY on correctly
received SCTP csum. We can see CHECKSUM_PARTIAL from
local stack or received from virtual drivers. The idea is
that SCTP decides to avoid csum calculation if hardware
supports offloading. IPVS can change the device after
rerouting to real server but we can preserve the
CHECKSUM_PARTIAL mode if the new device supports
offloading too. This works because skb dst is changed
before dnat_handler and we see the new device. So, checks
in the 'if' part will decide whether it is ok to keep
CHECKSUM_PARTIAL for the output. If the packet was with
CHECKSUM_NONE, hence we deal with unknown checksum. As we
recalculate the sum for IP header in all cases, it should
be safe to use CHECKSUM_UNNECESSARY. We can forward wrong
checksum in this case (without cp->app). In case of
CHECKSUM_UNNECESSARY, the csum was valid on receive.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-10-28 10:56:20 +01:00
|
|
|
int ret;
|
|
|
|
|
2010-02-18 12:31:05 +01:00
|
|
|
/* Some checks before mangling */
|
|
|
|
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Call application helper if needed */
|
net: ipvs: sctp: do not recalc sctp csum when ports didn't change
Unlike UDP or TCP, we do not take the pseudo-header into
account in SCTP checksums. So in case port mapping is the
very same, we do not need to recalculate the whole SCTP
checksum in software, which is very expensive.
Also, similarly as in TCP, take into account when a private
helper mangled the packet. In that case, we also need to
recalculate the checksum even if ports might be same.
Thanks for feedback regarding skb->ip_summed checks from
Julian Anastasov; here's a discussion on these checks for
snat and dnat:
* For snat_handler(), we can see CHECKSUM_PARTIAL from
virtual devices, and from LOCAL_OUT, otherwise it
should be CHECKSUM_UNNECESSARY. In general, in snat it
is more complex. skb contains the original route and
ip_vs_route_me_harder() can change the route after
snat_handler. So, for locally generated replies from
local server we can not preserve the CHECKSUM_PARTIAL
mode. It is an chicken or egg dilemma: snat_handler
needs the device after rerouting (to check for
NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants
the snat_handler() to put the new saddr for proper
rerouting.
* For dnat_handler(), we should not see CHECKSUM_COMPLETE
for SCTP, in fact the small set of drivers that support
SCTP offloading return CHECKSUM_UNNECESSARY on correctly
received SCTP csum. We can see CHECKSUM_PARTIAL from
local stack or received from virtual drivers. The idea is
that SCTP decides to avoid csum calculation if hardware
supports offloading. IPVS can change the device after
rerouting to real server but we can preserve the
CHECKSUM_PARTIAL mode if the new device supports
offloading too. This works because skb dst is changed
before dnat_handler and we see the new device. So, checks
in the 'if' part will decide whether it is ok to keep
CHECKSUM_PARTIAL for the output. If the packet was with
CHECKSUM_NONE, hence we deal with unknown checksum. As we
recalculate the sum for IP header in all cases, it should
be safe to use CHECKSUM_UNNECESSARY. We can forward wrong
checksum in this case (without cp->app). In case of
CHECKSUM_UNNECESSARY, the csum was valid on receive.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-10-28 10:56:20 +01:00
|
|
|
ret = ip_vs_app_pkt_out(cp, skb);
|
|
|
|
if (ret == 0)
|
2010-02-18 12:31:05 +01:00
|
|
|
return 0;
|
net: ipvs: sctp: do not recalc sctp csum when ports didn't change
Unlike UDP or TCP, we do not take the pseudo-header into
account in SCTP checksums. So in case port mapping is the
very same, we do not need to recalculate the whole SCTP
checksum in software, which is very expensive.
Also, similarly as in TCP, take into account when a private
helper mangled the packet. In that case, we also need to
recalculate the checksum even if ports might be same.
Thanks for feedback regarding skb->ip_summed checks from
Julian Anastasov; here's a discussion on these checks for
snat and dnat:
* For snat_handler(), we can see CHECKSUM_PARTIAL from
virtual devices, and from LOCAL_OUT, otherwise it
should be CHECKSUM_UNNECESSARY. In general, in snat it
is more complex. skb contains the original route and
ip_vs_route_me_harder() can change the route after
snat_handler. So, for locally generated replies from
local server we can not preserve the CHECKSUM_PARTIAL
mode. It is an chicken or egg dilemma: snat_handler
needs the device after rerouting (to check for
NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants
the snat_handler() to put the new saddr for proper
rerouting.
* For dnat_handler(), we should not see CHECKSUM_COMPLETE
for SCTP, in fact the small set of drivers that support
SCTP offloading return CHECKSUM_UNNECESSARY on correctly
received SCTP csum. We can see CHECKSUM_PARTIAL from
local stack or received from virtual drivers. The idea is
that SCTP decides to avoid csum calculation if hardware
supports offloading. IPVS can change the device after
rerouting to real server but we can preserve the
CHECKSUM_PARTIAL mode if the new device supports
offloading too. This works because skb dst is changed
before dnat_handler and we see the new device. So, checks
in the 'if' part will decide whether it is ok to keep
CHECKSUM_PARTIAL for the output. If the packet was with
CHECKSUM_NONE, hence we deal with unknown checksum. As we
recalculate the sum for IP header in all cases, it should
be safe to use CHECKSUM_UNNECESSARY. We can forward wrong
checksum in this case (without cp->app). In case of
CHECKSUM_UNNECESSARY, the csum was valid on receive.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-10-28 10:56:20 +01:00
|
|
|
/* ret=2: csum update is needed after payload mangling */
|
|
|
|
if (ret == 2)
|
|
|
|
payload_csum = true;
|
2010-02-18 12:31:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
sctph = (void *) skb_network_header(skb) + sctphoff;
|
|
|
|
|
net: ipvs: sctp: do not recalc sctp csum when ports didn't change
Unlike UDP or TCP, we do not take the pseudo-header into
account in SCTP checksums. So in case port mapping is the
very same, we do not need to recalculate the whole SCTP
checksum in software, which is very expensive.
Also, similarly as in TCP, take into account when a private
helper mangled the packet. In that case, we also need to
recalculate the checksum even if ports might be same.
Thanks for feedback regarding skb->ip_summed checks from
Julian Anastasov; here's a discussion on these checks for
snat and dnat:
* For snat_handler(), we can see CHECKSUM_PARTIAL from
virtual devices, and from LOCAL_OUT, otherwise it
should be CHECKSUM_UNNECESSARY. In general, in snat it
is more complex. skb contains the original route and
ip_vs_route_me_harder() can change the route after
snat_handler. So, for locally generated replies from
local server we can not preserve the CHECKSUM_PARTIAL
mode. It is an chicken or egg dilemma: snat_handler
needs the device after rerouting (to check for
NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants
the snat_handler() to put the new saddr for proper
rerouting.
* For dnat_handler(), we should not see CHECKSUM_COMPLETE
for SCTP, in fact the small set of drivers that support
SCTP offloading return CHECKSUM_UNNECESSARY on correctly
received SCTP csum. We can see CHECKSUM_PARTIAL from
local stack or received from virtual drivers. The idea is
that SCTP decides to avoid csum calculation if hardware
supports offloading. IPVS can change the device after
rerouting to real server but we can preserve the
CHECKSUM_PARTIAL mode if the new device supports
offloading too. This works because skb dst is changed
before dnat_handler and we see the new device. So, checks
in the 'if' part will decide whether it is ok to keep
CHECKSUM_PARTIAL for the output. If the packet was with
CHECKSUM_NONE, hence we deal with unknown checksum. As we
recalculate the sum for IP header in all cases, it should
be safe to use CHECKSUM_UNNECESSARY. We can forward wrong
checksum in this case (without cp->app). In case of
CHECKSUM_UNNECESSARY, the csum was valid on receive.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-10-28 10:56:20 +01:00
|
|
|
/* Only update csum if we really have to */
|
|
|
|
if (sctph->source != cp->vport || payload_csum ||
|
|
|
|
skb->ip_summed == CHECKSUM_PARTIAL) {
|
|
|
|
sctph->source = cp->vport;
|
|
|
|
sctp_nat_csum(skb, sctph, sctphoff);
|
|
|
|
} else {
|
|
|
|
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
|
|
|
}
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have chosen not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
|
|
struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
|
2010-02-18 12:31:05 +01:00
|
|
|
{
|
|
|
|
sctp_sctphdr_t *sctph;
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have chosen not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
unsigned int sctphoff = iph->len;
|
net: ipvs: sctp: do not recalc sctp csum when ports didn't change
Unlike UDP or TCP, we do not take the pseudo-header into
account in SCTP checksums. So in case port mapping is the
very same, we do not need to recalculate the whole SCTP
checksum in software, which is very expensive.
Also, similarly as in TCP, take into account when a private
helper mangled the packet. In that case, we also need to
recalculate the checksum even if ports might be same.
Thanks for feedback regarding skb->ip_summed checks from
Julian Anastasov; here's a discussion on these checks for
snat and dnat:
* For snat_handler(), we can see CHECKSUM_PARTIAL from
virtual devices, and from LOCAL_OUT, otherwise it
should be CHECKSUM_UNNECESSARY. In general, in snat it
is more complex. skb contains the original route and
ip_vs_route_me_harder() can change the route after
snat_handler. So, for locally generated replies from
local server we can not preserve the CHECKSUM_PARTIAL
mode. It is a chicken-and-egg dilemma: snat_handler
needs the device after rerouting (to check for
NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants
the snat_handler() to put the new saddr for proper
rerouting.
* For dnat_handler(), we should not see CHECKSUM_COMPLETE
for SCTP, in fact the small set of drivers that support
SCTP offloading return CHECKSUM_UNNECESSARY on correctly
received SCTP csum. We can see CHECKSUM_PARTIAL from
local stack or received from virtual drivers. The idea is
that SCTP decides to avoid csum calculation if hardware
supports offloading. IPVS can change the device after
rerouting to real server but we can preserve the
CHECKSUM_PARTIAL mode if the new device supports
offloading too. This works because skb dst is changed
before dnat_handler and we see the new device. So, checks
in the 'if' part will decide whether it is ok to keep
CHECKSUM_PARTIAL for the output. If the packet was with
CHECKSUM_NONE, hence we deal with unknown checksum. As we
recalculate the sum for IP header in all cases, it should
be safe to use CHECKSUM_UNNECESSARY. We can forward wrong
checksum in this case (without cp->app). In case of
CHECKSUM_UNNECESSARY, the csum was valid on receive.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-10-28 10:56:20 +01:00
|
|
|
bool payload_csum = false;
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
ipvs: API change to avoid rescan of IPv6 exthdr
Reduce the number of times we scan/skip the IPv6 exthdrs.
This patch contains a lot of API changes. This is done, to avoid
repeating the scan of finding the IPv6 headers, via ipv6_find_hdr(),
which is called by ip_vs_fill_iph_skb().
Finding the IPv6 headers is done as early as possible, and passed on
as a pointer "struct ip_vs_iphdr *" to the affected functions.
This patch reduce/removes 19 calls to ip_vs_fill_iph_skb().
Notice, I have chosen not to change the API of function
pointer "(*schedule)" (in struct ip_vs_scheduler) as it can be
used by external schedulers, via {un,}register_ip_vs_scheduler.
Only 4 out of 10 schedulers use info from ip_vs_iphdr*, and when
they do, they are only interested in iph->{s,d}addr.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:07:17 +02:00
|
|
|
if (cp->af == AF_INET6 && iph->fragoffs)
|
ipvs: Fix faulty IPv6 extension header handling in IPVS
IPv6 packets can contain extension headers, thus it's wrong to assume
that the transport/upper-layer header, starts right after (struct
ipv6hdr) the IPv6 header. IPVS uses this false assumption, and will
write SNAT & DNAT modifications at a fixed pos which will corrupt the
message.
To fix this, proper header position must be found before modifying
packets. Introducing ip_vs_fill_iph_skb(), which uses ipv6_find_hdr()
to skip the exthdrs. It finds (1) the transport header offset, (2) the
protocol, and (3) detects if the packet is a fragment.
Note that fragments in IPv6 are represented via an exthdr. Thus, this
is detected while skipping through the exthdrs.
This patch depends on commit 84018f55a:
"netfilter: ip6_tables: add flags parameter to ipv6_find_hdr()"
This also adds a dependency to ip6_tables.
Originally based on patch from: Hans Schillstrom
kABI notes:
Changing struct ip_vs_iphdr is a potential minor kABI breaker,
because external modules can be compiled with another version of
this struct. This should not matter, as they would most-likely
be using a compiled-in version of ip_vs_fill_iphdr(). When
recompiled, they will notice ip_vs_fill_iphdr() no longer exists,
and they have to use ip_vs_fill_iph_skb() instead.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2012-09-26 14:06:41 +02:00
|
|
|
return 1;
|
2010-02-18 12:31:05 +01:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* csum_check requires unshared skb */
|
|
|
|
if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (unlikely(cp->app != NULL)) {
|
net: ipvs: sctp: do not recalc sctp csum when ports didn't change
Unlike UDP or TCP, we do not take the pseudo-header into
account in SCTP checksums. So in case port mapping is the
very same, we do not need to recalculate the whole SCTP
checksum in software, which is very expensive.
Also, similarly as in TCP, take into account when a private
helper mangled the packet. In that case, we also need to
recalculate the checksum even if ports might be same.
Thanks for feedback regarding skb->ip_summed checks from
Julian Anastasov; here's a discussion on these checks for
snat and dnat:
* For snat_handler(), we can see CHECKSUM_PARTIAL from
virtual devices, and from LOCAL_OUT, otherwise it
should be CHECKSUM_UNNECESSARY. In general, in snat it
is more complex. skb contains the original route and
ip_vs_route_me_harder() can change the route after
snat_handler. So, for locally generated replies from
local server we can not preserve the CHECKSUM_PARTIAL
mode. It is a chicken-or-egg dilemma: snat_handler
needs the device after rerouting (to check for
NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants
the snat_handler() to put the new saddr for proper
rerouting.
* For dnat_handler(), we should not see CHECKSUM_COMPLETE
for SCTP, in fact the small set of drivers that support
SCTP offloading return CHECKSUM_UNNECESSARY on correctly
received SCTP csum. We can see CHECKSUM_PARTIAL from
local stack or received from virtual drivers. The idea is
that SCTP decides to avoid csum calculation if hardware
supports offloading. IPVS can change the device after
rerouting to real server but we can preserve the
CHECKSUM_PARTIAL mode if the new device supports
offloading too. This works because skb dst is changed
before dnat_handler and we see the new device. So, checks
in the 'if' part will decide whether it is ok to keep
CHECKSUM_PARTIAL for the output. If the packet was with
CHECKSUM_NONE, hence we deal with unknown checksum. As we
recalculate the sum for IP header in all cases, it should
be safe to use CHECKSUM_UNNECESSARY. We can forward wrong
checksum in this case (without cp->app). In case of
CHECKSUM_UNNECESSARY, the csum was valid on receive.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-10-28 10:56:20 +01:00
|
|
|
int ret;
|
|
|
|
|
2010-02-18 12:31:05 +01:00
|
|
|
/* Some checks before mangling */
|
|
|
|
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Call application helper if needed */
|
net: ipvs: sctp: do not recalc sctp csum when ports didn't change
Unlike UDP or TCP, we do not take the pseudo-header into
account in SCTP checksums. So in case port mapping is the
very same, we do not need to recalculate the whole SCTP
checksum in software, which is very expensive.
Also, similarly as in TCP, take into account when a private
helper mangled the packet. In that case, we also need to
recalculate the checksum even if ports might be same.
Thanks for feedback regarding skb->ip_summed checks from
Julian Anastasov; here's a discussion on these checks for
snat and dnat:
* For snat_handler(), we can see CHECKSUM_PARTIAL from
virtual devices, and from LOCAL_OUT, otherwise it
should be CHECKSUM_UNNECESSARY. In general, in snat it
is more complex. skb contains the original route and
ip_vs_route_me_harder() can change the route after
snat_handler. So, for locally generated replies from
local server we can not preserve the CHECKSUM_PARTIAL
mode. It is a chicken-or-egg dilemma: snat_handler
needs the device after rerouting (to check for
NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants
the snat_handler() to put the new saddr for proper
rerouting.
* For dnat_handler(), we should not see CHECKSUM_COMPLETE
for SCTP, in fact the small set of drivers that support
SCTP offloading return CHECKSUM_UNNECESSARY on correctly
received SCTP csum. We can see CHECKSUM_PARTIAL from
local stack or received from virtual drivers. The idea is
that SCTP decides to avoid csum calculation if hardware
supports offloading. IPVS can change the device after
rerouting to real server but we can preserve the
CHECKSUM_PARTIAL mode if the new device supports
offloading too. This works because skb dst is changed
before dnat_handler and we see the new device. So, checks
in the 'if' part will decide whether it is ok to keep
CHECKSUM_PARTIAL for the output. If the packet was with
CHECKSUM_NONE, hence we deal with unknown checksum. As we
recalculate the sum for IP header in all cases, it should
be safe to use CHECKSUM_UNNECESSARY. We can forward wrong
checksum in this case (without cp->app). In case of
CHECKSUM_UNNECESSARY, the csum was valid on receive.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-10-28 10:56:20 +01:00
|
|
|
ret = ip_vs_app_pkt_in(cp, skb);
|
|
|
|
if (ret == 0)
|
2010-02-18 12:31:05 +01:00
|
|
|
return 0;
|
net: ipvs: sctp: do not recalc sctp csum when ports didn't change
Unlike UDP or TCP, we do not take the pseudo-header into
account in SCTP checksums. So in case port mapping is the
very same, we do not need to recalculate the whole SCTP
checksum in software, which is very expensive.
Also, similarly as in TCP, take into account when a private
helper mangled the packet. In that case, we also need to
recalculate the checksum even if ports might be same.
Thanks for feedback regarding skb->ip_summed checks from
Julian Anastasov; here's a discussion on these checks for
snat and dnat:
* For snat_handler(), we can see CHECKSUM_PARTIAL from
virtual devices, and from LOCAL_OUT, otherwise it
should be CHECKSUM_UNNECESSARY. In general, in snat it
is more complex. skb contains the original route and
ip_vs_route_me_harder() can change the route after
snat_handler. So, for locally generated replies from
local server we can not preserve the CHECKSUM_PARTIAL
mode. It is a chicken-or-egg dilemma: snat_handler
needs the device after rerouting (to check for
NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants
the snat_handler() to put the new saddr for proper
rerouting.
* For dnat_handler(), we should not see CHECKSUM_COMPLETE
for SCTP, in fact the small set of drivers that support
SCTP offloading return CHECKSUM_UNNECESSARY on correctly
received SCTP csum. We can see CHECKSUM_PARTIAL from
local stack or received from virtual drivers. The idea is
that SCTP decides to avoid csum calculation if hardware
supports offloading. IPVS can change the device after
rerouting to real server but we can preserve the
CHECKSUM_PARTIAL mode if the new device supports
offloading too. This works because skb dst is changed
before dnat_handler and we see the new device. So, checks
in the 'if' part will decide whether it is ok to keep
CHECKSUM_PARTIAL for the output. If the packet was with
CHECKSUM_NONE, hence we deal with unknown checksum. As we
recalculate the sum for IP header in all cases, it should
be safe to use CHECKSUM_UNNECESSARY. We can forward wrong
checksum in this case (without cp->app). In case of
CHECKSUM_UNNECESSARY, the csum was valid on receive.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-10-28 10:56:20 +01:00
|
|
|
/* ret=2: csum update is needed after payload mangling */
|
|
|
|
if (ret == 2)
|
|
|
|
payload_csum = true;
|
2010-02-18 12:31:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
sctph = (void *) skb_network_header(skb) + sctphoff;
|
|
|
|
|
net: ipvs: sctp: do not recalc sctp csum when ports didn't change
Unlike UDP or TCP, we do not take the pseudo-header into
account in SCTP checksums. So in case port mapping is the
very same, we do not need to recalculate the whole SCTP
checksum in software, which is very expensive.
Also, similarly as in TCP, take into account when a private
helper mangled the packet. In that case, we also need to
recalculate the checksum even if ports might be same.
Thanks for feedback regarding skb->ip_summed checks from
Julian Anastasov; here's a discussion on these checks for
snat and dnat:
* For snat_handler(), we can see CHECKSUM_PARTIAL from
virtual devices, and from LOCAL_OUT, otherwise it
should be CHECKSUM_UNNECESSARY. In general, in snat it
is more complex. skb contains the original route and
ip_vs_route_me_harder() can change the route after
snat_handler. So, for locally generated replies from
local server we can not preserve the CHECKSUM_PARTIAL
mode. It is a chicken-or-egg dilemma: snat_handler
needs the device after rerouting (to check for
NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants
the snat_handler() to put the new saddr for proper
rerouting.
* For dnat_handler(), we should not see CHECKSUM_COMPLETE
for SCTP, in fact the small set of drivers that support
SCTP offloading return CHECKSUM_UNNECESSARY on correctly
received SCTP csum. We can see CHECKSUM_PARTIAL from
local stack or received from virtual drivers. The idea is
that SCTP decides to avoid csum calculation if hardware
supports offloading. IPVS can change the device after
rerouting to real server but we can preserve the
CHECKSUM_PARTIAL mode if the new device supports
offloading too. This works because skb dst is changed
before dnat_handler and we see the new device. So, checks
in the 'if' part will decide whether it is ok to keep
CHECKSUM_PARTIAL for the output. If the packet was with
CHECKSUM_NONE, hence we deal with unknown checksum. As we
recalculate the sum for IP header in all cases, it should
be safe to use CHECKSUM_UNNECESSARY. We can forward wrong
checksum in this case (without cp->app). In case of
CHECKSUM_UNNECESSARY, the csum was valid on receive.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-10-28 10:56:20 +01:00
|
|
|
/* Only update csum if we really have to */
|
|
|
|
if (sctph->dest != cp->dport || payload_csum ||
|
|
|
|
(skb->ip_summed == CHECKSUM_PARTIAL &&
|
|
|
|
!(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
|
|
|
|
sctph->dest = cp->dport;
|
|
|
|
sctp_nat_csum(skb, sctph, sctphoff);
|
|
|
|
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
|
|
|
|
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
|
|
|
}
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
|
|
|
|
{
|
|
|
|
unsigned int sctphoff;
|
|
|
|
struct sctphdr *sh, _sctph;
|
2013-07-25 03:52:05 +02:00
|
|
|
__le32 cmp, val;
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
|
if (af == AF_INET6)
|
|
|
|
sctphoff = sizeof(struct ipv6hdr);
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
sctphoff = ip_hdrlen(skb);
|
|
|
|
|
|
|
|
sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
|
|
|
|
if (sh == NULL)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
cmp = sh->checksum;
|
2013-07-25 03:52:05 +02:00
|
|
|
val = sctp_compute_cksum(skb, sctphoff);
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
if (val != cmp) {
|
|
|
|
/* CRC failure, dump it. */
|
2010-10-17 15:46:17 +02:00
|
|
|
IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
|
2010-02-18 12:31:05 +01:00
|
|
|
"Failed checksum for");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Events that drive the IPVS SCTP state table. The values are used as
 * array indices, so the order must not change; IP_VS_SCTP_EVENT_LAST is
 * the number of real events.
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA = 0,		/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT,		/* INIT chunk */
	IP_VS_SCTP_INIT_ACK,		/* INIT ACK chunk */
	IP_VS_SCTP_COOKIE_ECHO,		/* COOKIE ECHO chunk */
	IP_VS_SCTP_COOKIE_ACK,		/* COOKIE ACK chunk */
	IP_VS_SCTP_SHUTDOWN,		/* SHUTDOWN chunk */
	IP_VS_SCTP_SHUTDOWN_ACK,	/* SHUTDOWN ACK chunk */
	IP_VS_SCTP_SHUTDOWN_COMPLETE,	/* SHUTDOWN COMPLETE chunk */
	IP_VS_SCTP_ERROR,		/* ERROR chunk */
	IP_VS_SCTP_ABORT,		/* ABORT chunk */
	IP_VS_SCTP_EVENT_LAST		/* count of events, not an event */
};
|
|
|
|
|
2013-06-18 09:08:07 +02:00
|
|
|
/* RFC 2960, 3.2 Chunk Field Descriptions */
|
|
|
|
static __u8 sctp_events[] = {
|
|
|
|
[SCTP_CID_DATA] = IP_VS_SCTP_DATA,
|
|
|
|
[SCTP_CID_INIT] = IP_VS_SCTP_INIT,
|
|
|
|
[SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK,
|
|
|
|
[SCTP_CID_SACK] = IP_VS_SCTP_DATA,
|
|
|
|
[SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA,
|
|
|
|
[SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA,
|
|
|
|
[SCTP_CID_ABORT] = IP_VS_SCTP_ABORT,
|
|
|
|
[SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN,
|
|
|
|
[SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK,
|
|
|
|
[SCTP_CID_ERROR] = IP_VS_SCTP_ERROR,
|
|
|
|
[SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO,
|
|
|
|
[SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK,
|
|
|
|
[SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA,
|
|
|
|
[SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA,
|
|
|
|
[SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE,
|
2010-02-18 12:31:05 +01:00
|
|
|
};
|
|
|
|
|
2013-06-18 09:08:07 +02:00
|
|
|
/* SCTP States:
|
|
|
|
* See RFC 2960, 4. SCTP Association State Diagram
|
|
|
|
*
|
|
|
|
* New states (not in diagram):
|
|
|
|
* - INIT1 state: use shorter timeout for dropped INIT packets
|
|
|
|
* - REJECTED state: use shorter timeout if INIT is rejected with ABORT
|
|
|
|
* - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
|
|
|
|
*
|
|
|
|
* The states are as seen in real server. In the diagram, INIT1, INIT,
|
|
|
|
* COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
|
|
|
|
*
|
|
|
|
* States as per packets from client (C) and server (S):
|
|
|
|
*
|
|
|
|
* Setup of client connection:
|
|
|
|
* IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
|
|
|
|
* IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
|
|
|
|
* IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
|
|
|
|
* IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
|
|
|
|
*
|
|
|
|
* Setup of server connection:
|
|
|
|
* IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
|
|
|
|
* IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
|
|
|
|
* IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
|
|
|
|
*/
|
2010-02-18 12:31:05 +01:00
|
|
|
|
2013-06-18 09:08:07 +02:00
|
|
|
#define sNO IP_VS_SCTP_S_NONE
|
|
|
|
#define sI1 IP_VS_SCTP_S_INIT1
|
|
|
|
#define sIN IP_VS_SCTP_S_INIT
|
|
|
|
#define sCS IP_VS_SCTP_S_COOKIE_SENT
|
|
|
|
#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
|
|
|
|
#define sCW IP_VS_SCTP_S_COOKIE_WAIT
|
|
|
|
#define sCO IP_VS_SCTP_S_COOKIE
|
|
|
|
#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
|
|
|
|
#define sES IP_VS_SCTP_S_ESTABLISHED
|
|
|
|
#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
|
|
|
|
#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
|
|
|
|
#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
|
|
|
|
#define sRJ IP_VS_SCTP_S_REJECTED
|
|
|
|
#define sCL IP_VS_SCTP_S_CLOSED
|
|
|
|
|
|
|
|
static const __u8 sctp_states
|
|
|
|
[IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
|
|
|
|
{ /* INPUT */
|
|
|
|
/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
|
|
|
|
/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
|
|
|
|
/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
|
|
|
|
/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
|
|
|
|
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
|
|
|
|
},
|
|
|
|
{ /* OUTPUT */
|
|
|
|
/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
|
|
|
|
/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
|
|
|
|
/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
|
|
|
|
/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
|
|
|
|
},
|
|
|
|
{ /* INPUT-ONLY */
|
|
|
|
/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
|
|
|
|
/* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
|
|
|
|
/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
|
|
|
|
/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
|
|
|
|
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
|
|
|
|
/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
|
|
|
|
},
|
2010-02-18 12:31:05 +01:00
|
|
|
};
|
|
|
|
|
2013-06-18 09:08:07 +02:00
|
|
|
#define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ)
|
|
|
|
|
|
|
|
/* Timeout table[state] */
|
2011-01-03 14:44:49 +01:00
|
|
|
static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
|
2013-06-18 09:08:07 +02:00
|
|
|
[IP_VS_SCTP_S_NONE] = 2 * HZ,
|
|
|
|
[IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ,
|
|
|
|
[IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO,
|
|
|
|
[IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO,
|
|
|
|
[IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO,
|
|
|
|
[IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO,
|
|
|
|
[IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO,
|
|
|
|
[IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO,
|
|
|
|
[IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ,
|
|
|
|
[IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO,
|
|
|
|
[IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO,
|
|
|
|
[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO,
|
|
|
|
[IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ,
|
|
|
|
[IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO,
|
|
|
|
[IP_VS_SCTP_S_LAST] = 2 * HZ,
|
2010-02-18 12:31:05 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
|
2013-06-18 09:08:07 +02:00
|
|
|
[IP_VS_SCTP_S_NONE] = "NONE",
|
|
|
|
[IP_VS_SCTP_S_INIT1] = "INIT1",
|
|
|
|
[IP_VS_SCTP_S_INIT] = "INIT",
|
|
|
|
[IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT",
|
|
|
|
[IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED",
|
|
|
|
[IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT",
|
|
|
|
[IP_VS_SCTP_S_COOKIE] = "COOKIE",
|
|
|
|
[IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED",
|
|
|
|
[IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED",
|
|
|
|
[IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT",
|
|
|
|
[IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED",
|
|
|
|
[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT",
|
|
|
|
[IP_VS_SCTP_S_REJECTED] = "REJECTED",
|
|
|
|
[IP_VS_SCTP_S_CLOSED] = "CLOSED",
|
|
|
|
[IP_VS_SCTP_S_LAST] = "BUG!",
|
2010-02-18 12:31:05 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
static const char *sctp_state_name(int state)
|
|
|
|
{
|
|
|
|
if (state >= IP_VS_SCTP_S_LAST)
|
|
|
|
return "ERR!";
|
|
|
|
if (sctp_state_name_table[state])
|
|
|
|
return sctp_state_name_table[state];
|
|
|
|
return "?";
|
|
|
|
}
|
|
|
|
|
2011-09-16 07:11:49 +02:00
|
|
|
static inline void
|
2011-01-03 14:44:51 +01:00
|
|
|
set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
|
2010-02-18 12:31:05 +01:00
|
|
|
int direction, const struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
sctp_chunkhdr_t _sctpch, *sch;
|
|
|
|
unsigned char chunk_type;
|
|
|
|
int event, next_state;
|
2013-03-09 22:25:06 +01:00
|
|
|
int ihl, cofs;
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
|
ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
|
|
|
|
#else
|
|
|
|
ihl = ip_hdrlen(skb);
|
|
|
|
#endif
|
|
|
|
|
2013-03-09 22:25:06 +01:00
|
|
|
cofs = ihl + sizeof(sctp_sctphdr_t);
|
|
|
|
sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
|
2010-02-18 12:31:05 +01:00
|
|
|
if (sch == NULL)
|
2011-09-16 07:11:49 +02:00
|
|
|
return;
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
chunk_type = sch->type;
|
|
|
|
/*
|
|
|
|
* Section 3: Multiple chunks can be bundled into one SCTP packet
|
|
|
|
* up to the MTU size, except for the INIT, INIT ACK, and
|
|
|
|
* SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
|
|
|
|
* any other chunk in a packet.
|
|
|
|
*
|
|
|
|
* Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
|
|
|
|
* chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
|
|
|
|
* bundled with an ABORT, but they MUST be placed before the ABORT
|
|
|
|
* in the SCTP packet or they will be ignored by the receiver.
|
|
|
|
*/
|
|
|
|
if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
|
|
|
|
(sch->type == SCTP_CID_COOKIE_ACK)) {
|
2013-03-09 22:25:06 +01:00
|
|
|
int clen = ntohs(sch->length);
|
|
|
|
|
|
|
|
if (clen >= sizeof(sctp_chunkhdr_t)) {
|
|
|
|
sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
|
|
|
|
sizeof(_sctpch), &_sctpch);
|
|
|
|
if (sch && sch->type == SCTP_CID_ABORT)
|
2010-02-18 12:31:05 +01:00
|
|
|
chunk_type = sch->type;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-18 09:08:07 +02:00
|
|
|
event = (chunk_type < sizeof(sctp_events)) ?
|
|
|
|
sctp_events[chunk_type] : IP_VS_SCTP_DATA;
|
2010-02-18 12:31:05 +01:00
|
|
|
|
2013-06-18 09:08:07 +02:00
|
|
|
/* Update direction to INPUT_ONLY if necessary
|
|
|
|
* or delete NO_OUTPUT flag if output packet detected
|
2010-02-18 12:31:05 +01:00
|
|
|
*/
|
2013-06-18 09:08:07 +02:00
|
|
|
if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
|
|
|
|
if (direction == IP_VS_DIR_OUTPUT)
|
|
|
|
cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
|
|
|
|
else
|
|
|
|
direction = IP_VS_DIR_INPUT_ONLY;
|
|
|
|
}
|
|
|
|
|
|
|
|
next_state = sctp_states[direction][event][cp->state];
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
if (next_state != cp->state) {
|
|
|
|
struct ip_vs_dest *dest = cp->dest;
|
|
|
|
|
|
|
|
IP_VS_DBG_BUF(8, "%s %s %s:%d->"
|
|
|
|
"%s:%d state: %s->%s conn->refcnt:%d\n",
|
2011-01-03 14:44:51 +01:00
|
|
|
pd->pp->name,
|
2010-02-18 12:31:05 +01:00
|
|
|
((direction == IP_VS_DIR_OUTPUT) ?
|
|
|
|
"output " : "input "),
|
|
|
|
IP_VS_DBG_ADDR(cp->af, &cp->daddr),
|
|
|
|
ntohs(cp->dport),
|
|
|
|
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
|
|
|
|
ntohs(cp->cport),
|
|
|
|
sctp_state_name(cp->state),
|
|
|
|
sctp_state_name(next_state),
|
|
|
|
atomic_read(&cp->refcnt));
|
|
|
|
if (dest) {
|
|
|
|
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
|
|
|
|
(next_state != IP_VS_SCTP_S_ESTABLISHED)) {
|
|
|
|
atomic_dec(&dest->activeconns);
|
|
|
|
atomic_inc(&dest->inactconns);
|
|
|
|
cp->flags |= IP_VS_CONN_F_INACTIVE;
|
|
|
|
} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
|
|
|
|
(next_state == IP_VS_SCTP_S_ESTABLISHED)) {
|
|
|
|
atomic_inc(&dest->activeconns);
|
|
|
|
atomic_dec(&dest->inactconns);
|
|
|
|
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2011-01-03 14:44:49 +01:00
|
|
|
if (likely(pd))
|
|
|
|
cp->timeout = pd->timeout_table[cp->state = next_state];
|
|
|
|
else /* What to do ? */
|
|
|
|
cp->timeout = sctp_timeouts[cp->state = next_state];
|
2010-02-18 12:31:05 +01:00
|
|
|
}
|
|
|
|
|
2011-09-16 07:11:49 +02:00
|
|
|
static void
|
2010-02-18 12:31:05 +01:00
|
|
|
sctp_state_transition(struct ip_vs_conn *cp, int direction,
|
2011-01-03 14:44:51 +01:00
|
|
|
const struct sk_buff *skb, struct ip_vs_proto_data *pd)
|
2010-02-18 12:31:05 +01:00
|
|
|
{
|
2013-03-22 10:46:54 +01:00
|
|
|
spin_lock_bh(&cp->lock);
|
2011-09-16 07:11:49 +02:00
|
|
|
set_sctp_state(pd, cp, direction, skb);
|
2013-03-22 10:46:54 +01:00
|
|
|
spin_unlock_bh(&cp->lock);
|
2010-02-18 12:31:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Hash a port (as stored, in network byte order) into an index of the
 * sctp_apps table: fold the upper bits onto the lower ones and mask with
 * SCTP_APP_TAB_MASK.
 */
static inline __u16 sctp_app_hashkey(__be16 port)
{
	u16 raw = (__force u16)port;

	return ((raw >> SCTP_APP_TAB_BITS) ^ raw) & SCTP_APP_TAB_MASK;
}
|
|
|
|
|
2011-01-03 14:44:53 +01:00
|
|
|
static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
|
2010-02-18 12:31:05 +01:00
|
|
|
{
|
|
|
|
struct ip_vs_app *i;
|
|
|
|
__u16 hash;
|
|
|
|
__be16 port = inc->port;
|
|
|
|
int ret = 0;
|
2011-01-03 14:44:53 +01:00
|
|
|
struct netns_ipvs *ipvs = net_ipvs(net);
|
|
|
|
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
hash = sctp_app_hashkey(port);
|
|
|
|
|
2011-01-03 14:44:49 +01:00
|
|
|
list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
|
2010-02-18 12:31:05 +01:00
|
|
|
if (i->port == port) {
|
|
|
|
ret = -EEXIST;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
2013-03-21 10:58:07 +01:00
|
|
|
list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
|
2011-01-03 14:44:52 +01:00
|
|
|
atomic_inc(&pd->appcnt);
|
2010-02-18 12:31:05 +01:00
|
|
|
out:
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-01-03 14:44:53 +01:00
|
|
|
static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
|
2010-02-18 12:31:05 +01:00
|
|
|
{
|
2011-01-03 14:44:53 +01:00
|
|
|
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
|
2011-01-03 14:44:49 +01:00
|
|
|
|
2011-01-03 14:44:52 +01:00
|
|
|
atomic_dec(&pd->appcnt);
|
2013-03-21 10:58:07 +01:00
|
|
|
list_del_rcu(&inc->p_list);
|
2010-02-18 12:31:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
|
|
|
|
{
|
2011-01-03 14:44:57 +01:00
|
|
|
struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
|
2010-02-18 12:31:05 +01:00
|
|
|
int hash;
|
|
|
|
struct ip_vs_app *inc;
|
|
|
|
int result = 0;
|
|
|
|
|
|
|
|
/* Default binding: bind app only for NAT */
|
|
|
|
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
|
|
|
|
return 0;
|
|
|
|
/* Lookup application incarnations and bind the right one */
|
|
|
|
hash = sctp_app_hashkey(cp->vport);
|
|
|
|
|
2013-03-21 10:58:07 +01:00
|
|
|
rcu_read_lock();
|
|
|
|
list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
|
2010-02-18 12:31:05 +01:00
|
|
|
if (inc->port == cp->vport) {
|
|
|
|
if (unlikely(!ip_vs_app_inc_get(inc)))
|
|
|
|
break;
|
2013-03-21 10:58:07 +01:00
|
|
|
rcu_read_unlock();
|
2010-02-18 12:31:05 +01:00
|
|
|
|
|
|
|
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
|
|
|
|
"%s:%u to app %s on port %u\n",
|
|
|
|
__func__,
|
|
|
|
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
|
|
|
|
ntohs(cp->cport),
|
|
|
|
IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
|
|
|
|
ntohs(cp->vport),
|
|
|
|
inc->name, ntohs(inc->port));
|
|
|
|
cp->app = inc;
|
|
|
|
if (inc->init_conn)
|
|
|
|
result = inc->init_conn(inc, cp);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
2013-03-21 10:58:07 +01:00
|
|
|
rcu_read_unlock();
|
2010-02-18 12:31:05 +01:00
|
|
|
out:
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2011-01-03 14:44:49 +01:00
|
|
|
/* ---------------------------------------------
|
|
|
|
* timeouts is netns related now.
|
|
|
|
* ---------------------------------------------
|
|
|
|
*/
|
2012-04-26 09:45:35 +02:00
|
|
|
static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
|
2010-02-18 12:31:05 +01:00
|
|
|
{
|
2011-01-03 14:44:49 +01:00
|
|
|
struct netns_ipvs *ipvs = net_ipvs(net);
|
2010-02-18 12:31:05 +01:00
|
|
|
|
2011-01-03 14:44:49 +01:00
|
|
|
ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
|
|
|
|
pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
|
|
|
|
sizeof(sctp_timeouts));
|
2012-04-26 09:45:35 +02:00
|
|
|
if (!pd->timeout_table)
|
|
|
|
return -ENOMEM;
|
|
|
|
return 0;
|
2011-01-03 14:44:49 +01:00
|
|
|
}
|
2010-02-18 12:31:05 +01:00
|
|
|
|
2011-01-03 14:44:49 +01:00
|
|
|
static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
|
2010-02-18 12:31:05 +01:00
|
|
|
{
|
2011-01-03 14:44:49 +01:00
|
|
|
kfree(pd->timeout_table);
|
2010-02-18 12:31:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
struct ip_vs_protocol ip_vs_protocol_sctp = {
|
2011-01-03 14:44:49 +01:00
|
|
|
.name = "SCTP",
|
|
|
|
.protocol = IPPROTO_SCTP,
|
|
|
|
.num_states = IP_VS_SCTP_S_LAST,
|
|
|
|
.dont_defrag = 0,
|
|
|
|
.init = NULL,
|
|
|
|
.exit = NULL,
|
|
|
|
.init_netns = __ip_vs_sctp_init,
|
|
|
|
.exit_netns = __ip_vs_sctp_exit,
|
|
|
|
.register_app = sctp_register_app,
|
2010-02-18 12:31:05 +01:00
|
|
|
.unregister_app = sctp_unregister_app,
|
2011-01-03 14:44:49 +01:00
|
|
|
.conn_schedule = sctp_conn_schedule,
|
|
|
|
.conn_in_get = ip_vs_conn_in_get_proto,
|
|
|
|
.conn_out_get = ip_vs_conn_out_get_proto,
|
|
|
|
.snat_handler = sctp_snat_handler,
|
|
|
|
.dnat_handler = sctp_dnat_handler,
|
|
|
|
.csum_check = sctp_csum_check,
|
|
|
|
.state_name = sctp_state_name,
|
2010-02-18 12:31:05 +01:00
|
|
|
.state_transition = sctp_state_transition,
|
2011-01-03 14:44:49 +01:00
|
|
|
.app_conn_bind = sctp_app_conn_bind,
|
|
|
|
.debug_packet = ip_vs_tcpudp_debug_packet,
|
|
|
|
.timeout_change = NULL,
|
2010-02-18 12:31:05 +01:00
|
|
|
};
|