From afe00251dd9b53d51de91ff0099961f42bbf3754 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Mon, 20 Mar 2006 17:43:56 -0800 Subject: [PATCH] [DCCP]: Initial feature negotiation implementation Still needs more work, but boots and doesn't crashes, even does some negotiation! 18:38:52.174934 127.0.0.1.43458 > 127.0.0.1.5001: request 18:38:52.218526 127.0.0.1.5001 > 127.0.0.1.43458: response 18:38:52.185398 127.0.0.1.43458 > 127.0.0.1.5001: :-) Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 35 ++- net/dccp/Makefile | 2 +- net/dccp/feat.c | 554 +++++++++++++++++++++++++++++++++++++++++++ net/dccp/feat.h | 28 +++ net/dccp/input.c | 3 + net/dccp/ipv4.c | 19 +- net/dccp/minisocks.c | 4 + net/dccp/options.c | 139 ++++++++++- net/dccp/output.c | 4 + net/dccp/proto.c | 53 +++++ net/dccp/timer.c | 12 + 11 files changed, 847 insertions(+), 6 deletions(-) create mode 100644 net/dccp/feat.c create mode 100644 net/dccp/feat.h diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 268b4579d7e5..f91c8a62406d 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -154,6 +154,10 @@ enum { DCCPO_MANDATORY = 1, DCCPO_MIN_RESERVED = 3, DCCPO_MAX_RESERVED = 31, + DCCPO_CHANGE_L = 32, + DCCPO_CONFIRM_L = 33, + DCCPO_CHANGE_R = 34, + DCCPO_CONFIRM_R = 35, DCCPO_NDP_COUNT = 37, DCCPO_ACK_VECTOR_0 = 38, DCCPO_ACK_VECTOR_1 = 39, @@ -168,7 +172,9 @@ enum { /* DCCP features */ enum { DCCPF_RESERVED = 0, + DCCPF_CCID = 1, DCCPF_SEQUENCE_WINDOW = 3, + DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, /* 10-127 reserved */ @@ -176,9 +182,18 @@ enum { DCCPF_MAX_CCID_SPECIFIC = 255, }; +/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ +struct dccp_so_feat { + __u8 dccpsf_feat; + __u8 *dccpsf_val; + __u8 dccpsf_len; +}; + /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 #define DCCP_SOCKOPT_SERVICE 2 +#define DCCP_SOCKOPT_CHANGE_L 3 +#define DCCP_SOCKOPT_CHANGE_R 4 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 @@ -314,8 +329,8 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) /* initial values for each feature */ #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 -/* FIXME: for now we're using CCID 2 (TCP-Like) */ #define DCCPF_INITIAL_CCID 2 +#define DCCPF_INITIAL_ACK_RATIO 2 #define DCCPF_INITIAL_SEND_ACK_VECTOR 1 /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 @@ -335,6 +350,24 @@ struct dccp_options { __u8 dccpo_tx_ccid; __u8 dccpo_send_ack_vector; __u8 dccpo_send_ndp_count; + __u8 dccpo_ack_ratio; + struct list_head dccpo_pending; + struct list_head dccpo_conf; +}; + +struct dccp_opt_conf { + __u8 *dccpoc_val; + __u8 dccpoc_len; +}; + +struct dccp_opt_pend { + struct list_head dccpop_node; + __u8 dccpop_type; + __u8 dccpop_feat; + __u8 *dccpop_val; + __u8 dccpop_len; + int dccpop_conf; + struct dccp_opt_conf *dccpop_sc; }; extern void __dccp_options_init(struct dccp_options *dccpo); diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 87b27fff6e3b..5736acea1c86 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -4,7 +4,7 @@ dccp_ipv6-y := ipv6.o obj-$(CONFIG_IP_DCCP) += dccp.o -dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ +dccp-y := ccid.o feat.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o diff --git a/net/dccp/feat.c b/net/dccp/feat.c new file mode 100644 index 000000000000..99d7b7f9efa9 --- /dev/null +++ b/net/dccp/feat.c @@ -0,0 +1,554 @@ +/* + * net/dccp/feat.c + * + * An implementation of the DCCP protocol + * Andrea Bittau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +#include "dccp.h" +#include "feat.h" + +#define DCCP_FEAT_SP_NOAGREE (-123) + +int dccp_feat_change(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len, + gfp_t gfp) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_opt_pend *opt; + + dccp_pr_debug("feat change type=%d feat=%d\n", type, feature); + + /* check if that feature is already being negotiated */ + list_for_each_entry(opt, &dp->dccps_options.dccpo_pending, + dccpop_node) { + /* ok we found a negotiation for this option already */ + if (opt->dccpop_feat == feature && opt->dccpop_type == type) { + dccp_pr_debug("Replacing old\n"); + /* replace */ + BUG_ON(opt->dccpop_val == NULL); + kfree(opt->dccpop_val); + opt->dccpop_val = val; + opt->dccpop_len = len; + opt->dccpop_conf = 0; + return 0; + } + } + + /* negotiation for a new feature */ + opt = kmalloc(sizeof(*opt), gfp); + if (opt == NULL) + return -ENOMEM; + + opt->dccpop_type = type; + opt->dccpop_feat = feature; + opt->dccpop_len = len; + opt->dccpop_val = val; + opt->dccpop_conf = 0; + opt->dccpop_sc = NULL; + + BUG_ON(opt->dccpop_val == NULL); + + list_add_tail(&opt->dccpop_node, &dp->dccps_options.dccpo_pending); + return 0; +} + +EXPORT_SYMBOL_GPL(dccp_feat_change); + +/* XXX taking only u8 vals */ +static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val) +{ + /* FIXME implement */ + dccp_pr_debug("changing [%d] feat %d to %d\n", type, feat, val); + return 0; +} + +static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt, + u8 *rpref, u8 rlen) +{ + struct dccp_sock *dp = dccp_sk(sk); + u8 *spref, slen, *res = NULL; + int i, j, rc, agree = 1; + + BUG_ON(rpref == NULL); + + /* check if we are the black sheep */ + if (dp->dccps_role == DCCP_ROLE_CLIENT) { + spref = rpref; + slen = rlen; + rpref = opt->dccpop_val; + rlen = opt->dccpop_len; + } else { + spref = opt->dccpop_val; + slen = opt->dccpop_len; + } + /* + * Now we have server preference list in spref and client preference in + * rpref + */ + BUG_ON(spref == NULL); + BUG_ON(rpref == NULL); + + /* FIXME sanity check vals */ + + /* Are values in any order? XXX Lame "algorithm" here */ + /* XXX assume values are 1 byte */ + for (i = 0; i < slen; i++) { + for (j = 0; j < rlen; j++) { + if (spref[i] == rpref[j]) { + res = &spref[i]; + break; + } + } + if (res) + break; + } + + /* we didn't agree on anything */ + if (res == NULL) { + /* confirm previous value */ + switch (opt->dccpop_feat) { + case DCCPF_CCID: + /* XXX did i get this right? =P */ + if (opt->dccpop_type == DCCPO_CHANGE_L) + res = &dp->dccps_options.dccpo_tx_ccid; + else + res = &dp->dccps_options.dccpo_rx_ccid; + break; + + default: + WARN_ON(1); /* XXX implement res */ + return -EFAULT; + } + + dccp_pr_debug("Don't agree... reconfirming %d\n", *res); + agree = 0; /* this is used for mandatory options... */ + } + + /* need to put result and our preference list */ + /* XXX assume 1 byte vals */ + rlen = 1 + opt->dccpop_len; + rpref = kmalloc(rlen, GFP_ATOMIC); + if (rpref == NULL) + return -ENOMEM; + + *rpref = *res; + memcpy(&rpref[1], opt->dccpop_val, opt->dccpop_len); + + /* put it in the "confirm queue" */ + if (opt->dccpop_sc == NULL) { + opt->dccpop_sc = kmalloc(sizeof(*opt->dccpop_sc), GFP_ATOMIC); + if (opt->dccpop_sc == NULL) { + kfree(rpref); + return -ENOMEM; + } + } else { + /* recycle the confirm slot */ + BUG_ON(opt->dccpop_sc->dccpoc_val == NULL); + kfree(opt->dccpop_sc->dccpoc_val); + dccp_pr_debug("recycling confirm slot\n"); + } + memset(opt->dccpop_sc, 0, sizeof(*opt->dccpop_sc)); + + opt->dccpop_sc->dccpoc_val = rpref; + opt->dccpop_sc->dccpoc_len = rlen; + + /* update the option on our side [we are about to send the confirm] */ + rc = dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *res); + if (rc) { + kfree(opt->dccpop_sc->dccpoc_val); + kfree(opt->dccpop_sc); + opt->dccpop_sc = 0; + return rc; + } + + dccp_pr_debug("Will confirm %d\n", *rpref); + + /* say we want to change to X but we just got a confirm X, suppress our + * change + */ + if (!opt->dccpop_conf) { + if (*opt->dccpop_val == *res) + opt->dccpop_conf = 1; + dccp_pr_debug("won't ask for change of same feature\n"); + } + + return agree ? 0 : DCCP_FEAT_SP_NOAGREE; /* used for mandatory opts */ +} + +static int dccp_feat_sp(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_opt_pend *opt; + int rc = 1; + u8 t; + + /* + * We received a CHANGE. We gotta match it against our own preference + * list. If we got a CHANGE_R it means it's a change for us, so we need + * to compare our CHANGE_L list. + */ + if (type == DCCPO_CHANGE_L) + t = DCCPO_CHANGE_R; + else + t = DCCPO_CHANGE_L; + + /* find our preference list for this feature */ + list_for_each_entry(opt, &dp->dccps_options.dccpo_pending, + dccpop_node) { + if (opt->dccpop_type != t || opt->dccpop_feat != feature) + continue; + + /* find the winner from the two preference lists */ + rc = dccp_feat_reconcile(sk, opt, val, len); + break; + } + + /* We didn't deal with the change. This can happen if we have no + * preference list for the feature. In fact, it just shouldn't + * happen---if we understand a feature, we should have a preference list + * with at least the default value. + */ + BUG_ON(rc == 1); + + return rc; +} + +static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) +{ + struct dccp_opt_pend *opt; + struct dccp_sock *dp = dccp_sk(sk); + u8 *copy; + int rc; + + /* NN features must be change L */ + if (type == DCCPO_CHANGE_R) { + dccp_pr_debug("received CHANGE_R %d for NN feat %d\n", + type, feature); + return -EFAULT; + } + + /* XXX sanity check opt val */ + + /* copy option so we can confirm it */ + opt = kzalloc(sizeof(*opt), GFP_ATOMIC); + if (opt == NULL) + return -ENOMEM; + + copy = kmalloc(len, GFP_ATOMIC); + if (copy == NULL) { + kfree(opt); + return -ENOMEM; + } + memcpy(copy, val, len); + + opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */ + opt->dccpop_feat = feature; + opt->dccpop_val = copy; + opt->dccpop_len = len; + + /* change feature */ + rc = dccp_feat_update(sk, type, feature, *val); + if (rc) { + kfree(opt->dccpop_val); + kfree(opt); + return rc; + } + + dccp_pr_debug("Confirming NN feature %d (val=%d)\n", feature, *copy); + list_add_tail(&opt->dccpop_node, &dp->dccps_options.dccpo_conf); + + return 0; +} + +static void dccp_feat_empty_confirm(struct sock *sk, u8 type, u8 feature) +{ + struct dccp_sock *dp = dccp_sk(sk); + /* XXX check if other confirms for that are queued and recycle slot */ + struct dccp_opt_pend *opt = kzalloc(sizeof(*opt), GFP_ATOMIC); + + if (opt == NULL) { + /* XXX what do we do? Ignoring should be fine. It's a change + * after all =P + */ + return; + } + + opt->dccpop_type = type == DCCPO_CHANGE_L ? DCCPO_CONFIRM_R : + DCCPO_CONFIRM_L; + opt->dccpop_feat = feature; + opt->dccpop_val = 0; + opt->dccpop_len = 0; + + /* change feature */ + dccp_pr_debug("Empty confirm feature %d type %d\n", feature, type); + list_add_tail(&opt->dccpop_node, &dp->dccps_options.dccpo_conf); +} + +static void dccp_feat_flush_confirm(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + /* Check if there is anything to confirm in the first place */ + int yes = !list_empty(&dp->dccps_options.dccpo_conf); + + if (!yes) { + struct dccp_opt_pend *opt; + + list_for_each_entry(opt, &dp->dccps_options.dccpo_pending, + dccpop_node) { + if (opt->dccpop_conf) { + yes = 1; + break; + } + } + } + + if (!yes) + return; + + /* OK there is something to confirm... */ + /* XXX check if packet is in flight? Send delayed ack?? */ + if (sk->sk_state == DCCP_OPEN) + dccp_send_ack(sk); +} + +int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) +{ + int rc; + + dccp_pr_debug("got feat change type=%d feat=%d\n", type, feature); + + /* figure out if it's SP or NN feature */ + switch (feature) { + /* deal with SP features */ + case DCCPF_CCID: + rc = dccp_feat_sp(sk, type, feature, val, len); + break; + + /* deal with NN features */ + case DCCPF_ACK_RATIO: + rc = dccp_feat_nn(sk, type, feature, val, len); + break; + + /* XXX implement other features */ + default: + rc = -EFAULT; + break; + } + + /* check if there were problems changing features */ + if (rc) { + /* If we don't agree on SP, we sent a confirm for old value. + * However we propagate rc to caller in case option was + * mandatory + */ + if (rc != DCCP_FEAT_SP_NOAGREE) + dccp_feat_empty_confirm(sk, type, feature); + } + + /* generate the confirm [if required] */ + dccp_feat_flush_confirm(sk); + + return rc; +} + +EXPORT_SYMBOL_GPL(dccp_feat_change_recv); + +int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, + u8 *val, u8 len) +{ + u8 t; + struct dccp_opt_pend *opt; + struct dccp_sock *dp = dccp_sk(sk); + int rc = 1; + int all_confirmed = 1; + + dccp_pr_debug("got feat confirm type=%d feat=%d\n", type, feature); + + /* XXX sanity check type & feat */ + + /* locate our change request */ + t = type == DCCPO_CONFIRM_L ? DCCPO_CHANGE_R : DCCPO_CHANGE_L; + + list_for_each_entry(opt, &dp->dccps_options.dccpo_pending, + dccpop_node) { + if (!opt->dccpop_conf && opt->dccpop_type == t && + opt->dccpop_feat == feature) { + /* we found it */ + /* XXX do sanity check */ + + opt->dccpop_conf = 1; + + /* We got a confirmation---change the option */ + dccp_feat_update(sk, opt->dccpop_type, + opt->dccpop_feat, *val); + + dccp_pr_debug("feat %d type %d confirmed %d\n", + feature, type, *val); + rc = 0; + break; + } + + if (!opt->dccpop_conf) + all_confirmed = 0; + } + + /* fix re-transmit timer */ + /* XXX gotta make sure that no option negotiation occurs during + * connection shutdown. Consider that the CLOSEREQ is sent and timer is + * on. if all options are confirmed it might kill timer which should + * remain alive until close is received. + */ + if (all_confirmed) { + dccp_pr_debug("clear feat negotiation timer %p\n", sk); + inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); + } + + if (rc) + dccp_pr_debug("feat %d type %d never requested\n", + feature, type); + return 0; +} + +EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv); + +void dccp_feat_clean(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_opt_pend *opt, *next; + + list_for_each_entry_safe(opt, next, &dp->dccps_options.dccpo_pending, + dccpop_node) { + BUG_ON(opt->dccpop_val == NULL); + kfree(opt->dccpop_val); + + if (opt->dccpop_sc != NULL) { + BUG_ON(opt->dccpop_sc->dccpoc_val == NULL); + kfree(opt->dccpop_sc->dccpoc_val); + kfree(opt->dccpop_sc); + } + + kfree(opt); + } + INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending); + + list_for_each_entry_safe(opt, next, &dp->dccps_options.dccpo_conf, + dccpop_node) { + BUG_ON(opt == NULL); + if (opt->dccpop_val != NULL) + kfree(opt->dccpop_val); + kfree(opt); + } + INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf); +} + +EXPORT_SYMBOL_GPL(dccp_feat_clean); + +/* this is to be called only when a listening sock creates its child. It is + * assumed by the function---the confirm is not duplicated, but rather it is + * "passed on". + */ +int dccp_feat_clone(struct sock *oldsk, struct sock *newsk) +{ + struct dccp_sock *olddp = dccp_sk(oldsk); + struct dccp_sock *newdp = dccp_sk(newsk); + struct dccp_opt_pend *opt; + int rc = 0; + + INIT_LIST_HEAD(&newdp->dccps_options.dccpo_pending); + INIT_LIST_HEAD(&newdp->dccps_options.dccpo_conf); + + list_for_each_entry(opt, &olddp->dccps_options.dccpo_pending, + dccpop_node) { + struct dccp_opt_pend *newopt; + /* copy the value of the option */ + u8 *val = kmalloc(opt->dccpop_len, GFP_ATOMIC); + + if (val == NULL) + goto out_clean; + memcpy(val, opt->dccpop_val, opt->dccpop_len); + + newopt = kmalloc(sizeof(*newopt), GFP_ATOMIC); + if (newopt == NULL) { + kfree(val); + goto out_clean; + } + + /* insert the option */ + memcpy(newopt, opt, sizeof(*newopt)); + newopt->dccpop_val = val; + list_add_tail(&newopt->dccpop_node, + &newdp->dccps_options.dccpo_pending); + + /* XXX what happens with backlogs and multiple connections at + * once... + */ + /* the master socket no longer needs to worry about confirms */ + opt->dccpop_sc = 0; /* it's not a memleak---new socket has it */ + + /* reset state for a new socket */ + opt->dccpop_conf = 0; + } + + /* XXX not doing anything about the conf queue */ + +out: + return rc; + +out_clean: + dccp_feat_clean(newsk); + rc = -ENOMEM; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_feat_clone); + +static int __dccp_feat_init(struct sock *sk, u8 type, u8 feat, u8 *val, u8 len) +{ + int rc = -ENOMEM; + u8 *copy = kmalloc(len, GFP_KERNEL); + + if (copy != NULL) { + memcpy(copy, val, len); + rc = dccp_feat_change(sk, type, feat, copy, len, GFP_KERNEL); + if (rc) + kfree(copy); + } + return rc; +} + +int dccp_feat_init(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + int rc; + + INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending); + INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf); + + /* CCID L */ + rc = __dccp_feat_init(sk, DCCPO_CHANGE_L, DCCPF_CCID, + &dp->dccps_options.dccpo_tx_ccid, 1); + if (rc) + goto out; + + /* CCID R */ + rc = __dccp_feat_init(sk, DCCPO_CHANGE_R, DCCPF_CCID, + &dp->dccps_options.dccpo_rx_ccid, 1); + if (rc) + goto out; + + /* Ack ratio */ + rc = __dccp_feat_init(sk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO, + &dp->dccps_options.dccpo_ack_ratio, 1); +out: + return rc; +} + +EXPORT_SYMBOL_GPL(dccp_feat_init); diff --git a/net/dccp/feat.h b/net/dccp/feat.h new file mode 100644 index 000000000000..67da06265f14 --- /dev/null +++ b/net/dccp/feat.h @@ -0,0 +1,28 @@ +#ifndef _DCCP_FEAT_H +#define _DCCP_FEAT_H +/* + * net/dccp/feat.h + * + * An implementation of the DCCP protocol + * Copyright (c) 2005 Andrea Bittau + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +struct sock; + +extern int dccp_feat_change(struct sock *sk, u8 type, u8 feature, + u8 *val, u8 len, gfp_t gfp); +extern int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, + u8 *val, u8 len); +extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, + u8 *val, u8 len); +extern void dccp_feat_clean(struct sock *sk); +extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); +extern int dccp_feat_init(struct sock *sk); + +#endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index b6cba72b44e8..4b6d43d8b920 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -300,6 +300,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, goto out_invalid_packet; } + if (dccp_parse_options(sk, skb)) + goto out_invalid_packet; + if (dp->dccps_options.dccpo_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_seq, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 38321ad81875..fcfb486f90c2 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -28,6 +28,7 @@ #include "ackvec.h" #include "ccid.h" #include "dccp.h" +#include "feat.h" struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { .lhash_lock = RW_LOCK_UNLOCKED, @@ -535,7 +536,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - /* FIXME: process options */ + if (dccp_parse_options(sk, skb)) + goto drop; dccp_openreq_init(req, &dp, skb); @@ -1049,6 +1051,8 @@ int dccp_v4_init_sock(struct sock *sk) * setsockopt(CCIDs-I-want/accept). -acme */ if (likely(!dccp_ctl_socket_init)) { + int rc; + if (dp->dccps_options.dccpo_send_ack_vector) { dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL); if (dp->dccps_hc_rx_ackvec == NULL) @@ -1069,8 +1073,16 @@ int dccp_v4_init_sock(struct sock *sk) dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; return -ENOMEM; } - } else + + rc = dccp_feat_init(sk); + if (rc) + return rc; + } else { + /* control socket doesn't need feat nego */ + INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending); + INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf); dccp_ctl_socket_init = 0; + } dccp_init_xmit_timers(sk); icsk->icsk_rto = DCCP_TIMEOUT_INIT; @@ -1118,6 +1130,9 @@ int dccp_v4_destroy_sock(struct sock *sk) ccid_exit(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + /* clean up feature negotiation state */ + dccp_feat_clean(sk); + return 0; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index a60a3e948c36..9e1de5919ee5 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -22,6 +22,7 @@ #include "ackvec.h" #include "ccid.h" #include "dccp.h" +#include "feat.h" struct inet_timewait_death_row dccp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, @@ -114,6 +115,9 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newicsk->icsk_rto = DCCP_TIMEOUT_INIT; do_gettimeofday(&newdp->dccps_epoch); + if (dccp_feat_clone(sk, newsk)) + goto out_free; + if (newdp->dccps_options.dccpo_send_ack_vector) { newdp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_ATOMIC); diff --git a/net/dccp/options.c b/net/dccp/options.c index 0a76426c9aea..7f99306c8e99 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -21,12 +21,14 @@ #include "ackvec.h" #include "ccid.h" #include "dccp.h" +#include "feat.h" /* stores the default values for new connection. may be changed with sysctl */ static const struct dccp_options dccpo_default_values = { .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, .dccpo_rx_ccid = DCCPF_INITIAL_CCID, .dccpo_tx_ccid = DCCPF_INITIAL_CCID, + .dccpo_ack_ratio = DCCPF_INITIAL_ACK_RATIO, .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, }; @@ -69,6 +71,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) unsigned char opt, len; unsigned char *value; u32 elapsed_time; + int rc; + int mandatory = 0; memset(opt_recv, 0, sizeof(*opt_recv)); @@ -100,6 +104,11 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) switch (opt) { case DCCPO_PADDING: break; + case DCCPO_MANDATORY: + if (mandatory) + goto out_invalid_option; + mandatory = 1; + break; case DCCPO_NDP_COUNT: if (len > 3) goto out_invalid_option; @@ -108,6 +117,31 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp); break; + case DCCPO_CHANGE_L: + /* fall through */ + case DCCPO_CHANGE_R: + if (len < 2) + goto out_invalid_option; + rc = dccp_feat_change_recv(sk, opt, *value, value + 1, + len - 1); + /* + * When there is a change error, change_recv is + * responsible for dealing with it. i.e. reply with an + * empty confirm. + * If the change was mandatory, then we need to die. + */ + if (rc && mandatory) + goto out_invalid_option; + break; + case DCCPO_CONFIRM_L: + /* fall through */ + case DCCPO_CONFIRM_R: + if (len < 2) + goto out_invalid_option; + if (dccp_feat_confirm_recv(sk, opt, *value, + value + 1, len - 1)) + goto out_invalid_option; + break; case DCCPO_ACK_VECTOR_0: case DCCPO_ACK_VECTOR_1: if (pkt_type == DCCP_PKT_DATA) @@ -208,6 +242,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) sk, opt, len); break; } + + if (opt != DCCPO_MANDATORY) + mandatory = 0; } return 0; @@ -356,7 +393,7 @@ void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { struct timeval tv; u32 now; - + dccp_timestamp(sk, &tv); now = timeval_usecs(&tv) / 10; /* yes this will overflow but that is the point as we want a @@ -402,7 +439,7 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, tstamp_echo = htonl(dp->dccps_timestamp_echo); memcpy(to, &tstamp_echo, 4); to += 4; - + if (elapsed_time_len == 2) { const u16 var16 = htons((u16)elapsed_time); memcpy(to, &var16, 2); @@ -421,6 +458,93 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, dp->dccps_timestamp_time.tv_usec = 0; } +static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, + u8 *val, u8 len) +{ + u8 *to; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small" + " to insert feature %d option!\n", feat); + return -1; + } + + DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3; + + to = skb_push(skb, len + 3); + *to++ = type; + *to++ = len + 3; + *to++ = feat; + + if (len) + memcpy(to, val, len); + dccp_pr_debug("option %d feat %d len %d\n", type, feat, len); + + return 0; +} + +static void dccp_insert_feat(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_opt_pend *opt, *next; + int change = 0; + + /* confirm any options [NN opts] */ + list_for_each_entry_safe(opt, next, &dp->dccps_options.dccpo_conf, + dccpop_node) { + dccp_insert_feat_opt(skb, opt->dccpop_type, + opt->dccpop_feat, opt->dccpop_val, + opt->dccpop_len); + /* fear empty confirms */ + if (opt->dccpop_val) + kfree(opt->dccpop_val); + kfree(opt); + } + INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf); + + /* see which features we need to send */ + list_for_each_entry(opt, &dp->dccps_options.dccpo_pending, + dccpop_node) { + /* see if we need to send any confirm */ + if (opt->dccpop_sc) { + dccp_insert_feat_opt(skb, opt->dccpop_type + 1, + opt->dccpop_feat, + opt->dccpop_sc->dccpoc_val, + opt->dccpop_sc->dccpoc_len); + + BUG_ON(!opt->dccpop_sc->dccpoc_val); + kfree(opt->dccpop_sc->dccpoc_val); + kfree(opt->dccpop_sc); + opt->dccpop_sc = NULL; + } + + /* any option not confirmed, re-send it */ + if (!opt->dccpop_conf) { + dccp_insert_feat_opt(skb, opt->dccpop_type, + opt->dccpop_feat, opt->dccpop_val, + opt->dccpop_len); + change++; + } + } + + /* Retransmit timer. + * If this is the master listening sock, we don't set a timer on it. It + * should be fine because if the dude doesn't receive our RESPONSE + * [which will contain the CHANGE] he will send another REQUEST which + * will "retrnasmit" the change. + */ + if (change && dp->dccps_role != DCCP_ROLE_LISTEN) { + dccp_pr_debug("reset feat negotiation timer %p\n", sk); + + /* XXX don't reset the timer on re-transmissions. I.e. reset it + * only when sending new stuff i guess. Currently the timer + * never backs off because on re-transmission it just resets it! + */ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); + } +} + void dccp_insert_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); @@ -447,6 +571,17 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) dp->dccps_hc_tx_insert_options = 0; } + /* Feature negotiation */ + switch(DCCP_SKB_CB(skb)->dccpd_type) { + /* Data packets can't do feat negotiation */ + case DCCP_PKT_DATA: + case DCCP_PKT_DATAACK: + break; + default: + dccp_insert_feat(sk, skb); + break; + } + /* XXX: insert other options when appropriate */ if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { diff --git a/net/dccp/output.c b/net/dccp/output.c index efd7ffb903a1..0cc2bcf56522 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -64,6 +64,10 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_DATAACK: break; + case DCCP_PKT_REQUEST: + set_ack = 0; + /* fall through */ + case DCCP_PKT_SYNC: case DCCP_PKT_SYNCACK: ackno = dcb->dccpd_seq; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 81ad24953710..1a8cf8ecfe63 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -37,6 +37,7 @@ #include "ccid.h" #include "dccp.h" +#include "feat.h" DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; @@ -255,6 +256,39 @@ static int dccp_setsockopt_service(struct sock *sk, const u32 service, return 0; } +/* byte 1 is feature. the rest is the preference list */ +static int dccp_setsockopt_change(struct sock *sk, int type, + struct dccp_so_feat __user *optval) +{ + struct dccp_so_feat opt; + u8 *val; + int rc; + + if (copy_from_user(&opt, optval, sizeof(opt))) + return -EFAULT; + + val = kmalloc(opt.dccpsf_len, GFP_KERNEL); + if (!val) + return -ENOMEM; + + if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) { + rc = -EFAULT; + goto out_free_val; + } + + rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len, + GFP_KERNEL); + if (rc) + goto out_free_val; + +out: + return rc; + +out_free_val: + kfree(val); + goto out; +} + int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -284,6 +318,25 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_PACKET_SIZE: dp->dccps_packet_size = val; break; + + case DCCP_SOCKOPT_CHANGE_L: + if (optlen != sizeof(struct dccp_so_feat)) + err = -EINVAL; + else + err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L, + (struct dccp_so_feat *) + optval); + break; + + case DCCP_SOCKOPT_CHANGE_R: + if (optlen != sizeof(struct dccp_so_feat)) + err = -EINVAL; + else + err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R, + (struct dccp_so_feat *) + optval); + break; + default: err = -ENOPROTOOPT; break; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index aa34b576e228..d7c786608ec6 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -141,6 +141,17 @@ static void dccp_retransmit_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + /* retransmit timer is used for feature negotiation throughout + * connection. In this case, no packet is re-transmitted, but rather an + * ack is generated and pending changes are splaced into its options. + */ + if (sk->sk_send_head == NULL) { + dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk); + if (sk->sk_state == DCCP_OPEN) + dccp_send_ack(sk); + goto backoff; + } + /* * sk->sk_send_head has to have one skb with * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP @@ -177,6 +188,7 @@ static void dccp_retransmit_timer(struct sock *sk) goto out; } +backoff: icsk->icsk_backoff++; icsk->icsk_retransmits++;