85f7e7508a
congestion control algorithms that do not halve cwnd in their .ssthresh should provide a .cwnd_undo rather than rely on current fallback which assumes reno halving (and thus doubles the cwnd). All of these do 'something else' in their .ssthresh implementation, thus store the cwnd on loss and provide .undo_cwnd to restore it again. A followup patch will remove the fallback and all algorithms will need to provide a .cwnd_undo function. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: David S. Miller <davem@davemloft.net>
367 lines
8.5 KiB
C
367 lines
8.5 KiB
C
/*
|
|
* TCP Illinois congestion control.
|
|
* Home page:
|
|
* http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
|
|
*
|
|
* The algorithm is described in:
|
|
* "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
|
|
* for High-Speed Networks"
|
|
* http://www.ifp.illinois.edu/~srikant/Papers/liubassri06perf.pdf
|
|
*
|
|
* Implemented from description in paper and ns-2 simulation.
|
|
* Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/inet_diag.h>
|
|
#include <asm/div64.h>
|
|
#include <net/tcp.h>
|
|
|
|
#define ALPHA_SHIFT 7
|
|
#define ALPHA_SCALE (1u<<ALPHA_SHIFT)
|
|
#define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */
|
|
#define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */
|
|
#define ALPHA_BASE ALPHA_SCALE /* 1.0 */
|
|
#define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */
|
|
|
|
#define BETA_SHIFT 6
|
|
#define BETA_SCALE (1u<<BETA_SHIFT)
|
|
#define BETA_MIN (BETA_SCALE/8) /* 0.125 */
|
|
#define BETA_MAX (BETA_SCALE/2) /* 0.5 */
|
|
#define BETA_BASE BETA_MAX
|
|
|
|
static int win_thresh __read_mostly = 15;
|
|
module_param(win_thresh, int, 0);
|
|
MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
|
|
|
|
static int theta __read_mostly = 5;
|
|
module_param(theta, int, 0);
|
|
MODULE_PARM_DESC(theta, "# of fast RTT's before full growth");
|
|
|
|
/* TCP Illinois Parameters */
|
|
struct illinois {
|
|
u64 sum_rtt; /* sum of rtt's measured within last rtt */
|
|
u16 cnt_rtt; /* # of rtts measured within last rtt */
|
|
u32 base_rtt; /* min of all rtt in usec */
|
|
u32 max_rtt; /* max of all rtt in usec */
|
|
u32 end_seq; /* right edge of current RTT */
|
|
u32 alpha; /* Additive increase */
|
|
u32 beta; /* Muliplicative decrease */
|
|
u32 loss_cwnd; /* cwnd on loss */
|
|
u16 acked; /* # packets acked by current ACK */
|
|
u8 rtt_above; /* average rtt has gone above threshold */
|
|
u8 rtt_low; /* # of rtts measurements below threshold */
|
|
};
|
|
|
|
static void rtt_reset(struct sock *sk)
|
|
{
|
|
struct tcp_sock *tp = tcp_sk(sk);
|
|
struct illinois *ca = inet_csk_ca(sk);
|
|
|
|
ca->end_seq = tp->snd_nxt;
|
|
ca->cnt_rtt = 0;
|
|
ca->sum_rtt = 0;
|
|
|
|
/* TODO: age max_rtt? */
|
|
}
|
|
|
|
static void tcp_illinois_init(struct sock *sk)
|
|
{
|
|
struct illinois *ca = inet_csk_ca(sk);
|
|
|
|
ca->alpha = ALPHA_MAX;
|
|
ca->beta = BETA_BASE;
|
|
ca->base_rtt = 0x7fffffff;
|
|
ca->max_rtt = 0;
|
|
|
|
ca->acked = 0;
|
|
ca->rtt_low = 0;
|
|
ca->rtt_above = 0;
|
|
|
|
rtt_reset(sk);
|
|
}
|
|
|
|
/* Measure RTT for each ack. */
|
|
static void tcp_illinois_acked(struct sock *sk, const struct ack_sample *sample)
|
|
{
|
|
struct illinois *ca = inet_csk_ca(sk);
|
|
s32 rtt_us = sample->rtt_us;
|
|
|
|
ca->acked = sample->pkts_acked;
|
|
|
|
/* dup ack, no rtt sample */
|
|
if (rtt_us < 0)
|
|
return;
|
|
|
|
/* ignore bogus values, this prevents wraparound in alpha math */
|
|
if (rtt_us > RTT_MAX)
|
|
rtt_us = RTT_MAX;
|
|
|
|
/* keep track of minimum RTT seen so far */
|
|
if (ca->base_rtt > rtt_us)
|
|
ca->base_rtt = rtt_us;
|
|
|
|
/* and max */
|
|
if (ca->max_rtt < rtt_us)
|
|
ca->max_rtt = rtt_us;
|
|
|
|
++ca->cnt_rtt;
|
|
ca->sum_rtt += rtt_us;
|
|
}
|
|
|
|
/* Maximum queuing delay */
|
|
static inline u32 max_delay(const struct illinois *ca)
|
|
{
|
|
return ca->max_rtt - ca->base_rtt;
|
|
}
|
|
|
|
/* Average queuing delay */
|
|
static inline u32 avg_delay(const struct illinois *ca)
|
|
{
|
|
u64 t = ca->sum_rtt;
|
|
|
|
do_div(t, ca->cnt_rtt);
|
|
return t - ca->base_rtt;
|
|
}
|
|
|
|
/*
|
|
* Compute value of alpha used for additive increase.
|
|
* If small window then use 1.0, equivalent to Reno.
|
|
*
|
|
* For larger windows, adjust based on average delay.
|
|
* A. If average delay is at minimum (we are uncongested),
|
|
* then use large alpha (10.0) to increase faster.
|
|
* B. If average delay is at maximum (getting congested)
|
|
* then use small alpha (0.3)
|
|
*
|
|
* The result is a convex window growth curve.
|
|
*/
|
|
static u32 alpha(struct illinois *ca, u32 da, u32 dm)
|
|
{
|
|
u32 d1 = dm / 100; /* Low threshold */
|
|
|
|
if (da <= d1) {
|
|
/* If never got out of low delay zone, then use max */
|
|
if (!ca->rtt_above)
|
|
return ALPHA_MAX;
|
|
|
|
/* Wait for 5 good RTT's before allowing alpha to go alpha max.
|
|
* This prevents one good RTT from causing sudden window increase.
|
|
*/
|
|
if (++ca->rtt_low < theta)
|
|
return ca->alpha;
|
|
|
|
ca->rtt_low = 0;
|
|
ca->rtt_above = 0;
|
|
return ALPHA_MAX;
|
|
}
|
|
|
|
ca->rtt_above = 1;
|
|
|
|
/*
|
|
* Based on:
|
|
*
|
|
* (dm - d1) amin amax
|
|
* k1 = -------------------
|
|
* amax - amin
|
|
*
|
|
* (dm - d1) amin
|
|
* k2 = ---------------- - d1
|
|
* amax - amin
|
|
*
|
|
* k1
|
|
* alpha = ----------
|
|
* k2 + da
|
|
*/
|
|
|
|
dm -= d1;
|
|
da -= d1;
|
|
return (dm * ALPHA_MAX) /
|
|
(dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
|
|
}
|
|
|
|
/*
|
|
* Beta used for multiplicative decrease.
|
|
* For small window sizes returns same value as Reno (0.5)
|
|
*
|
|
* If delay is small (10% of max) then beta = 1/8
|
|
* If delay is up to 80% of max then beta = 1/2
|
|
* In between is a linear function
|
|
*/
|
|
static u32 beta(u32 da, u32 dm)
|
|
{
|
|
u32 d2, d3;
|
|
|
|
d2 = dm / 10;
|
|
if (da <= d2)
|
|
return BETA_MIN;
|
|
|
|
d3 = (8 * dm) / 10;
|
|
if (da >= d3 || d3 <= d2)
|
|
return BETA_MAX;
|
|
|
|
/*
|
|
* Based on:
|
|
*
|
|
* bmin d3 - bmax d2
|
|
* k3 = -------------------
|
|
* d3 - d2
|
|
*
|
|
* bmax - bmin
|
|
* k4 = -------------
|
|
* d3 - d2
|
|
*
|
|
* b = k3 + k4 da
|
|
*/
|
|
return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da)
|
|
/ (d3 - d2);
|
|
}
|
|
|
|
/* Update alpha and beta values once per RTT */
|
|
static void update_params(struct sock *sk)
|
|
{
|
|
struct tcp_sock *tp = tcp_sk(sk);
|
|
struct illinois *ca = inet_csk_ca(sk);
|
|
|
|
if (tp->snd_cwnd < win_thresh) {
|
|
ca->alpha = ALPHA_BASE;
|
|
ca->beta = BETA_BASE;
|
|
} else if (ca->cnt_rtt > 0) {
|
|
u32 dm = max_delay(ca);
|
|
u32 da = avg_delay(ca);
|
|
|
|
ca->alpha = alpha(ca, da, dm);
|
|
ca->beta = beta(da, dm);
|
|
}
|
|
|
|
rtt_reset(sk);
|
|
}
|
|
|
|
/*
|
|
* In case of loss, reset to default values
|
|
*/
|
|
static void tcp_illinois_state(struct sock *sk, u8 new_state)
|
|
{
|
|
struct illinois *ca = inet_csk_ca(sk);
|
|
|
|
if (new_state == TCP_CA_Loss) {
|
|
ca->alpha = ALPHA_BASE;
|
|
ca->beta = BETA_BASE;
|
|
ca->rtt_low = 0;
|
|
ca->rtt_above = 0;
|
|
rtt_reset(sk);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Increase window in response to successful acknowledgment.
|
|
*/
|
|
static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
|
|
{
|
|
struct tcp_sock *tp = tcp_sk(sk);
|
|
struct illinois *ca = inet_csk_ca(sk);
|
|
|
|
if (after(ack, ca->end_seq))
|
|
update_params(sk);
|
|
|
|
/* RFC2861 only increase cwnd if fully utilized */
|
|
if (!tcp_is_cwnd_limited(sk))
|
|
return;
|
|
|
|
/* In slow start */
|
|
if (tcp_in_slow_start(tp))
|
|
tcp_slow_start(tp, acked);
|
|
|
|
else {
|
|
u32 delta;
|
|
|
|
/* snd_cwnd_cnt is # of packets since last cwnd increment */
|
|
tp->snd_cwnd_cnt += ca->acked;
|
|
ca->acked = 1;
|
|
|
|
/* This is close approximation of:
|
|
* tp->snd_cwnd += alpha/tp->snd_cwnd
|
|
*/
|
|
delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
|
|
if (delta >= tp->snd_cwnd) {
|
|
tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
|
|
(u32)tp->snd_cwnd_clamp);
|
|
tp->snd_cwnd_cnt = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
static u32 tcp_illinois_ssthresh(struct sock *sk)
|
|
{
|
|
struct tcp_sock *tp = tcp_sk(sk);
|
|
struct illinois *ca = inet_csk_ca(sk);
|
|
|
|
ca->loss_cwnd = tp->snd_cwnd;
|
|
/* Multiplicative decrease */
|
|
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
|
|
}
|
|
|
|
static u32 tcp_illinois_cwnd_undo(struct sock *sk)
|
|
{
|
|
const struct illinois *ca = inet_csk_ca(sk);
|
|
|
|
return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
|
|
}
|
|
|
|
/* Extract info for Tcp socket info provided via netlink. */
|
|
static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
|
|
union tcp_cc_info *info)
|
|
{
|
|
const struct illinois *ca = inet_csk_ca(sk);
|
|
|
|
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
|
|
info->vegas.tcpv_enabled = 1;
|
|
info->vegas.tcpv_rttcnt = ca->cnt_rtt;
|
|
info->vegas.tcpv_minrtt = ca->base_rtt;
|
|
info->vegas.tcpv_rtt = 0;
|
|
|
|
if (info->vegas.tcpv_rttcnt > 0) {
|
|
u64 t = ca->sum_rtt;
|
|
|
|
do_div(t, info->vegas.tcpv_rttcnt);
|
|
info->vegas.tcpv_rtt = t;
|
|
}
|
|
*attr = INET_DIAG_VEGASINFO;
|
|
return sizeof(struct tcpvegas_info);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static struct tcp_congestion_ops tcp_illinois __read_mostly = {
|
|
.init = tcp_illinois_init,
|
|
.ssthresh = tcp_illinois_ssthresh,
|
|
.undo_cwnd = tcp_illinois_cwnd_undo,
|
|
.cong_avoid = tcp_illinois_cong_avoid,
|
|
.set_state = tcp_illinois_state,
|
|
.get_info = tcp_illinois_info,
|
|
.pkts_acked = tcp_illinois_acked,
|
|
|
|
.owner = THIS_MODULE,
|
|
.name = "illinois",
|
|
};
|
|
|
|
static int __init tcp_illinois_register(void)
|
|
{
|
|
BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
|
|
return tcp_register_congestion_control(&tcp_illinois);
|
|
}
|
|
|
|
static void __exit tcp_illinois_unregister(void)
|
|
{
|
|
tcp_unregister_congestion_control(&tcp_illinois);
|
|
}
|
|
|
|
module_init(tcp_illinois_register);
|
|
module_exit(tcp_illinois_unregister);
|
|
|
|
MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_DESCRIPTION("TCP Illinois");
|
|
MODULE_VERSION("1.0");
|