diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 02ff2dde9609..a5786e3e2c16 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -128,6 +128,9 @@ static const u32 bbr_probe_rtt_mode_ms = 200; /* Skip TSO below the following bandwidth (bits/sec): */ static const int bbr_min_tso_rate = 1200000; +/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. */ +static const int bbr_pacing_marging_percent = 1; + /* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain * that will allow a smoothly increasing pacing rate that will double each RTT * and send the same number of packets per RTT that an un-paced, slow-starting @@ -208,12 +211,10 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) { unsigned int mss = tcp_sk(sk)->mss_cache; - if (!tcp_needs_internal_pacing(sk)) - mss = tcp_mss_to_mtu(sk, mss); rate *= mss; rate *= gain; rate >>= BBR_SCALE; - rate *= USEC_PER_SEC; + rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_marging_percent); return rate >> BW_SCALE; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a87068fa9b1a..2adb719e97b8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1012,9 +1012,23 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) sock_hold(sk); } -static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb) +static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + skb->skb_mstamp_ns = tp->tcp_wstamp_ns; + if (sk->sk_pacing_status != SK_PACING_NONE) { + u32 rate = sk->sk_pacing_rate; + + /* Original sch_fq does not pace the first 10 MSS. + * Note that tp->data_segs_out overflows after 2^32 packets; + * this is a minor annoyance. 
+ */ + if (rate != ~0U && rate && tp->data_segs_out >= 10) { + tp->tcp_wstamp_ns += div_u64((u64)skb->len * NSEC_PER_SEC, rate); + /* TODO: update internal pacing here */ + } + } list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); } @@ -1178,7 +1192,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, err = net_xmit_eval(err); } if (!err && oskb) { - tcp_update_skb_after_send(tp, oskb); + tcp_update_skb_after_send(sk, oskb); tcp_rate_skb_sent(sk, oskb); } return err; @@ -2327,7 +2341,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp" is used as a start point for the retransmit timer */ - tcp_update_skb_after_send(tp, skb); + tcp_update_skb_after_send(sk, skb); goto repair; /* Skip network transmission */ } @@ -2902,7 +2916,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) } tcp_skb_tsorted_restore(skb); if (!err) { - tcp_update_skb_after_send(tp, skb); + tcp_update_skb_after_send(sk, skb); tcp_rate_skb_sent(sk, skb); } } else { diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index d5185c44e9a5..77692ad6741d 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -491,11 +491,16 @@ begin: } skb = f->head; - if (unlikely(skb && now < f->time_next_packet && - !skb_is_tcp_pure_ack(skb))) { - head->first = f->next; - fq_flow_set_throttled(q, f); - goto begin; + if (skb && !skb_is_tcp_pure_ack(skb)) { + u64 time_next_packet = max_t(u64, ktime_to_ns(skb->tstamp), + f->time_next_packet); + + if (now < time_next_packet) { + head->first = f->next; + f->time_next_packet = time_next_packet; + fq_flow_set_throttled(q, f); + goto begin; + } } skb = fq_dequeue_head(sch, f); @@ -513,11 +518,7 @@ begin: prefetch(&skb->end); f->credit -= qdisc_pkt_len(skb); - if (!q->rate_enable) - goto out; - - /* Do not pace locally generated ack packets */ - if (skb_is_tcp_pure_ack(skb)) + if 
(ktime_to_ns(skb->tstamp) || !q->rate_enable) goto out; rate = q->flow_max_rate;