diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0a5d97c20aa9..e13d77857225 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -839,26 +839,38 @@ void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); + int wmem; + + /* Keep one reference on sk_wmem_alloc. + * Will be released by sk_free() from here or tcp_tasklet_func() + */ + wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc); + + /* If this softirq is serviced by ksoftirqd, we are likely under stress. + * Wait until our queues (qdisc + devices) are drained. + * This gives : + * - less callbacks to tcp_write_xmit(), reducing stress (batches) + * - chance for incoming ACK (processed by another cpu maybe) + * to migrate this flow (skb->ooo_okay will be eventually set) + */ + if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) + goto out; if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { unsigned long flags; struct tsq_tasklet *tsq; - /* Keep a ref on socket. - * This last ref will be released in tcp_tasklet_func() - */ - atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc); - /* queue this socket to tasklet queue */ local_irq_save(flags); tsq = &__get_cpu_var(tsq_tasklet); list_add(&tp->tsq_node, &tsq->head); tasklet_schedule(&tsq->tasklet); local_irq_restore(flags); - } else { - sock_wfree(skb); + return; } +out: + sk_free(sk); } /* This routine actually transmits TCP packets queued in by