| From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST |
| From: Yuchung Cheng <ycheng@google.com> |
| Date: Thu, 23 Jul 2020 12:00:06 -0700 |
| Subject: tcp: allow at most one TLP probe per flight |
| |
| From: Yuchung Cheng <ycheng@google.com> |
| |
| [ Upstream commit 76be93fc0702322179bb0ea87295d820ee46ad14 ] |
| |
| Previously TLP may send multiple probes of new data in one |
| flight. This happens when the sender is cwnd limited. After the |
| initial TLP containing new data is sent, the sender receives another |
| ACK that acks partial inflight. It may re-arm another TLP timer |
| to send more, if no further ACK returns before the next TLP timeout |
| (PTO) expires. The sender may in theory send a large number of TLPs |
| until the send queue is depleted. This only happens if the sender sees |
| such an irregular, uncommon ACK pattern, but it is generally undesirable |
| behavior, especially during congestion. |
| |
| The original TLP design restricts only one TLP probe per inflight as |
| published in "Reducing Web Latency: the Virtue of Gentle Aggression", |
| SIGCOMM 2013. This patch changes TLP to send at most one probe |
| per inflight. |
| |
| Note that if the sender is app-limited, TLP retransmits old data |
| and does not have this issue. |
| |
| Signed-off-by: Yuchung Cheng <ycheng@google.com> |
| Signed-off-by: Neal Cardwell <ncardwell@google.com> |
| Signed-off-by: Eric Dumazet <edumazet@google.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| include/linux/tcp.h | 4 +++- |
| net/ipv4/tcp_input.c | 11 ++++++----- |
| net/ipv4/tcp_output.c | 13 ++++++++----- |
| 3 files changed, 17 insertions(+), 11 deletions(-) |
| |
| --- a/include/linux/tcp.h |
| +++ b/include/linux/tcp.h |
| @@ -216,6 +216,8 @@ struct tcp_sock { |
| } rack; |
| u16 advmss; /* Advertised MSS */ |
| u8 compressed_ack; |
| + u8 tlp_retrans:1, /* TLP is a retransmission */ |
| + unused_1:7; |
| u32 chrono_start; /* Start time in jiffies of a TCP chrono */ |
| u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ |
| u8 chrono_type:2, /* current chronograph type */ |
| @@ -238,7 +240,7 @@ struct tcp_sock { |
| save_syn:1, /* Save headers of SYN packet */ |
| is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ |
| syn_smc:1; /* SYN includes SMC */ |
| - u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ |
| + u32 tlp_high_seq; /* snd_nxt at the time of TLP */ |
| |
| u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ |
| u64 tcp_wstamp_ns; /* departure time for next sent data packet */ |
| --- a/net/ipv4/tcp_input.c |
| +++ b/net/ipv4/tcp_input.c |
| @@ -3505,10 +3505,8 @@ static void tcp_replace_ts_recent(struct |
| } |
| } |
| |
| -/* This routine deals with acks during a TLP episode. |
| - * We mark the end of a TLP episode on receiving TLP dupack or when |
| - * ack is after tlp_high_seq. |
| - * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. |
| +/* This routine deals with acks during a TLP episode and ends an episode by |
| + * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack |
| */ |
| static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) |
| { |
| @@ -3517,7 +3515,10 @@ static void tcp_process_tlp_ack(struct s |
| if (before(ack, tp->tlp_high_seq)) |
| return; |
| |
| - if (flag & FLAG_DSACKING_ACK) { |
| + if (!tp->tlp_retrans) { |
| + /* TLP of new data has been acknowledged */ |
| + tp->tlp_high_seq = 0; |
| + } else if (flag & FLAG_DSACKING_ACK) { |
| /* This DSACK means original and TLP probe arrived; no loss */ |
| tp->tlp_high_seq = 0; |
| } else if (after(ack, tp->tlp_high_seq)) { |
| --- a/net/ipv4/tcp_output.c |
| +++ b/net/ipv4/tcp_output.c |
| @@ -2564,6 +2564,11 @@ void tcp_send_loss_probe(struct sock *sk |
| int pcount; |
| int mss = tcp_current_mss(sk); |
| |
| + /* At most one outstanding TLP */ |
| + if (tp->tlp_high_seq) |
| + goto rearm_timer; |
| + |
| + tp->tlp_retrans = 0; |
| skb = tcp_send_head(sk); |
| if (skb && tcp_snd_wnd_test(tp, skb, mss)) { |
| pcount = tp->packets_out; |
| @@ -2581,10 +2586,6 @@ void tcp_send_loss_probe(struct sock *sk |
| return; |
| } |
| |
| - /* At most one outstanding TLP retransmission. */ |
| - if (tp->tlp_high_seq) |
| - goto rearm_timer; |
| - |
| if (skb_still_in_host_queue(sk, skb)) |
| goto rearm_timer; |
| |
| @@ -2606,10 +2607,12 @@ void tcp_send_loss_probe(struct sock *sk |
| if (__tcp_retransmit_skb(sk, skb, 1)) |
| goto rearm_timer; |
| |
| + tp->tlp_retrans = 1; |
| + |
| +probe_sent: |
| /* Record snd_nxt for loss detection. */ |
| tp->tlp_high_seq = tp->snd_nxt; |
| |
| -probe_sent: |
| NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); |
| /* Reset s.t. tcp_rearm_rto will restart timer from now */ |
| inet_csk(sk)->icsk_pending = 0; |