| From foo@baz Sun Dec 31 11:13:15 CET 2017 |
| From: Yousuk Seung <ysseung@google.com> |
| Date: Thu, 7 Dec 2017 13:41:34 -0800 |
| Subject: tcp: invalidate rate samples during SACK reneging |
| |
| From: Yousuk Seung <ysseung@google.com> |
| |
| |
| [ Upstream commit d4761754b4fb2ef8d9a1e9d121c4bec84e1fe292 ] |
| |
| Mark tcp_sock during a SACK reneging event and invalidate rate samples |
| while marked. Such rate samples may overestimate bw by including packets |
| that were SACKed before reneging. |
| |
| < ack 6001 win 10000 sack 7001:38001 |
| < ack 7001 win 0 sack 8001:38001 // Reneg detected |
| > seq 7001:8001 // RTO, SACK cleared. |
| < ack 38001 win 10000 |
| |
| In the above example, the rate sample taken after the last ack will count |
| 7001-38001 as delivered, while the actual delivery rate could likely |
| be much lower, i.e. 7001-8001. |
| |
| This patch adds a new field tcp_sock.is_sack_reneg and marks it when we |
| declare SACK reneging and enter TCP_CA_Loss, and unmarks it after |
| the last rate sample was taken before moving back to TCP_CA_Open. This |
| patch also invalidates rate samples taken while tcp_sock.is_sack_reneg |
| is set. |
| |
| Fixes: b9f64820fb22 ("tcp: track data delivery rate for a TCP connection") |
| Signed-off-by: Yousuk Seung <ysseung@google.com> |
| Signed-off-by: Neal Cardwell <ncardwell@google.com> |
| Signed-off-by: Yuchung Cheng <ycheng@google.com> |
| Acked-by: Soheil Hassas Yeganeh <soheil@google.com> |
| Acked-by: Eric Dumazet <edumazet@google.com> |
| Acked-by: Priyaranjan Jha <priyarjha@google.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| include/linux/tcp.h | 3 ++- |
| include/net/tcp.h | 2 +- |
| net/ipv4/tcp.c | 1 + |
| net/ipv4/tcp_input.c | 10 ++++++++-- |
| net/ipv4/tcp_rate.c | 10 +++++++--- |
| 5 files changed, 19 insertions(+), 7 deletions(-) |
| |
| --- a/include/linux/tcp.h |
| +++ b/include/linux/tcp.h |
| @@ -219,7 +219,8 @@ struct tcp_sock { |
| } rack; |
| u16 advmss; /* Advertised MSS */ |
| u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ |
| - unused:7; |
| + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ |
| + unused:6; |
| u8 nonagle : 4,/* Disable Nagle algorithm? */ |
| thin_lto : 1,/* Use linear timeouts for thin streams */ |
| thin_dupack : 1,/* Fast retransmit on first dupack */ |
| --- a/include/net/tcp.h |
| +++ b/include/net/tcp.h |
| @@ -1001,7 +1001,7 @@ void tcp_rate_skb_sent(struct sock *sk, |
| void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, |
| struct rate_sample *rs); |
| void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, |
| - struct skb_mstamp *now, struct rate_sample *rs); |
| + bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs); |
| void tcp_rate_check_app_limited(struct sock *sk); |
| |
| /* These functions determine how the current flow behaves in respect of SACK |
| --- a/net/ipv4/tcp.c |
| +++ b/net/ipv4/tcp.c |
| @@ -2297,6 +2297,7 @@ int tcp_disconnect(struct sock *sk, int |
| tp->snd_cwnd_cnt = 0; |
| tp->window_clamp = 0; |
| tcp_set_ca_state(sk, TCP_CA_Open); |
| + tp->is_sack_reneg = 0; |
| tcp_clear_retrans(tp); |
| inet_csk_delack_init(sk); |
| /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 |
| --- a/net/ipv4/tcp_input.c |
| +++ b/net/ipv4/tcp_input.c |
| @@ -1966,6 +1966,8 @@ void tcp_enter_loss(struct sock *sk) |
| NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); |
| tp->sacked_out = 0; |
| tp->fackets_out = 0; |
| + /* Mark SACK reneging until we recover from this loss event. */ |
| + tp->is_sack_reneg = 1; |
| } |
| tcp_clear_all_retrans_hints(tp); |
| |
| @@ -2463,6 +2465,7 @@ static bool tcp_try_undo_recovery(struct |
| return true; |
| } |
| tcp_set_ca_state(sk, TCP_CA_Open); |
| + tp->is_sack_reneg = 0; |
| return false; |
| } |
| |
| @@ -2494,8 +2497,10 @@ static bool tcp_try_undo_loss(struct soc |
| NET_INC_STATS(sock_net(sk), |
| LINUX_MIB_TCPSPURIOUSRTOS); |
| inet_csk(sk)->icsk_retransmits = 0; |
| - if (frto_undo || tcp_is_sack(tp)) |
| + if (frto_undo || tcp_is_sack(tp)) { |
| tcp_set_ca_state(sk, TCP_CA_Open); |
| + tp->is_sack_reneg = 0; |
| + } |
| return true; |
| } |
| return false; |
| @@ -3589,6 +3594,7 @@ static int tcp_ack(struct sock *sk, cons |
| struct tcp_sacktag_state sack_state; |
| struct rate_sample rs = { .prior_delivered = 0 }; |
| u32 prior_snd_una = tp->snd_una; |
| + bool is_sack_reneg = tp->is_sack_reneg; |
| u32 ack_seq = TCP_SKB_CB(skb)->seq; |
| u32 ack = TCP_SKB_CB(skb)->ack_seq; |
| bool is_dupack = false; |
| @@ -3711,7 +3717,7 @@ static int tcp_ack(struct sock *sk, cons |
| tcp_schedule_loss_probe(sk); |
| delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ |
| lost = tp->lost - lost; /* freshly marked lost */ |
| - tcp_rate_gen(sk, delivered, lost, &now, &rs); |
| + tcp_rate_gen(sk, delivered, lost, is_sack_reneg, &now, &rs); |
| tcp_cong_control(sk, ack, delivered, flag, &rs); |
| tcp_xmit_recovery(sk, rexmit); |
| return 1; |
| --- a/net/ipv4/tcp_rate.c |
| +++ b/net/ipv4/tcp_rate.c |
| @@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock |
| |
| /* Update the connection delivery information and generate a rate sample. */ |
| void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, |
| - struct skb_mstamp *now, struct rate_sample *rs) |
| + bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs) |
| { |
| struct tcp_sock *tp = tcp_sk(sk); |
| u32 snd_us, ack_us; |
| @@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 d |
| |
| rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ |
| rs->losses = lost; /* freshly marked lost */ |
| - /* Return an invalid sample if no timing information is available. */ |
| - if (!rs->prior_mstamp.v64) { |
| + /* Return an invalid sample if no timing information is available or |
| + * in recovery from loss with SACK reneging. Rate samples taken during |
| + * a SACK reneging event may overestimate bw by including packets that |
| + * were SACKed before the reneg. |
| + */ |
| + if (!rs->prior_mstamp.v64 || is_sack_reneg) { |
| rs->delivered = -1; |
| rs->interval_us = -1; |
| return; |