| From 4119a5ad286c95696300fcb214de8855bb57beb5 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Mon, 11 Jul 2022 17:47:18 +0800 |
| Subject: tcp: make retransmitted SKB fit into the send window |
| |
| From: Yonglong Li <liyonglong@chinatelecom.cn> |
| |
| [ Upstream commit 536a6c8e05f95e3d1118c40ae8b3022ee2d05d52 ] |
| |
| The current code of __tcp_retransmit_skb only checks that |
| TCP_SKB_CB(skb)->seq is in the send window, while |
| TCP_SKB_CB(skb)->end_seq may be out of the send window. |
| If the receiver has shrunk its window and the skb extends beyond the |
| new window, only a smaller portion of the payload should be retransmitted. |
| |
| test packetdrill script: |
| 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 |
| +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) |
| +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 |
| |
| +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) |
| +0 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> |
| +.05 < S. 0:0(0) ack 1 win 6000 <mss 1000,nop,nop,sackOK> |
| +0 > . 1:1(0) ack 1 |
| |
| +0 write(3, ..., 10000) = 10000 |
| |
| +0 > . 1:2001(2000) ack 1 win 65535 |
| +0 > . 2001:4001(2000) ack 1 win 65535 |
| +0 > . 4001:6001(2000) ack 1 win 65535 |
| |
| +.05 < . 1:1(0) ack 4001 win 1001 |
| |
| and tcpdump shows: |
| 192.168.226.67.55 > 192.0.2.1.8080: Flags [.], seq 1:2001, ack 1, win 65535, length 2000 |
| 192.168.226.67.55 > 192.0.2.1.8080: Flags [.], seq 2001:4001, ack 1, win 65535, length 2000 |
| 192.168.226.67.55 > 192.0.2.1.8080: Flags [P.], seq 4001:5001, ack 1, win 65535, length 1000 |
| 192.168.226.67.55 > 192.0.2.1.8080: Flags [.], seq 5001:6001, ack 1, win 65535, length 1000 |
| 192.0.2.1.8080 > 192.168.226.67.55: Flags [.], ack 4001, win 1001, length 0 |
| 192.168.226.67.55 > 192.0.2.1.8080: Flags [.], seq 5001:6001, ack 1, win 65535, length 1000 |
| 192.168.226.67.55 > 192.0.2.1.8080: Flags [P.], seq 4001:5001, ack 1, win 65535, length 1000 |
| |
| When the client retracts the window to 1001, the send window is [4001,5002], |
| but TLP sends the 5001-6001 packet, which is out of the send window. |
| |
| Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") |
| Signed-off-by: Yonglong Li <liyonglong@chinatelecom.cn> |
| Signed-off-by: Eric Dumazet <edumazet@google.com> |
| Link: https://lore.kernel.org/r/1657532838-20200-1-git-send-email-liyonglong@chinatelecom.cn |
| Signed-off-by: Jakub Kicinski <kuba@kernel.org> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| net/ipv4/tcp_output.c | 23 ++++++++++++++++------- |
| 1 file changed, 16 insertions(+), 7 deletions(-) |
| |
| diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c |
| index 657b0a4d9359..5662faf81fa5 100644 |
| --- a/net/ipv4/tcp_output.c |
| +++ b/net/ipv4/tcp_output.c |
| @@ -3137,7 +3137,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) |
| struct tcp_sock *tp = tcp_sk(sk); |
| unsigned int cur_mss; |
| int diff, len, err; |
| - |
| + int avail_wnd; |
| |
| /* Inconclusive MTU probe */ |
| if (icsk->icsk_mtup.probe_size) |
| @@ -3167,17 +3167,25 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) |
| return -EHOSTUNREACH; /* Routing failure or similar. */ |
| |
| cur_mss = tcp_current_mss(sk); |
| + avail_wnd = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; |
| |
| /* If receiver has shrunk his window, and skb is out of |
| * new window, do not retransmit it. The exception is the |
| * case, when window is shrunk to zero. In this case |
| - * our retransmit serves as a zero window probe. |
| + * our retransmit of one segment serves as a zero window probe. |
| */ |
| - if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && |
| - TCP_SKB_CB(skb)->seq != tp->snd_una) |
| - return -EAGAIN; |
| + if (avail_wnd <= 0) { |
| + if (TCP_SKB_CB(skb)->seq != tp->snd_una) |
| + return -EAGAIN; |
| + avail_wnd = cur_mss; |
| + } |
| |
| len = cur_mss * segs; |
| + if (len > avail_wnd) { |
| + len = rounddown(avail_wnd, cur_mss); |
| + if (!len) |
| + len = avail_wnd; |
| + } |
| if (skb->len > len) { |
| if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len, |
| cur_mss, GFP_ATOMIC)) |
| @@ -3191,8 +3199,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) |
| diff -= tcp_skb_pcount(skb); |
| if (diff) |
| tcp_adjust_pcount(sk, skb, diff); |
| - if (skb->len < cur_mss) |
| - tcp_retrans_try_collapse(sk, skb, cur_mss); |
| + avail_wnd = min_t(int, avail_wnd, cur_mss); |
| + if (skb->len < avail_wnd) |
| + tcp_retrans_try_collapse(sk, skb, avail_wnd); |
| } |
| |
| /* RFC3168, section 6.1.1.1. ECN fallback */ |
| -- |
| 2.35.1 |
| |