| From 686f0bf5f79c84fa2f29c7da48cbd2fbad14746b Mon Sep 17 00:00:00 2001 |
| From: Eric Dumazet <edumazet@google.com> |
| Date: Thu, 12 Dec 2019 12:55:31 -0800 |
| Subject: [PATCH] tcp: refine rule to allow EPOLLOUT generation under mem |
| pressure |
| |
| commit 216808c6ba6d00169fd2aa928ec3c0e63bef254f upstream. |
| |
| At the time commit ce5ec440994b ("tcp: ensure epoll edge trigger |
| wakeup when write queue is empty") was added to the kernel, |
| we still had a single write queue, combining rtx and write queues. |
| |
| Once we moved the rtx queue into a separate rb-tree, testing |
| if sk_write_queue is empty has been suboptimal. |
| |
| Indeed, if we have packets in the rtx queue, we probably want |
| to delay the EPOLLOUT generation at the time incoming packets |
| will free them, making room, but more importantly avoiding |
| flooding application with EPOLLOUT events. |
| |
| Solution is to use tcp_rtx_and_write_queues_empty() helper. |
| |
| Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue") |
| Signed-off-by: Eric Dumazet <edumazet@google.com> |
| Cc: Jason Baron <jbaron@akamai.com> |
| Cc: Neal Cardwell <ncardwell@google.com> |
| Acked-by: Soheil Hassas Yeganeh <soheil@google.com> |
| Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c |
| index 7d233f802ab9..05159825d0be 100644 |
| --- a/net/ipv4/tcp.c |
| +++ b/net/ipv4/tcp.c |
| @@ -1085,8 +1085,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, |
| goto out; |
| out_err: |
| /* make sure we wake any epoll edge trigger waiter */ |
| - if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && |
| - err == -EAGAIN)) { |
| + if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { |
| sk->sk_write_space(sk); |
| tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); |
| } |
| @@ -1416,8 +1415,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) |
| sock_zerocopy_put_abort(uarg, true); |
| err = sk_stream_error(sk, flags, err); |
| /* make sure we wake any epoll edge trigger waiter */ |
| - if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && |
| - err == -EAGAIN)) { |
| + if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { |
| sk->sk_write_space(sk); |
| tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); |
| } |
| -- |
| 2.7.4 |
| |