| From 844a8e92acab3d388d4380fc85705118034d84bb Mon Sep 17 00:00:00 2001 |
| From: Willy Tarreau <w@1wt.eu> |
| Date: Tue, 13 Jan 2009 16:04:36 -0800 |
| Subject: tcp: splice as many packets as possible at once |
| |
| From: Willy Tarreau <w@1wt.eu> |
| |
| [ Upstream commit 33966dd0e2f68f26943cd9ee93ec6abbc6547a8e ] |
| |
| As spotted by Willy Tarreau, current splice() from tcp socket to pipe is not |
| optimal. It processes at most one segment per call. |
| This results in low performance and very high overhead due to syscall rate |
| when splicing from interfaces which do not support LRO. |
| |
| Willy provided a patch inside tcp_splice_read(), but a better fix |
| is to let tcp_read_sock() process as many segments as possible, so |
| that tcp_rcv_space_adjust() and tcp_cleanup_rbuf() are called less |
| often. |
| |
| With this change, splice() behaves like tcp_recvmsg(), being able |
| to consume many skbs in one system call. With typical 1460 bytes |
| of payload per frame, that means splice(SPLICE_F_NONBLOCK) can return |
| 16*1460 = 23360 bytes. |
| |
| Signed-off-by: Willy Tarreau <w@1wt.eu> |
| Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| |
| --- |
| net/ipv4/tcp.c | 11 +++++++++-- |
| 1 file changed, 9 insertions(+), 2 deletions(-) |
| |
| --- a/net/ipv4/tcp.c |
| +++ b/net/ipv4/tcp.c |
| @@ -518,8 +518,12 @@ static int tcp_splice_data_recv(read_des |
| unsigned int offset, size_t len) |
| { |
| struct tcp_splice_state *tss = rd_desc->arg.data; |
| + int ret; |
| |
| - return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags); |
| + ret = skb_splice_bits(skb, offset, tss->pipe, rd_desc->count, tss->flags); |
| + if (ret > 0) |
| + rd_desc->count -= ret; |
| + return ret; |
| } |
| |
| static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) |
| @@ -527,6 +531,7 @@ static int __tcp_splice_read(struct sock |
| /* Store TCP splice context information in read_descriptor_t. */ |
| read_descriptor_t rd_desc = { |
| .arg.data = tss, |
| + .count = tss->len, |
| }; |
| |
| return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv); |
| @@ -611,11 +616,13 @@ ssize_t tcp_splice_read(struct socket *s |
| tss.len -= ret; |
| spliced += ret; |
| |
| + if (!timeo) |
| + break; |
| release_sock(sk); |
| lock_sock(sk); |
| |
| if (sk->sk_err || sk->sk_state == TCP_CLOSE || |
| - (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || |
| + (sk->sk_shutdown & RCV_SHUTDOWN) || |
| signal_pending(current)) |
| break; |
| } |