| From foo@baz Tue Mar 24 11:01:55 CET 2015 |
| From: Eric Dumazet <edumazet@google.com> |
| Date: Mon, 17 Nov 2014 23:06:20 -0800 |
| Subject: tcp: make connect() mem charging friendly |
| |
| From: Eric Dumazet <edumazet@google.com> |
| |
| [ Upstream commit 355a901e6cf1b2b763ec85caa2a9f04fbcc4ab4a ] |
| |
| While working on sk_forward_alloc problems reported by Denys |
| Fedoryshchenko, we found that tcp connect() (and fastopen) do not call |
| sk_wmem_schedule() for SYN packet (and/or SYN/DATA packet), so |
| sk_forward_alloc is negative while connect is in progress. |
| |
| We can fix this by calling regular sk_stream_alloc_skb() both for the |
| SYN packet (in tcp_connect()) and the syn_data packet in |
| tcp_send_syn_data(). |
| |
| Then, tcp_send_syn_data() can avoid copying syn_data as we simply |
| can manipulate syn_data->cb[] to remove SYN flag (and increment seq) |
| |
| Instead of open coding memcpy_fromiovecend(), simply use this helper. |
| |
| This leaves clean fast-clone skbs in the socket write queue. |
| |
| This was tested against our fastopen packetdrill tests. |
| |
| Reported-by: Denys Fedoryshchenko <nuclearcat@nuclearcat.com> |
| Signed-off-by: Eric Dumazet <edumazet@google.com> |
| Acked-by: Yuchung Cheng <ycheng@google.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| net/ipv4/tcp_output.c | 66 +++++++++++++++++++++----------------------------- |
| 1 file changed, 29 insertions(+), 37 deletions(-) |
| |
| --- a/net/ipv4/tcp_output.c |
| +++ b/net/ipv4/tcp_output.c |
| @@ -2871,9 +2871,9 @@ static int tcp_send_syn_data(struct sock |
| { |
| struct tcp_sock *tp = tcp_sk(sk); |
| struct tcp_fastopen_request *fo = tp->fastopen_req; |
| - int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen; |
| - struct sk_buff *syn_data = NULL, *data; |
| + int syn_loss = 0, space, err = 0; |
| unsigned long last_syn_loss = 0; |
| + struct sk_buff *syn_data; |
| |
| tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ |
| tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, |
| @@ -2904,42 +2904,38 @@ static int tcp_send_syn_data(struct sock |
| /* limit to order-0 allocations */ |
| space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); |
| |
| - syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, |
| - sk->sk_allocation); |
| - if (syn_data == NULL) |
| + syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation); |
| + if (!syn_data) |
| goto fallback; |
| - |
| - for (i = 0; i < iovlen && syn_data->len < space; ++i) { |
| - struct iovec *iov = &fo->data->msg_iov[i]; |
| - unsigned char __user *from = iov->iov_base; |
| - int len = iov->iov_len; |
| - |
| - if (syn_data->len + len > space) |
| - len = space - syn_data->len; |
| - else if (i + 1 == iovlen) |
| - /* No more data pending in inet_wait_for_connect() */ |
| - fo->data = NULL; |
| - |
| - if (skb_add_data(syn_data, from, len)) |
| - goto fallback; |
| - } |
| - |
| - /* Queue a data-only packet after the regular SYN for retransmission */ |
| - data = pskb_copy(syn_data, sk->sk_allocation); |
| - if (data == NULL) |
| + syn_data->ip_summed = CHECKSUM_PARTIAL; |
| + memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); |
| + if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), |
| + fo->data->msg_iov, 0, space))) { |
| + kfree_skb(syn_data); |
| goto fallback; |
| - TCP_SKB_CB(data)->seq++; |
| - TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN; |
| - TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH); |
| - tcp_connect_queue_skb(sk, data); |
| - fo->copied = data->len; |
| + } |
| |
| - if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { |
| + /* No more data pending in inet_wait_for_connect() */ |
| + if (space == fo->size) |
| + fo->data = NULL; |
| + fo->copied = space; |
| + |
| + tcp_connect_queue_skb(sk, syn_data); |
| + |
| + err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); |
| + |
| + /* Now full SYN+DATA was cloned and sent (or not), |
| + * remove the SYN from the original skb (syn_data) |
| + * we keep in write queue in case of a retransmit, as we |
| + * also have the SYN packet (with no data) in the same queue. |
| + */ |
| + TCP_SKB_CB(syn_data)->seq++; |
| + TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH; |
| + if (!err) { |
| tp->syn_data = (fo->copied > 0); |
| NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); |
| goto done; |
| } |
| - syn_data = NULL; |
| |
| fallback: |
| /* Send a regular SYN with Fast Open cookie request option */ |
| @@ -2948,7 +2944,6 @@ fallback: |
| err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); |
| if (err) |
| tp->syn_fastopen = 0; |
| - kfree_skb(syn_data); |
| done: |
| fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ |
| return err; |
| @@ -2968,13 +2963,10 @@ int tcp_connect(struct sock *sk) |
| return 0; |
| } |
| |
| - buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); |
| - if (unlikely(buff == NULL)) |
| + buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); |
| + if (unlikely(!buff)) |
| return -ENOBUFS; |
| |
| - /* Reserve space for headers. */ |
| - skb_reserve(buff, MAX_TCP_HEADER); |
| - |
| tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
| tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp; |
| tcp_connect_queue_skb(sk, buff); |