| From 32e2858d1a12f223facf58371a8e4d269d0c5896 Mon Sep 17 00:00:00 2001 |
| From: Eric Dumazet <edumazet@google.com> |
| Date: Thu, 10 Oct 2019 20:17:41 -0700 |
| Subject: [PATCH] tcp: annotate tp->write_seq lockless reads |
| |
| commit 0f31746452e6793ad6271337438af8f4defb8940 upstream. |
| |
| There are a few places where we fetch tp->write_seq while |
| this field can change from IRQ or another CPU. |
| |
| We need to add READ_ONCE() annotations, and also make |
| sure write sides use corresponding WRITE_ONCE() to avoid |
| store-tearing. |
| |
| Signed-off-by: Eric Dumazet <edumazet@google.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/include/net/tcp.h b/include/net/tcp.h |
| index 9eea14060456..2c57e0b79e1a 100644 |
| --- a/include/net/tcp.h |
| +++ b/include/net/tcp.h |
| @@ -1885,7 +1885,7 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) |
| static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) |
| { |
| const struct tcp_sock *tp = tcp_sk(sk); |
| - u32 notsent_bytes = tp->write_seq - tp->snd_nxt; |
| + u32 notsent_bytes = READ_ONCE(tp->write_seq) - tp->snd_nxt; |
| |
| return (notsent_bytes << wake) < tcp_notsent_lowat(tp); |
| } |
| diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c |
| index d55ff2f83dc3..5822b426c4d7 100644 |
| --- a/net/ipv4/tcp.c |
| +++ b/net/ipv4/tcp.c |
| @@ -616,7 +616,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) |
| if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) |
| answ = 0; |
| else |
| - answ = tp->write_seq - tp->snd_una; |
| + answ = READ_ONCE(tp->write_seq) - tp->snd_una; |
| break; |
| case SIOCOUTQNSD: |
| if (sk->sk_state == TCP_LISTEN) |
| @@ -625,7 +625,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) |
| if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) |
| answ = 0; |
| else |
| - answ = tp->write_seq - tp->snd_nxt; |
| + answ = READ_ONCE(tp->write_seq) - tp->snd_nxt; |
| break; |
| default: |
| return -ENOIOCTLCMD; |
| @@ -1035,7 +1035,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, |
| sk->sk_wmem_queued += copy; |
| sk_mem_charge(sk, copy); |
| skb->ip_summed = CHECKSUM_PARTIAL; |
| - tp->write_seq += copy; |
| + WRITE_ONCE(tp->write_seq, tp->write_seq + copy); |
| TCP_SKB_CB(skb)->end_seq += copy; |
| tcp_skb_pcount_set(skb, 0); |
| |
| @@ -1360,7 +1360,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) |
| if (!copied) |
| TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; |
| |
| - tp->write_seq += copy; |
| + WRITE_ONCE(tp->write_seq, tp->write_seq + copy); |
| TCP_SKB_CB(skb)->end_seq += copy; |
| tcp_skb_pcount_set(skb, 0); |
| |
| @@ -2558,6 +2558,7 @@ int tcp_disconnect(struct sock *sk, int flags) |
| struct inet_connection_sock *icsk = inet_csk(sk); |
| struct tcp_sock *tp = tcp_sk(sk); |
| int old_state = sk->sk_state; |
| + u32 seq; |
| |
| if (old_state != TCP_CLOSE) |
| tcp_set_state(sk, TCP_CLOSE); |
| @@ -2600,9 +2601,12 @@ int tcp_disconnect(struct sock *sk, int flags) |
| tp->srtt_us = 0; |
| tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); |
| tp->rcv_rtt_last_tsecr = 0; |
| - tp->write_seq += tp->max_window + 2; |
| - if (tp->write_seq == 0) |
| - tp->write_seq = 1; |
| + |
| + seq = tp->write_seq + tp->max_window + 2; |
| + if (!seq) |
| + seq = 1; |
| + WRITE_ONCE(tp->write_seq, seq); |
| + |
| icsk->icsk_backoff = 0; |
| tp->snd_cwnd = 2; |
| icsk->icsk_probes_out = 0; |
| @@ -2911,7 +2915,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, |
| if (sk->sk_state != TCP_CLOSE) |
| err = -EPERM; |
| else if (tp->repair_queue == TCP_SEND_QUEUE) |
| - tp->write_seq = val; |
| + WRITE_ONCE(tp->write_seq, val); |
| else if (tp->repair_queue == TCP_RECV_QUEUE) |
| WRITE_ONCE(tp->rcv_nxt, val); |
| else |
| diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c |
| index 035c549f623c..89b8de350b10 100644 |
| --- a/net/ipv4/tcp_diag.c |
| +++ b/net/ipv4/tcp_diag.c |
| @@ -28,7 +28,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, |
| |
| r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - |
| READ_ONCE(tp->copied_seq), 0); |
| - r->idiag_wqueue = tp->write_seq - tp->snd_una; |
| + r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una; |
| } |
| if (info) |
| tcp_get_info(sk, info); |
| diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c |
| index b900ce920f51..16591cccbeff 100644 |
| --- a/net/ipv4/tcp_ipv4.c |
| +++ b/net/ipv4/tcp_ipv4.c |
| @@ -164,9 +164,11 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) |
| * without appearing to create any others. |
| */ |
| if (likely(!tp->repair)) { |
| - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; |
| - if (tp->write_seq == 0) |
| - tp->write_seq = 1; |
| + u32 seq = tcptw->tw_snd_nxt + 65535 + 2; |
| + |
| + if (!seq) |
| + seq = 1; |
| + WRITE_ONCE(tp->write_seq, seq); |
| tp->rx_opt.ts_recent = tcptw->tw_ts_recent; |
| tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; |
| } |
| @@ -253,7 +255,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
| tp->rx_opt.ts_recent = 0; |
| tp->rx_opt.ts_recent_stamp = 0; |
| if (likely(!tp->repair)) |
| - tp->write_seq = 0; |
| + WRITE_ONCE(tp->write_seq, 0); |
| } |
| |
| inet->inet_dport = usin->sin_port; |
| @@ -291,10 +293,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
| |
| if (likely(!tp->repair)) { |
| if (!tp->write_seq) |
| - tp->write_seq = secure_tcp_seq(inet->inet_saddr, |
| - inet->inet_daddr, |
| - inet->inet_sport, |
| - usin->sin_port); |
| + WRITE_ONCE(tp->write_seq, |
| + secure_tcp_seq(inet->inet_saddr, |
| + inet->inet_daddr, |
| + inet->inet_sport, |
| + usin->sin_port)); |
| tp->tsoffset = secure_tcp_ts_off(sock_net(sk), |
| inet->inet_saddr, |
| inet->inet_daddr); |
| @@ -2437,7 +2440,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) |
| seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " |
| "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", |
| i, src, srcp, dest, destp, state, |
| - tp->write_seq - tp->snd_una, |
| + READ_ONCE(tp->write_seq) - tp->snd_una, |
| rx_queue, |
| timer_active, |
| jiffies_delta_to_clock_t(timer_expires - jiffies), |
| diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c |
| index 2775a5be74bc..9a91d4dfe493 100644 |
| --- a/net/ipv4/tcp_minisocks.c |
| +++ b/net/ipv4/tcp_minisocks.c |
| @@ -496,7 +496,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, |
| newtp->total_retrans = req->num_retrans; |
| |
| tcp_init_xmit_timers(newsk); |
| - newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; |
| + WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1); |
| |
| if (sock_flag(newsk, SOCK_KEEPOPEN)) |
| inet_csk_reset_keepalive_timer(newsk, |
| diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c |
| index 87ce98a1022b..f70920f5bb7f 100644 |
| --- a/net/ipv4/tcp_output.c |
| +++ b/net/ipv4/tcp_output.c |
| @@ -1187,7 +1187,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) |
| struct tcp_sock *tp = tcp_sk(sk); |
| |
| /* Advance write_seq and place onto the write_queue. */ |
| - tp->write_seq = TCP_SKB_CB(skb)->end_seq; |
| + WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq); |
| __skb_header_release(skb); |
| tcp_add_write_queue_tail(sk, skb); |
| sk->sk_wmem_queued += skb->truesize; |
| @@ -3434,7 +3434,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) |
| __skb_header_release(skb); |
| sk->sk_wmem_queued += skb->truesize; |
| sk_mem_charge(sk, skb->truesize); |
| - tp->write_seq = tcb->end_seq; |
| + WRITE_ONCE(tp->write_seq, tcb->end_seq); |
| tp->packets_out += tcp_skb_pcount(skb); |
| } |
| |
| diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c |
| index 41d616a8a967..70b36dc26f62 100644 |
| --- a/net/ipv6/tcp_ipv6.c |
| +++ b/net/ipv6/tcp_ipv6.c |
| @@ -215,7 +215,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, |
| !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { |
| tp->rx_opt.ts_recent = 0; |
| tp->rx_opt.ts_recent_stamp = 0; |
| - tp->write_seq = 0; |
| + WRITE_ONCE(tp->write_seq, 0); |
| } |
| |
| sk->sk_v6_daddr = usin->sin6_addr; |
| @@ -311,10 +311,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, |
| |
| if (likely(!tp->repair)) { |
| if (!tp->write_seq) |
| - tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32, |
| - sk->sk_v6_daddr.s6_addr32, |
| - inet->inet_sport, |
| - inet->inet_dport); |
| + WRITE_ONCE(tp->write_seq, |
| + secure_tcpv6_seq(np->saddr.s6_addr32, |
| + sk->sk_v6_daddr.s6_addr32, |
| + inet->inet_sport, |
| + inet->inet_dport)); |
| tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), |
| np->saddr.s6_addr32, |
| sk->sk_v6_daddr.s6_addr32); |
| @@ -1864,7 +1865,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) |
| dest->s6_addr32[0], dest->s6_addr32[1], |
| dest->s6_addr32[2], dest->s6_addr32[3], destp, |
| state, |
| - tp->write_seq - tp->snd_una, |
| + READ_ONCE(tp->write_seq) - tp->snd_una, |
| rx_queue, |
| timer_active, |
| jiffies_delta_to_clock_t(timer_expires - jiffies), |
| -- |
| 2.7.4 |
| |