blob: 3707a814f82ac346f70c9a9712bccd5ec6a2da64 [file] [log] [blame]
From b204b72c028dd2c3a09620c7c26705742f29ea71 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Oct 2019 20:17:44 -0700
Subject: [PATCH] tcp: annotate sk->sk_rcvbuf lockless reads
commit ebb3b78db7bf842270a46fd4fe7cc45c78fa5ed6 upstream.
For the sake of tcp_poll(), there are a few places where we fetch
sk->sk_rcvbuf while this field can change from IRQ or another CPU.
We need to add READ_ONCE() annotations, and also make sure the write
sides use the corresponding WRITE_ONCE() to avoid store-tearing.
Note that other transports probably need similar fixes.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7882cfbf7e5b..9b0f00592247 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1380,14 +1380,14 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, sk->sk_rcvbuf -
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
READ_ONCE(sk->sk_backlog.len) -
atomic_read(&sk->sk_rmem_alloc));
}
static inline int tcp_full_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, sk->sk_rcvbuf);
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
extern void tcp_openreq_init_rwin(struct request_sock *req,
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index a0c4b8a30966..f720c32e7dfd 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -82,7 +82,7 @@ TRACE_EVENT(sock_rcvqueue_full,
TP_fast_assign(
__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
__entry->truesize = skb->truesize;
- __entry->sk_rcvbuf = sk->sk_rcvbuf;
+ __entry->sk_rcvbuf = READ_ONCE(sk->sk_rcvbuf);
),
TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d",
diff --git a/net/core/filter.c b/net/core/filter.c
index afcaba2e051f..1f3f603c7ec3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4246,7 +4246,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
case SO_RCVBUF:
val = min_t(u32, val, sysctl_rmem_max);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+ WRITE_ONCE(sk->sk_rcvbuf,
+ max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_SNDBUF:
val = min_t(u32, val, sysctl_wmem_max);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 74efd63f15e2..57f879316ab4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4270,7 +4270,7 @@ static void skb_set_err_queue(struct sk_buff *skb)
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned int)sk->sk_rcvbuf)
+ (unsigned int)READ_ONCE(sk->sk_rcvbuf))
return -ENOMEM;
skb_orphan(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index e66d0b946868..28e4062343c9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -831,7 +831,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
* returning the value we actually used in getsockopt
* is the most desirable behavior.
*/
- sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+ WRITE_ONCE(sk->sk_rcvbuf,
+ max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_RCVBUFFORCE:
@@ -3200,7 +3201,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
- mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+ mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bcdcca023972..c6d122ae2866 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -451,7 +451,7 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+ WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
sk_sockets_allocated_inc(sk);
sk->sk_route_forced_caps = NETIF_F_GSO;
@@ -1708,7 +1708,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
val <<= 1;
if (val > sk->sk_rcvbuf) {
- sk->sk_rcvbuf = val;
+ WRITE_ONCE(sk->sk_rcvbuf, val);
tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
}
return 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f9e6cc739d68..d66186de8f3d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -483,8 +483,9 @@ static void tcp_clamp_window(struct sock *sk)
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
- sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
- net->ipv4.sysctl_tcp_rmem[2]);
+ WRITE_ONCE(sk->sk_rcvbuf,
+ min(atomic_read(&sk->sk_rmem_alloc),
+ net->ipv4.sysctl_tcp_rmem[2]));
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -648,7 +649,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
rcvbuf = min_t(u64, rcvwin * rcvmem,
sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
- sk->sk_rcvbuf = rcvbuf;
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make the window clamp follow along. */
tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
--
2.7.4