| From foo@baz Fri Jan 4 20:01:52 CET 2019 |
| From: Deepa Dinamani <deepa.kernel@gmail.com> |
| Date: Thu, 27 Dec 2018 18:55:09 -0800 |
| Subject: sock: Make sock->sk_stamp thread-safe |
| |
| From: Deepa Dinamani <deepa.kernel@gmail.com> |
| |
| [ Upstream commit 3a0ed3e9619738067214871e9cb826fa23b2ddb9 ] |
| |
| Al Viro mentioned (Message-ID |
| <20170626041334.GZ10672@ZenIV.linux.org.uk>) |
| that there is probably a race condition |
| lurking in accesses of sk_stamp on 32-bit machines. |
| |
| sock->sk_stamp is of type ktime_t which is always an s64. |
| On a 32 bit architecture, we might run into situations of |
| unsafe access as the access to the field becomes non atomic. |
| |
| Use seqlocks for synchronization. |
| This allows us to avoid using spinlocks for readers as |
| readers do not need mutual exclusion. |
| |
| Another approach to solve this is to require sk_lock for all |
| modifications of the timestamps. The current approach allows |
| for timestamps to have their own lock: sk_stamp_lock. |
| This allows for the patch to not compete with already |
| existing critical sections, and side effects are limited |
| to the paths in the patch. |
| |
| The addition of the new field maintains the data locality |
| optimizations from |
| commit 9115e8cd2a0c ("net: reorganize struct sock for better data |
| locality") |
| |
| Note that all the instances of the sk_stamp accesses |
| are either through the ioctl or the syscall recvmsg. |
| |
| Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| include/net/sock.h | 36 ++++++++++++++++++++++++++++++++++-- |
| net/compat.c | 15 +++++++++------ |
| net/core/sock.c | 3 +++ |
| net/sunrpc/svcsock.c | 2 +- |
| 4 files changed, 47 insertions(+), 9 deletions(-) |
| |
| --- a/include/net/sock.h |
| +++ b/include/net/sock.h |
| @@ -284,6 +284,7 @@ struct sock_common { |
| * @sk_filter: socket filtering instructions |
| * @sk_timer: sock cleanup timer |
| * @sk_stamp: time stamp of last packet received |
| + * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only |
| * @sk_tsflags: SO_TIMESTAMPING socket options |
| * @sk_tskey: counter to disambiguate concurrent tstamp requests |
| * @sk_socket: Identd and reporting IO signals |
| @@ -425,6 +426,9 @@ struct sock { |
| long sk_sndtimeo; |
| struct timer_list sk_timer; |
| ktime_t sk_stamp; |
| +#if BITS_PER_LONG==32 |
| + seqlock_t sk_stamp_seq; |
| +#endif |
| u16 sk_tsflags; |
| u8 sk_shutdown; |
| u32 sk_tskey; |
| @@ -2114,6 +2118,34 @@ static inline void sk_drops_add(struct s |
| atomic_add(segs, &sk->sk_drops); |
| } |
| |
| +static inline ktime_t sock_read_timestamp(struct sock *sk) |
| +{ |
| +#if BITS_PER_LONG==32 |
| + unsigned int seq; |
| + ktime_t kt; |
| + |
| + do { |
| + seq = read_seqbegin(&sk->sk_stamp_seq); |
| + kt = sk->sk_stamp; |
| + } while (read_seqretry(&sk->sk_stamp_seq, seq)); |
| + |
| + return kt; |
| +#else |
| + return sk->sk_stamp; |
| +#endif |
| +} |
| + |
| +static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) |
| +{ |
| +#if BITS_PER_LONG==32 |
| + write_seqlock(&sk->sk_stamp_seq); |
| + sk->sk_stamp = kt; |
| + write_sequnlock(&sk->sk_stamp_seq); |
| +#else |
| + sk->sk_stamp = kt; |
| +#endif |
| +} |
| + |
| void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, |
| struct sk_buff *skb); |
| void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, |
| @@ -2138,7 +2170,7 @@ sock_recv_timestamp(struct msghdr *msg, |
| (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) |
| __sock_recv_timestamp(msg, sk, skb); |
| else |
| - sk->sk_stamp = kt; |
| + sock_write_timestamp(sk, kt); |
| |
| if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid) |
| __sock_recv_wifi_status(msg, sk, skb); |
| @@ -2158,7 +2190,7 @@ static inline void sock_recv_ts_and_drop |
| if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) |
| __sock_recv_ts_and_drops(msg, sk, skb); |
| else |
| - sk->sk_stamp = skb->tstamp; |
| + sock_write_timestamp(sk, skb->tstamp); |
| } |
| |
| void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); |
| --- a/net/compat.c |
| +++ b/net/compat.c |
| @@ -457,12 +457,14 @@ int compat_sock_get_timestamp(struct soc |
| err = -ENOENT; |
| if (!sock_flag(sk, SOCK_TIMESTAMP)) |
| sock_enable_timestamp(sk, SOCK_TIMESTAMP); |
| - tv = ktime_to_timeval(sk->sk_stamp); |
| + tv = ktime_to_timeval(sock_read_timestamp(sk)); |
| + |
| if (tv.tv_sec == -1) |
| return err; |
| if (tv.tv_sec == 0) { |
| - sk->sk_stamp = ktime_get_real(); |
| - tv = ktime_to_timeval(sk->sk_stamp); |
| + ktime_t kt = ktime_get_real(); |
| + sock_write_timestamp(sk, kt); |
| + tv = ktime_to_timeval(kt); |
| } |
| err = 0; |
| if (put_user(tv.tv_sec, &ctv->tv_sec) || |
| @@ -485,12 +487,13 @@ int compat_sock_get_timestampns(struct s |
| err = -ENOENT; |
| if (!sock_flag(sk, SOCK_TIMESTAMP)) |
| sock_enable_timestamp(sk, SOCK_TIMESTAMP); |
| - ts = ktime_to_timespec(sk->sk_stamp); |
| + ts = ktime_to_timespec(sock_read_timestamp(sk)); |
| if (ts.tv_sec == -1) |
| return err; |
| if (ts.tv_sec == 0) { |
| - sk->sk_stamp = ktime_get_real(); |
| - ts = ktime_to_timespec(sk->sk_stamp); |
| + ktime_t kt = ktime_get_real(); |
| + sock_write_timestamp(sk, kt); |
| + ts = ktime_to_timespec(kt); |
| } |
| err = 0; |
| if (put_user(ts.tv_sec, &ctv->tv_sec) || |
| --- a/net/core/sock.c |
| +++ b/net/core/sock.c |
| @@ -2467,6 +2467,9 @@ void sock_init_data(struct socket *sock, |
| sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; |
| |
| sk->sk_stamp = ktime_set(-1L, 0); |
| +#if BITS_PER_LONG==32 |
| + seqlock_init(&sk->sk_stamp_seq); |
| +#endif |
| |
| #ifdef CONFIG_NET_RX_BUSY_POLL |
| sk->sk_napi_id = 0; |
| --- a/net/sunrpc/svcsock.c |
| +++ b/net/sunrpc/svcsock.c |
| @@ -572,7 +572,7 @@ static int svc_udp_recvfrom(struct svc_r |
| /* Don't enable netstamp, sunrpc doesn't |
| need that much accuracy */ |
| } |
| - svsk->sk_sk->sk_stamp = skb->tstamp; |
| + sock_write_timestamp(svsk->sk_sk, skb->tstamp); |
| set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ |
| |
| len = skb->len; |