| From cd430d841fa2d9c0dbcdc129587e8206e4ffe0d2 Mon Sep 17 00:00:00 2001 |
| From: John Fastabend <john.fastabend@gmail.com> |
| Date: Sat, 11 Jan 2020 06:12:00 +0000 |
| Subject: [PATCH] bpf: Sockmap, ensure sock lock held during tear down |
| |
| commit 7e81a35302066c5a00b4c72d83e3ea4cad6eeb5b upstream. |
| |
| The sock_map_free() and sock_hash_free() paths used to delete sockmap |
| and sockhash maps walk the maps and destroy psock and bpf state associated |
| with the socks in the map. When done the socks no longer have BPF programs |
| attached and will function normally. This can happen while the socks in |
| the map are still "live" meaning data may be sent/received during the walk. |
| |
| Currently, though, we don't take the sock_lock when the psock and bpf state |
| is removed through this path. Specifically, this means we can be writing |
| into the ops structure pointers such as sendmsg, sendpage, recvmsg, etc. |
| while they are also being called from the networking side. This is not |
| safe, we never used proper READ_ONCE/WRITE_ONCE semantics here if we |
| believed it was safe. Further, it's not clear to me it's even a good idea |
| to try and do this on "live" sockets while networking side might also |
| be using the socket. Instead of trying to reason about using the socks |
| from both sides, let's realize that every use case I'm aware of rarely |
| deletes maps, in fact kubernetes/Cilium case builds map at init and |
| never tears it down except on errors. So let's do the simple fix and |
| grab sock lock. |
| |
| This patch wraps sock deletes from maps in sock lock and adds some |
| annotations so we catch any other cases easier. |
| |
| Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") |
| Signed-off-by: John Fastabend <john.fastabend@gmail.com> |
| Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> |
| Acked-by: Song Liu <songliubraving@fb.com> |
| Cc: stable@vger.kernel.org |
| Link: https://lore.kernel.org/bpf/20200111061206.8028-3-john.fastabend@gmail.com |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/net/core/skmsg.c b/net/core/skmsg.c |
| index fc6f99dcddfd..5b754427688c 100644 |
| --- a/net/core/skmsg.c |
| +++ b/net/core/skmsg.c |
| @@ -596,6 +596,8 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy); |
| void sk_psock_drop(struct sock *sk, struct sk_psock *psock) |
| { |
| + sock_owned_by_me(sk); |
| + |
| rcu_assign_sk_user_data(sk, NULL); |
| sk_psock_cork_free(psock); |
| sk_psock_zap_ingress(psock); |
| sk_psock_restore_proto(sk, psock); |
| diff --git a/net/core/sock_map.c b/net/core/sock_map.c |
| index fb3e581be63f..dd5face89a27 100644 |
| --- a/net/core/sock_map.c |
| +++ b/net/core/sock_map.c |
| @@ -245,10 +245,13 @@ static void sock_map_free(struct bpf_map *map) |
| struct sock *sk; |
| |
| sk = xchg(psk, NULL); |
| - if (sk) |
| + if (sk) { |
| + lock_sock(sk); |
| rcu_read_lock(); |
| sock_map_unref(sk, psk); |
| rcu_read_unlock(); |
| + release_sock(sk); |
| + } |
| } |
| raw_spin_unlock_bh(&stab->lock); |
| |
| @@ -867,7 +870,9 @@ static void sock_hash_free(struct bpf_map *map) |
| hlist_for_each_entry_safe(elem, node, &bucket->head, node) { |
| hlist_del_rcu(&elem->node); |
| + lock_sock(elem->sk); |
| rcu_read_lock(); |
| sock_map_unref(elem->sk, elem); |
| rcu_read_unlock(); |
| + release_sock(elem->sk); |
| } |
| raw_spin_unlock_bh(&bucket->lock); |
| -- |
| 2.7.4 |
| |