| From d8c12267357535d8a61d8b6d5b9bcf7db33b003f Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Wed, 9 Feb 2022 19:56:57 +0100 |
| Subject: tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH. |
| |
| From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| |
| [ Upstream commit 4f9bf2a2f5aacf988e6d5e56b961ba45c5a25248 ] |
| |
| Commit |
| 9652dc2eb9e40 ("tcp: relax listening_hash operations") |
| |
| removed the need to disable bottom half while acquiring |
| listening_hash.lock. There are still two callers left which disable |
| bottom half before the lock is acquired. |
| |
| On PREEMPT_RT the softirqs are preemptible and local_bh_disable() acts |
| as a lock to ensure that resources, that are protected by disabling |
| bottom halves, remain protected. |
| This leads to a circular locking dependency if the lock acquired with |
| disabled bottom halves is also acquired with enabled bottom halves |
| followed by disabling bottom halves. This is the reverse locking order. |
| It has been observed with inet_listen_hashbucket::lock: |
| |
| local_bh_disable() + spin_lock(&ilb->lock): |
| inet_listen() |
| inet_csk_listen_start() |
| sk->sk_prot->hash() := inet_hash() |
| local_bh_disable() |
| __inet_hash() |
| spin_lock(&ilb->lock); |
| acquire(&ilb->lock); |
| |
| Reverse order: spin_lock(&ilb2->lock) + local_bh_disable(): |
| tcp_seq_next() |
| listening_get_next() |
| spin_lock(&ilb2->lock); |
| acquire(&ilb2->lock); |
| |
| tcp4_seq_show() |
| get_tcp4_sock() |
| sock_i_ino() |
| read_lock_bh(&sk->sk_callback_lock); |
| acquire(softirq_ctrl) // <---- whoops |
| acquire(&sk->sk_callback_lock) |
| |
| Drop local_bh_disable() around __inet_hash() which acquires |
| listening_hash->lock. Split inet_unhash() and acquire the |
| listen_hashbucket lock without disabling bottom halves; the inet_ehash |
| lock with disabled bottom halves. |
| |
| Reported-by: Mike Galbraith <efault@gmx.de> |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| Link: https://lkml.kernel.org/r/12d6f9879a97cd56c09fb53dee343cbb14f7f1f7.camel@gmx.de |
| Link: https://lkml.kernel.org/r/X9CheYjuXWc75Spa@hirez.programming.kicks-ass.net |
| Link: https://lore.kernel.org/r/YgQOebeZ10eNx1W6@linutronix.de |
| Signed-off-by: Jakub Kicinski <kuba@kernel.org> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| net/ipv4/inet_hashtables.c | 53 ++++++++++++++++++++++--------------- |
| net/ipv6/inet6_hashtables.c | 5 +--- |
| 2 files changed, 33 insertions(+), 25 deletions(-) |
| |
| diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c |
| index 75737267746f..7bd1e10086f0 100644 |
| --- a/net/ipv4/inet_hashtables.c |
| +++ b/net/ipv4/inet_hashtables.c |
| @@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) |
| int err = 0; |
| |
| if (sk->sk_state != TCP_LISTEN) { |
| + local_bh_disable(); |
| inet_ehash_nolisten(sk, osk, NULL); |
| + local_bh_enable(); |
| return 0; |
| } |
| WARN_ON(!sk_unhashed(sk)); |
| @@ -669,45 +671,54 @@ int inet_hash(struct sock *sk) |
| { |
| int err = 0; |
| |
| - if (sk->sk_state != TCP_CLOSE) { |
| - local_bh_disable(); |
| + if (sk->sk_state != TCP_CLOSE) |
| err = __inet_hash(sk, NULL); |
| - local_bh_enable(); |
| - } |
| |
| return err; |
| } |
| EXPORT_SYMBOL_GPL(inet_hash); |
| |
| -void inet_unhash(struct sock *sk) |
| +static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb) |
| { |
| - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
| - struct inet_listen_hashbucket *ilb = NULL; |
| - spinlock_t *lock; |
| - |
| if (sk_unhashed(sk)) |
| return; |
| |
| - if (sk->sk_state == TCP_LISTEN) { |
| - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; |
| - lock = &ilb->lock; |
| - } else { |
| - lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
| - } |
| - spin_lock_bh(lock); |
| - if (sk_unhashed(sk)) |
| - goto unlock; |
| - |
| if (rcu_access_pointer(sk->sk_reuseport_cb)) |
| reuseport_stop_listen_sock(sk); |
| if (ilb) { |
| + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
| + |
| inet_unhash2(hashinfo, sk); |
| ilb->count--; |
| } |
| __sk_nulls_del_node_init_rcu(sk); |
| sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
| -unlock: |
| - spin_unlock_bh(lock); |
| +} |
| + |
| +void inet_unhash(struct sock *sk) |
| +{ |
| + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
| + |
| + if (sk_unhashed(sk)) |
| + return; |
| + |
| + if (sk->sk_state == TCP_LISTEN) { |
| + struct inet_listen_hashbucket *ilb; |
| + |
| + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; |
| + /* Don't disable bottom halves while acquiring the lock to |
| + * avoid circular locking dependency on PREEMPT_RT. |
| + */ |
| + spin_lock(&ilb->lock); |
| + __inet_unhash(sk, ilb); |
| + spin_unlock(&ilb->lock); |
| + } else { |
| + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
| + |
| + spin_lock_bh(lock); |
| + __inet_unhash(sk, NULL); |
| + spin_unlock_bh(lock); |
| + } |
| } |
| EXPORT_SYMBOL_GPL(inet_unhash); |
| |
| diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c |
| index 67c9114835c8..0a2e7f228391 100644 |
| --- a/net/ipv6/inet6_hashtables.c |
| +++ b/net/ipv6/inet6_hashtables.c |
| @@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk) |
| { |
| int err = 0; |
| |
| - if (sk->sk_state != TCP_CLOSE) { |
| - local_bh_disable(); |
| + if (sk->sk_state != TCP_CLOSE) |
| err = __inet_hash(sk, NULL); |
| - local_bh_enable(); |
| - } |
| |
| return err; |
| } |
| -- |
| 2.35.1 |
| |