From 2c610022711675ee908b903d242f0b90e1db661f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 8 Jun 2016 10:19:51 +0200
Subject: locking/qspinlock: Fix spin_unlock_wait() some more

From: Peter Zijlstra <peterz@infradead.org>

commit 2c610022711675ee908b903d242f0b90e1db661f upstream.

While this prior commit:

  54cf809b9512 ("locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()")

... fixes spin_is_locked() and spin_unlock_wait() for the usage
in ipc/sem and netfilter, it does not in fact work right for the
usage in task_work and futex.

So while the 2 locks crossed problem:

        spin_lock(A)             spin_lock(B)
        if (!spin_is_locked(B))  spin_unlock_wait(A)
          foo()                    foo();

... works with the smp_mb() injected by both spin_is_locked() and
spin_unlock_wait(), this is not sufficient for:

        flag = 1;
        smp_mb();                spin_lock()
        spin_unlock_wait()       if (!flag)
                                   // add to lockless list
        // iterate lockless list

... because in this scenario, the store from spin_lock() can be delayed
past the load of flag, uncrossing the variables and losing the
guarantee.
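
For illustration, that pattern written out in full looks roughly like
this (a simplified sketch only; the names and helpers are made up and
are not the actual task_work/futex code):

	#include <linux/spinlock.h>
	#include <linux/llist.h>
	#include <linux/errno.h>

	static DEFINE_SPINLOCK(lock);
	static bool closed;		/* the 'flag' above */
	static LLIST_HEAD(items);	/* the lockless list above */

	/* left-hand CPU: shut the list down, then drain it */
	void consume(void)
	{
		closed = true;
		smp_mb();
		/* wait for any current lock holder to release the lock ... */
		spin_unlock_wait(&lock);
		/* ... then iterate the lockless list; no item may be missed */
	}

	/* right-hand CPU: add an item unless the list is closed */
	int produce(struct llist_node *n)
	{
		spin_lock(&lock);
		if (closed) {
			spin_unlock(&lock);
			return -EBUSY;
		}
		llist_add(n, &items);
		spin_unlock(&lock);
		return 0;
	}

If the locked-byte store from spin_lock() is delayed past the load of
'closed', produce() can observe closed == false and add an item while
consume()'s spin_unlock_wait() has already seen the lock as free, so
the new item is never iterated.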

This patch reworks spin_is_locked() and spin_unlock_wait() to work in
both cases by exploiting the observation that while the lock byte
store can be delayed, the contender must have registered itself
visibly in other state contained in the word.

It also allows for architectures to override both functions, as PPC
and ARM64 have an additional issue for which we currently have no
generic solution.
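
With the #ifndef hooks added below, an override would typically live in
the architecture's asm/qspinlock.h, defining the names so the generic
versions are suppressed, along these lines (an illustrative sketch of
the hook mechanism only, with a hypothetical arch and the generic loop
as a stand-in body; not an actual PPC/ARM64 fix):

	/* arch/foo/include/asm/qspinlock.h -- hypothetical */
	#include <asm-generic/qspinlock_types.h>

	#define queued_spin_unlock_wait queued_spin_unlock_wait
	static inline void queued_spin_unlock_wait(struct qspinlock *lock)
	{
		/* arch-specific ordering would go here */
		smp_mb();
		while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
			cpu_relax();
		smp_mb();
	}

	#define queued_spin_is_locked queued_spin_is_locked
	static inline int queued_spin_is_locked(struct qspinlock *lock)
	{
		smp_mb();
		return atomic_read(&lock->val);
	}

	#include <asm-generic/qspinlock.h>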

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Giovanni Gherdovich <ggherdovich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <waiman.long@hpe.com>
Cc: Will Deacon <will.deacon@arm.com>
Fixes: 54cf809b9512 ("locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/asm-generic/qspinlock.h |   53 +++++++++++------------------------
 kernel/locking/qspinlock.c      |   60 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 36 deletions(-)

--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -21,37 +21,33 @@
 #include <asm-generic/qspinlock_types.h>
 
 /**
+ * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * There is a very slight possibility of live-lock if the lockers keep coming
+ * and the waiter is just unfortunate enough to not see any unlock state.
+ */
+#ifndef queued_spin_unlock_wait
+extern void queued_spin_unlock_wait(struct qspinlock *lock);
+#endif
+
+/**
  * queued_spin_is_locked - is the spinlock locked?
  * @lock: Pointer to queued spinlock structure
  * Return: 1 if it is locked, 0 otherwise
  */
+#ifndef queued_spin_is_locked
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	/*
-	 * queued_spin_lock_slowpath() can ACQUIRE the lock before
-	 * issuing the unordered store that sets _Q_LOCKED_VAL.
-	 *
-	 * See both smp_cond_acquire() sites for more detail.
-	 *
-	 * This however means that in code like:
-	 *
-	 *   spin_lock(A)            spin_lock(B)
-	 *   spin_unlock_wait(B)     spin_is_locked(A)
-	 *   do_something()          do_something()
-	 *
-	 * Both CPUs can end up running do_something() because the store
-	 * setting _Q_LOCKED_VAL will pass through the loads in
-	 * spin_unlock_wait() and/or spin_is_locked().
+	 * See queued_spin_unlock_wait().
 	 *
-	 * Avoid this by issuing a full memory barrier between the spin_lock()
-	 * and the loads in spin_unlock_wait() and spin_is_locked().
-	 *
-	 * Note that regular mutual exclusion doesn't care about this
-	 * delayed store.
+	 * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL
+	 * isn't immediately observable.
 	 */
-	smp_mb();
-	return atomic_read(&lock->val) & _Q_LOCKED_MASK;
+	return atomic_read(&lock->val);
 }
+#endif
 
 /**
  * queued_spin_value_unlocked - is the spinlock structure unlocked?
@@ -121,21 +117,6 @@ static __always_inline void queued_spin_
 }
 #endif
 
-/**
- * queued_spin_unlock_wait - wait until current lock holder releases the lock
- * @lock : Pointer to queued spinlock structure
- *
- * There is a very slight possibility of live-lock if the lockers keep coming
- * and the waiter is just unfortunate enough to not see any unlock state.
- */
-static inline void queued_spin_unlock_wait(struct qspinlock *lock)
-{
-	/* See queued_spin_is_locked() */
-	smp_mb();
-	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
-		cpu_relax();
-}
-
 #ifndef virt_spin_lock
 static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 {
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -255,6 +255,66 @@ static __always_inline void __pv_wait_he
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ *  (1) clear_pending_set_locked():    *,1,0 -> *,0,1
+ *
+ *  (2) set_locked():                  t,0,0 -> t,0,1 ; t != 0
+ *      atomic_cmpxchg_relaxed():      t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourselves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more interesting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+	u32 val;
+
+	for (;;) {
+		val = atomic_read(&lock->val);
+
+		if (!val) /* not locked, we're done */
+			goto done;
+
+		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+			break;
+
+		/* not locked, but pending, wait until we observe the lock */
+		cpu_relax();
+	}
+
+	/* any unlock is good */
+	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+done:
+	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
+
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**