From 2c610022711675ee908b903d242f0b90e1db661f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 8 Jun 2016 10:19:51 +0200
Subject: locking/qspinlock: Fix spin_unlock_wait() some more

From: Peter Zijlstra <peterz@infradead.org>

commit 2c610022711675ee908b903d242f0b90e1db661f upstream.

While this prior commit:

  54cf809b9512 ("locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()")

... fixes spin_is_locked() and spin_unlock_wait() for the usage
in ipc/sem and netfilter, it does not in fact work right for the
usage in task_work and futex.

So while the 2 locks crossed problem:

        spin_lock(A)             spin_lock(B)
        if (!spin_is_locked(B))  spin_unlock_wait(A)
          foo()                    foo();

... works with the smp_mb() injected by both spin_is_locked() and
spin_unlock_wait(), this is not sufficient for:

        flag = 1;
        smp_mb();                spin_lock()
        spin_unlock_wait()       if (!flag)
                                   // add to lockless list
        // iterate lockless list

... because in this scenario, the store from spin_lock() can be delayed
past the load of flag, uncrossing the variables and losing the
guarantee.
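
For illustration, that pattern written out in full looks roughly like
this (a simplified sketch only; the names and helpers are made up and
are not the actual task_work/futex code):

	#include <linux/spinlock.h>
	#include <linux/llist.h>
	#include <linux/errno.h>

	static DEFINE_SPINLOCK(lock);
	static bool closed;		/* the 'flag' above */
	static LLIST_HEAD(items);	/* the lockless list above */

	/* left-hand CPU: shut the list down, then drain it */
	void consume(void)
	{
		closed = true;
		smp_mb();
		/* wait for any current lock holder to release the lock ... */
		spin_unlock_wait(&lock);
		/* ... then iterate the lockless list; no item may be missed */
	}

	/* right-hand CPU: add an item unless the list is closed */
	int produce(struct llist_node *n)
	{
		spin_lock(&lock);
		if (closed) {
			spin_unlock(&lock);
			return -EBUSY;
		}
		llist_add(n, &items);
		spin_unlock(&lock);
		return 0;
	}

If the locked-byte store from spin_lock() is delayed past the load of
'closed', produce() can observe closed == false and add an item while
consume()'s spin_unlock_wait() has already seen the lock as free, so
the new item is never iterated.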

This patch reworks spin_is_locked() and spin_unlock_wait() to work in
both cases by exploiting the observation that while the lock byte
store can be delayed, the contender must have registered itself
visibly in other state contained in the word.

It also allows for architectures to override both functions, as PPC
and ARM64 have an additional issue for which we currently have no
generic solution.
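
With the #ifndef hooks added below, an override would typically live in
the architecture's asm/qspinlock.h, defining the names so the generic
versions are suppressed, along these lines (an illustrative sketch of
the hook mechanism only, with a hypothetical arch and the generic loop
as a stand-in body; not an actual PPC/ARM64 fix):

	/* arch/foo/include/asm/qspinlock.h -- hypothetical */
	#include <asm-generic/qspinlock_types.h>

	#define queued_spin_unlock_wait queued_spin_unlock_wait
	static inline void queued_spin_unlock_wait(struct qspinlock *lock)
	{
		/* arch-specific ordering would go here */
		smp_mb();
		while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
			cpu_relax();
		smp_mb();
	}

	#define queued_spin_is_locked queued_spin_is_locked
	static inline int queued_spin_is_locked(struct qspinlock *lock)
	{
		smp_mb();
		return atomic_read(&lock->val);
	}

	#include <asm-generic/qspinlock.h>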

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Giovanni Gherdovich <ggherdovich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <waiman.long@hpe.com>
Cc: Will Deacon <will.deacon@arm.com>
Fixes: 54cf809b9512 ("locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/asm-generic/qspinlock.h |   53 +++++++++++------------------------
 kernel/locking/qspinlock.c      |   60 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 36 deletions(-)

--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -21,37 +21,33 @@
 #include <asm-generic/qspinlock_types.h>
 
 /**
+ * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * There is a very slight possibility of live-lock if the lockers keep coming
+ * and the waiter is just unfortunate enough to not see any unlock state.
+ */
+#ifndef queued_spin_unlock_wait
+extern void queued_spin_unlock_wait(struct qspinlock *lock);
+#endif
+
+/**
  * queued_spin_is_locked - is the spinlock locked?
  * @lock: Pointer to queued spinlock structure
  * Return: 1 if it is locked, 0 otherwise
  */
+#ifndef queued_spin_is_locked
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	/*
-	 * queued_spin_lock_slowpath() can ACQUIRE the lock before
-	 * issuing the unordered store that sets _Q_LOCKED_VAL.
-	 *
-	 * See both smp_cond_acquire() sites for more detail.
-	 *
-	 * This however means that in code like:
-	 *
-	 *   spin_lock(A)            spin_lock(B)
-	 *   spin_unlock_wait(B)     spin_is_locked(A)
-	 *   do_something()          do_something()
-	 *
-	 * Both CPUs can end up running do_something() because the store
-	 * setting _Q_LOCKED_VAL will pass through the loads in
-	 * spin_unlock_wait() and/or spin_is_locked().
+	 * See queued_spin_unlock_wait().
 	 *
-	 * Avoid this by issuing a full memory barrier between the spin_lock()
-	 * and the loads in spin_unlock_wait() and spin_is_locked().
-	 *
-	 * Note that regular mutual exclusion doesn't care about this
-	 * delayed store.
+	 * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL
+	 * isn't immediately observable.
 	 */
-	smp_mb();
-	return atomic_read(&lock->val) & _Q_LOCKED_MASK;
+	return atomic_read(&lock->val);
 }
+#endif
 
 /**
  * queued_spin_value_unlocked - is the spinlock structure unlocked?
@@ -121,21 +117,6 @@ static __always_inline void queued_spin_
 }
 #endif
 
-/**
- * queued_spin_unlock_wait - wait until current lock holder releases the lock
- * @lock : Pointer to queued spinlock structure
- *
- * There is a very slight possibility of live-lock if the lockers keep coming
- * and the waiter is just unfortunate enough to not see any unlock state.
- */
-static inline void queued_spin_unlock_wait(struct qspinlock *lock)
-{
-	/* See queued_spin_is_locked() */
-	smp_mb();
-	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
-		cpu_relax();
-}
-
 #ifndef virt_spin_lock
 static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 {
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -255,6 +255,66 @@ static __always_inline void __pv_wait_he
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ *  (1) clear_pending_set_locked():    *,1,0 -> *,0,1
+ *
+ *  (2) set_locked():                  t,0,0 -> t,0,1 ; t != 0
+ *      atomic_cmpxchg_relaxed():      t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourselves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more interesting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+	u32 val;
+
+	for (;;) {
+		val = atomic_read(&lock->val);
+
+		if (!val) /* not locked, we're done */
+			goto done;
+
+		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+			break;
+
+		/* not locked, but pending, wait until we observe the lock */
+		cpu_relax();
+	}
+
+	/* any unlock is good */
+	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+done:
+	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
+
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**