| From 2576c28e3f623ed401db7e6197241865328620ef Mon Sep 17 00:00:00 2001 |
| From: Vineet Gupta <vgupta@synopsys.com> |
| Date: Thu, 20 Nov 2014 15:42:09 +0530 |
| Subject: ARC: add smp barriers around atomics per Documentation/atomic_ops.txt |
| |
| From: Vineet Gupta <vgupta@synopsys.com> |
| |
| commit 2576c28e3f623ed401db7e6197241865328620ef upstream. |
| |
| - arch_spin_lock/unlock were lacking the ACQUIRE/RELEASE barriers. |
|   Since ARCv2 only provides load/load, store/store and all/all barriers, |
|   we need the full all/all barrier (see the sketch after this list). |
| |
| - LLOCK/SCOND based atomics, bitops and cmpxchg, which return the |
|   modified values, were lacking the explicit smp barriers. |
| |
| - Non-LLOCK/SCOND variants don't need the explicit barriers since that |
|   is implicitly provided by the spin locks used to implement the |
|   critical section (the spin lock barriers in turn are also fixed in |
|   this commit as explained above). |
| |
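| For illustration only, a rough sketch of where the barriers end up with |
| this change. do_llock_scond_rmw() is a placeholder standing in for the |
| LLOCK/SCOND retry loop, not a real kernel function: |
| |
|     /* |
|      * Value-returning LL/SC atomic: fully ordered per |
|      * Documentation/atomic_ops.txt, so a full barrier goes on |
|      * both sides of the LLOCK/SCOND loop. |
|      */ |
|     smp_mb();                   /* prior accesses can't sink below */ |
|     ret = do_llock_scond_rmw(&v->counter); |
|     smp_mb();                   /* later accesses can't float above */ |
| |
|     /* |
|      * Spinlock: ACQUIRE needs a barrier after taking the lock, |
|      * RELEASE one before dropping it. With only an all/all barrier |
|      * available on ARCv2, both end up as smp_mb(). |
|      */ |
|     arch_spin_lock(&lock);      /* EX loop, then smp_mb() => ACQUIRE */ |
|     /* critical section: accesses can't escape lock/unlock */ |
|     arch_spin_unlock(&lock);    /* smp_mb(), then EX store => RELEASE */ |
| |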
| Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
| Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> |
| Signed-off-by: Vineet Gupta <vgupta@synopsys.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| arch/arc/include/asm/atomic.h | 21 +++++++++++++++++++++ |
| arch/arc/include/asm/bitops.h | 19 +++++++++++++++++++ |
| arch/arc/include/asm/cmpxchg.h | 17 +++++++++++++++++ |
| arch/arc/include/asm/spinlock.h | 32 ++++++++++++++++++++++++++++++++ |
| 4 files changed, 89 insertions(+) |
| |
| --- a/arch/arc/include/asm/atomic.h |
| +++ b/arch/arc/include/asm/atomic.h |
| @@ -43,6 +43,12 @@ static inline int atomic_##op##_return(i |
| { \ |
| unsigned int temp; \ |
| \ |
| + /* \ |
| + * Explicit full memory barrier needed before/after as \ |
| +	 * LLOCK/SCOND themselves don't provide any such semantics	\ |
| + */ \ |
| + smp_mb(); \ |
| + \ |
| __asm__ __volatile__( \ |
| "1: llock %0, [%1] \n" \ |
| " " #asm_op " %0, %0, %2 \n" \ |
| @@ -52,6 +58,8 @@ static inline int atomic_##op##_return(i |
| : "r"(&v->counter), "ir"(i) \ |
| : "cc"); \ |
| \ |
| + smp_mb(); \ |
| + \ |
| return temp; \ |
| } |
| |
| @@ -105,6 +113,9 @@ static inline int atomic_##op##_return(i |
| unsigned long flags; \ |
| unsigned long temp; \ |
| \ |
| + /* \ |
| +	 * spin lock/unlock provide the needed smp_mb() before/after	\ |
| + */ \ |
| atomic_ops_lock(flags); \ |
| temp = v->counter; \ |
| temp c_op i; \ |
| @@ -142,9 +153,19 @@ ATOMIC_OP(and, &=, and) |
| #define __atomic_add_unless(v, a, u) \ |
| ({ \ |
| int c, old; \ |
| + \ |
| + /* \ |
| + * Explicit full memory barrier needed before/after as \ |
| +	 * LLOCK/SCOND themselves don't provide any such semantics	\ |
| + */ \ |
| + smp_mb(); \ |
| + \ |
| c = atomic_read(v); \ |
| while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\ |
| c = old; \ |
| + \ |
| + smp_mb(); \ |
| + \ |
| c; \ |
| }) |
| |
| --- a/arch/arc/include/asm/bitops.h |
| +++ b/arch/arc/include/asm/bitops.h |
| @@ -103,6 +103,12 @@ static inline int test_and_set_bit(unsig |
| if (__builtin_constant_p(nr)) |
| nr &= 0x1f; |
| |
| + /* |
| + * Explicit full memory barrier needed before/after as |
| + * LLOCK/SCOND themselves don't provide any such semantics |
| + */ |
| + smp_mb(); |
| + |
| __asm__ __volatile__( |
| "1: llock %0, [%2] \n" |
| " bset %1, %0, %3 \n" |
| @@ -112,6 +118,8 @@ static inline int test_and_set_bit(unsig |
| : "r"(m), "ir"(nr) |
| : "cc"); |
| |
| + smp_mb(); |
| + |
| return (old & (1 << nr)) != 0; |
| } |
| |
| @@ -125,6 +133,8 @@ test_and_clear_bit(unsigned long nr, vol |
| if (__builtin_constant_p(nr)) |
| nr &= 0x1f; |
| |
| + smp_mb(); |
| + |
| __asm__ __volatile__( |
| "1: llock %0, [%2] \n" |
| " bclr %1, %0, %3 \n" |
| @@ -134,6 +144,8 @@ test_and_clear_bit(unsigned long nr, vol |
| : "r"(m), "ir"(nr) |
| : "cc"); |
| |
| + smp_mb(); |
| + |
| return (old & (1 << nr)) != 0; |
| } |
| |
| @@ -147,6 +159,8 @@ test_and_change_bit(unsigned long nr, vo |
| if (__builtin_constant_p(nr)) |
| nr &= 0x1f; |
| |
| + smp_mb(); |
| + |
| __asm__ __volatile__( |
| "1: llock %0, [%2] \n" |
| " bxor %1, %0, %3 \n" |
| @@ -156,6 +170,8 @@ test_and_change_bit(unsigned long nr, vo |
| : "r"(m), "ir"(nr) |
| : "cc"); |
| |
| + smp_mb(); |
| + |
| return (old & (1 << nr)) != 0; |
| } |
| |
| @@ -235,6 +251,9 @@ static inline int test_and_set_bit(unsig |
| if (__builtin_constant_p(nr)) |
| nr &= 0x1f; |
| |
| + /* |
| + * spin lock/unlock provide the needed smp_mb() before/after |
| + */ |
| bitops_lock(flags); |
| |
| old = *m; |
| --- a/arch/arc/include/asm/cmpxchg.h |
| +++ b/arch/arc/include/asm/cmpxchg.h |
| @@ -10,6 +10,8 @@ |
| #define __ASM_ARC_CMPXCHG_H |
| |
| #include <linux/types.h> |
| + |
| +#include <asm/barrier.h> |
| #include <asm/smp.h> |
| |
| #ifdef CONFIG_ARC_HAS_LLSC |
| @@ -19,6 +21,12 @@ __cmpxchg(volatile void *ptr, unsigned l |
| { |
| unsigned long prev; |
| |
| + /* |
| + * Explicit full memory barrier needed before/after as |
| +	 * LLOCK/SCOND themselves don't provide any such semantics |
| + */ |
| + smp_mb(); |
| + |
| __asm__ __volatile__( |
| "1: llock %0, [%1] \n" |
| " brne %0, %2, 2f \n" |
| @@ -30,6 +38,8 @@ __cmpxchg(volatile void *ptr, unsigned l |
| "r"(new) /* can't be "ir". scond can't take limm for "b" */ |
| : "cc"); |
| |
| + smp_mb(); |
| + |
| return prev; |
| } |
| |
| @@ -42,6 +52,9 @@ __cmpxchg(volatile void *ptr, unsigned l |
| int prev; |
| volatile unsigned long *p = ptr; |
| |
| + /* |
| + * spin lock/unlock provide the needed smp_mb() before/after |
| + */ |
| atomic_ops_lock(flags); |
| prev = *p; |
| if (prev == expected) |
| @@ -77,12 +90,16 @@ static inline unsigned long __xchg(unsig |
| |
| switch (size) { |
| case 4: |
| + smp_mb(); |
| + |
| __asm__ __volatile__( |
| " ex %0, [%1] \n" |
| : "+r"(val) |
| : "r"(ptr) |
| : "memory"); |
| |
| + smp_mb(); |
| + |
| return val; |
| } |
| return __xchg_bad_pointer(); |
| --- a/arch/arc/include/asm/spinlock.h |
| +++ b/arch/arc/include/asm/spinlock.h |
| @@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_s |
| { |
| unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; |
| |
| + /* |
| + * This smp_mb() is technically superfluous, we only need the one |
| + * after the lock for providing the ACQUIRE semantics. |
| +	 * However, doing the "right" thing was regressing hackbench, |
| +	 * so this is kept pending further investigation |
| + */ |
| + smp_mb(); |
| + |
| __asm__ __volatile__( |
| "1: ex %0, [%1] \n" |
| " breq %0, %2, 1b \n" |
| : "+&r" (tmp) |
| : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) |
| : "memory"); |
| + |
| + /* |
| +	 * ACQUIRE barrier to ensure loads/stores after taking the lock |
| + * don't "bleed-up" out of the critical section (leak-in is allowed) |
| + * http://www.spinics.net/lists/kernel/msg2010409.html |
| + * |
| +	 * ARCv2 only has load-load, store-store and all-all barriers, |
| +	 * thus we need the full all-all barrier |
| + */ |
| + smp_mb(); |
| } |
| |
| static inline int arch_spin_trylock(arch_spinlock_t *lock) |
| { |
| unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; |
| |
| + smp_mb(); |
| + |
| __asm__ __volatile__( |
| "1: ex %0, [%1] \n" |
| : "+r" (tmp) |
| : "r"(&(lock->slock)) |
| : "memory"); |
| |
| + smp_mb(); |
| + |
| return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); |
| } |
| |
| @@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch |
| { |
| unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; |
| |
| + /* |
| +	 * RELEASE barrier: given the barrier instructions available on |
| +	 * ARCv2, a full barrier is the only option |
| + */ |
| + smp_mb(); |
| + |
| __asm__ __volatile__( |
| " ex %0, [%1] \n" |
| : "+r" (tmp) |
| : "r"(&(lock->slock)) |
| : "memory"); |
| |
| + /* |
| +	 * superfluous, but kept for now - see the pairing version in |
| +	 * arch_spin_lock() above |
| + */ |
| smp_mb(); |
| } |
| |