| From 113fc5a6e8c2288619ff7e8187a6f556b7e0d372 Mon Sep 17 00:00:00 2001 |
| From: H. Peter Anvin <hpa@zytor.com> |
| Date: Tue, 27 Jul 2010 17:01:49 -0700 |
| Subject: x86: Add memory modify constraints to xchg() and cmpxchg() |
| |
| From: H. Peter Anvin <hpa@zytor.com> |
| |
| commit 113fc5a6e8c2288619ff7e8187a6f556b7e0d372 upstream. |
| |
| [ Backport to .32 by Tomáš Janoušek <tomi@nomi.cz> ] |
| |
| xchg() and cmpxchg() modify their memory operands, not merely read |
| them. With some versions of gcc the "memory" clobber has apparently |
| been enough to handle this, but not with all of them. |
| |
| Originally-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: H. Peter Anvin <hpa@zytor.com> |
| Cc: Glauber Costa <glommer@redhat.com> |
| Cc: Avi Kivity <avi@redhat.com> |
| Cc: Peter Palfrader <peter@palfrader.org> |
| Cc: Greg KH <gregkh@suse.de> |
| Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> |
| Cc: Zachary Amsden <zamsden@redhat.com> |
| Cc: Marcelo Tosatti <mtosatti@redhat.com> |
| LKML-Reference: <4C4F7277.8050306@zytor.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| |
| --- |
| arch/x86/include/asm/cmpxchg_32.h | 65 +++++++++++--------------------------- |
| arch/x86/include/asm/cmpxchg_64.h | 4 -- |
| 2 files changed, 20 insertions(+), 49 deletions(-) |
| |
| --- a/arch/x86/include/asm/cmpxchg_32.h |
| +++ b/arch/x86/include/asm/cmpxchg_32.h |
| @@ -17,60 +17,33 @@ struct __xchg_dummy { |
| #define __xg(x) ((struct __xchg_dummy *)(x)) |
| |
| /* |
| - * The semantics of XCHGCMP8B are a bit strange, this is why |
| - * there is a loop and the loading of %%eax and %%edx has to |
| - * be inside. This inlines well in most cases, the cached |
| - * cost is around ~38 cycles. (in the future we might want |
| - * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that |
| - * might have an implicit FPU-save as a cost, so it's not |
| - * clear which path to go.) |
| + * CMPXCHG8B only writes to the target if we had the previous |
| + * value in registers, otherwise it acts as a read and gives us the |
| + * "new previous" value. That is why there is a loop. Preloading |
| + * EDX:EAX is a performance optimization: in the common case it means |
| + * we need only one locked operation. |
| * |
| - * cmpxchg8b must be used with the lock prefix here to allow |
| - * the instruction to be executed atomically, see page 3-102 |
| - * of the instruction set reference 24319102.pdf. We need |
| - * the reader side to see the coherent 64bit value. |
| + * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very |
| + * least an FPU save and/or %cr0.ts manipulation. |
| + * |
| + * cmpxchg8b must be used with the lock prefix here to allow the |
| + * instruction to be executed atomically. We need the reader side |
| + * to see a coherent 64-bit value. |
| */ |
| -static inline void __set_64bit(unsigned long long *ptr, |
| - unsigned int low, unsigned int high) |
| +static inline void set_64bit(volatile u64 *ptr, u64 value) |
| { |
| + u32 low = value; |
| + u32 high = value >> 32; |
| + u64 prev = *ptr; |
| + |
| asm volatile("\n1:\t" |
| - "movl (%1), %%eax\n\t" |
| - "movl 4(%1), %%edx\n\t" |
| LOCK_PREFIX "cmpxchg8b %0\n\t" |
| "jnz 1b" |
| - : "=m"(*ptr) |
| - : "D" (ptr), |
| - "b"(low), |
| - "c"(high) |
| - : "ax", "dx", "memory"); |
| -} |
| - |
| -static inline void __set_64bit_constant(unsigned long long *ptr, |
| - unsigned long long value) |
| -{ |
| - __set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32)); |
| + : "=m" (*ptr), "+A" (prev) |
| + : "b" (low), "c" (high) |
| + : "memory"); |
| } |
| |
| -#define ll_low(x) *(((unsigned int *)&(x)) + 0) |
| -#define ll_high(x) *(((unsigned int *)&(x)) + 1) |
| - |
| -static inline void __set_64bit_var(unsigned long long *ptr, |
| - unsigned long long value) |
| -{ |
| - __set_64bit(ptr, ll_low(value), ll_high(value)); |
| -} |
| - |
| -#define set_64bit(ptr, value) \ |
| - (__builtin_constant_p((value)) \ |
| - ? __set_64bit_constant((ptr), (value)) \ |
| - : __set_64bit_var((ptr), (value))) |
| - |
| -#define _set_64bit(ptr, value) \ |
| - (__builtin_constant_p(value) \ |
| - ? __set_64bit(ptr, (unsigned int)(value), \ |
| - (unsigned int)((value) >> 32)) \ |
| - : __set_64bit(ptr, ll_low((value)), ll_high((value)))) |
| - |
| /* |
| * Note: no "lock" prefix even on SMP: xchg always implies lock anyway |
| * Note 2: xchg has side effect, so that attribute volatile is necessary, |
| --- a/arch/x86/include/asm/cmpxchg_64.h |
| +++ b/arch/x86/include/asm/cmpxchg_64.h |
| @@ -8,13 +8,11 @@ |
| |
| #define __xg(x) ((volatile long *)(x)) |
| |
| -static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) |
| +static inline void set_64bit(volatile u64 *ptr, u64 val) |
| { |
| *ptr = val; |
| } |
| |
| -#define _set_64bit set_64bit |
| - |
| /* |
| * Note: no "lock" prefix even on SMP: xchg always implies lock anyway |
| * Note 2: xchg has side effect, so that attribute volatile is necessary, |