linux/arch/x86/include/asm/cmpxchg_32.h
Jan Beulich cdcd629869 x86: Fix and improve cmpxchg_double{,_local}()
Just like the per-CPU ones they had several
problems/shortcomings:

Only the first memory operand was mentioned in the asm()
operands, and the 2x64-bit version didn't have a memory clobber
while the 2x32-bit one did. The former allowed the compiler to
not recognize the need to re-load the data in case it had it
cached in some register, while the latter was overly
destructive.

The types of the local copies of the old and new values were
incorrect (the types of the pointed-to variables should be used
here, to make sure the respective old/new variable types are
compatible).

The __dummy/__junk variables were pointless, given that local
copies of the inputs already existed (and can hence be used for
discarded outputs).

The 32-bit variant of cmpxchg_double_local() referenced
cmpxchg16b_local().

At once also:

 - change the return value type to what it really is: 'bool'
 - unify 32- and 64-bit variants
 - abstract out the common part of the 'normal' and 'local' variants

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F01F12A020000780006A19B@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-01-04 15:01:54 +01:00

172 lines
4.7 KiB
C

#ifndef _ASM_X86_CMPXCHG_32_H
#define _ASM_X86_CMPXCHG_32_H
/*
* Note: if you use set64_bit(), __cmpxchg64(), or their variants, you
* you need to test for the feature in boot_cpu_data.
*/
/*
* CMPXCHG8B only writes to the target if we had the previous
* value in registers, otherwise it acts as a read and gives us the
* "new previous" value. That is why there is a loop. Preloading
* EDX:EAX is a performance optimization: in the common case it means
* we need only one locked operation.
*
* A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
* least an FPU save and/or %cr0.ts manipulation.
*
* cmpxchg8b must be used with the lock prefix here to allow the
* instruction to be executed atomically. We need to have the reader
* side to see the coherent 64bit value.
*/
static inline void set_64bit(volatile u64 *ptr, u64 value)
{
u32 low = value;
u32 high = value >> 32;
u64 prev = *ptr;
asm volatile("\n1:\t"
LOCK_PREFIX "cmpxchg8b %0\n\t"
"jnz 1b"
: "=m" (*ptr), "+A" (prev)
: "b" (low), "c" (high)
: "memory");
}
#ifdef CONFIG_X86_CMPXCHG
#define __HAVE_ARCH_CMPXCHG 1
#endif
#ifdef CONFIG_X86_CMPXCHG64
#define cmpxchg64(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
#define cmpxchg64_local(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
#endif
static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
{
u64 prev;
asm volatile(LOCK_PREFIX "cmpxchg8b %1"
: "=A" (prev),
"+m" (*ptr)
: "b" ((u32)new),
"c" ((u32)(new >> 32)),
"0" (old)
: "memory");
return prev;
}
static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
{
u64 prev;
asm volatile("cmpxchg8b %1"
: "=A" (prev),
"+m" (*ptr)
: "b" ((u32)new),
"c" ((u32)(new >> 32)),
"0" (old)
: "memory");
return prev;
}
#ifndef CONFIG_X86_CMPXCHG
/*
* Building a kernel capable running on 80386. It may be necessary to
* simulate the cmpxchg on the 80386 CPU. For that purpose we define
* a function for each of the sizes we support.
*/
extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
switch (size) {
case 1:
return cmpxchg_386_u8(ptr, old, new);
case 2:
return cmpxchg_386_u16(ptr, old, new);
case 4:
return cmpxchg_386_u32(ptr, old, new);
}
return old;
}
#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
if (likely(boot_cpu_data.x86 > 3)) \
__ret = (__typeof__(*(ptr)))__cmpxchg((ptr), \
(unsigned long)(o), (unsigned long)(n), \
sizeof(*(ptr))); \
else \
__ret = (__typeof__(*(ptr)))cmpxchg_386((ptr), \
(unsigned long)(o), (unsigned long)(n), \
sizeof(*(ptr))); \
__ret; \
})
#define cmpxchg_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
if (likely(boot_cpu_data.x86 > 3)) \
__ret = (__typeof__(*(ptr)))__cmpxchg_local((ptr), \
(unsigned long)(o), (unsigned long)(n), \
sizeof(*(ptr))); \
else \
__ret = (__typeof__(*(ptr)))cmpxchg_386((ptr), \
(unsigned long)(o), (unsigned long)(n), \
sizeof(*(ptr))); \
__ret; \
})
#endif
#ifndef CONFIG_X86_CMPXCHG64
/*
* Building a kernel capable running on 80386 and 80486. It may be necessary
* to simulate the cmpxchg8b on the 80386 and 80486 CPU.
*/
#define cmpxchg64(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (o); \
__typeof__(*(ptr)) __new = (n); \
alternative_io(LOCK_PREFIX_HERE \
"call cmpxchg8b_emu", \
"lock; cmpxchg8b (%%esi)" , \
X86_FEATURE_CX8, \
"=A" (__ret), \
"S" ((ptr)), "0" (__old), \
"b" ((unsigned int)__new), \
"c" ((unsigned int)(__new>>32)) \
: "memory"); \
__ret; })
#define cmpxchg64_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (o); \
__typeof__(*(ptr)) __new = (n); \
alternative_io("call cmpxchg8b_emu", \
"cmpxchg8b (%%esi)" , \
X86_FEATURE_CX8, \
"=A" (__ret), \
"S" ((ptr)), "0" (__old), \
"b" ((unsigned int)__new), \
"c" ((unsigned int)(__new>>32)) \
: "memory"); \
__ret; })
#endif
#define system_has_cmpxchg_double() cpu_has_cx8
#endif /* _ASM_X86_CMPXCHG_32_H */