Nick Piggin 0b2fcfdb8b [PATCH] atomic: add_unless cmpxchg optimise
Without branch hints, the gcc-4 versions I have tested unroll the loop for
the very unlikely case in which it repeats due to cmpxchg failure.

Improve this for architectures with a native cas/cmpxchg.  llsc archs
should try to implement this natively.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Andi Kleen <ak@muc.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-03-23 07:38:17 -08:00

200 lines
5.8 KiB
C

#ifndef __ARCH_S390_ATOMIC__
#define __ARCH_S390_ATOMIC__
/*
* include/asm-s390/atomic.h
*
* S390 version
* Copyright (C) 1999-2005 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Denis Joseph Barrow,
* Arnd Bergmann (arndb@de.ibm.com)
*
* Derived from "include/asm-i386/bitops.h"
* Copyright (C) 1992, Linus Torvalds
*
*/
/*
* Atomic operations that C can't guarantee us. Useful for
* resource counting etc.
* S390 uses 'Compare And Swap' for atomicity in an SMP environment.
*/
/*
 * atomic_t - 32-bit atomic counter type.
 *
 * A struct holding a single volatile int, aligned to 4 bytes. The
 * atomic_* helpers in this header all operate on the 'counter' member.
 */
typedef struct {
volatile int counter;
} __attribute__ ((aligned (4))) atomic_t;
/* Static initializer: ATOMIC_INIT(i) produces an atomic_t whose counter is i. */
#define ATOMIC_INIT(i) { (i) }
#ifdef __KERNEL__
/*
 * __CS_LOOP - read-modify-write retry loop built on the 'cs' instruction.
 *
 * @ptr:       pointer to the atomic_t to update
 * @op_val:    second operand of the operation, passed in a data register
 * @op_string: mnemonic of the register-register instruction that combines
 *             the current value with op_val (callers in this file pass
 *             "ar", "sr", "nr" and "or")
 *
 * Sequence: load the counter into old_val (%0), copy it to new_val (%1),
 * apply "op_string %1,%4", then try to store new_val with 'cs'. 'jl 0b'
 * branches back to the copy step, so the operation is re-applied until the
 * compare-and-swap goes through. The retry relies on 'cs' leaving the
 * current memory contents in %0 when it fails (per the z/Architecture
 * definition of COMPARE AND SWAP - confirm there).
 *
 * The statement expression evaluates to new_val, i.e. the value that was
 * stored. The asm declares "cc" and "memory" clobbers.
 *
 * Note: 'ptr' is used unparenthesized in typeof(ptr->counter), so pass a
 * plain pointer expression.
 */
#define __CS_LOOP(ptr, op_val, op_string) ({ \
typeof(ptr->counter) old_val, new_val; \
__asm__ __volatile__(" l %0,0(%3)\n" \
"0: lr %1,%0\n" \
op_string " %1,%4\n" \
" cs %0,%1,0(%3)\n" \
" jl 0b" \
: "=&d" (old_val), "=&d" (new_val), \
"=m" (((atomic_t *)(ptr))->counter) \
: "a" (ptr), "d" (op_val), \
"m" (((atomic_t *)(ptr))->counter) \
: "cc", "memory" ); \
new_val; \
})
/* atomic_read(v): plain read of v->counter (the member is volatile). */
#define atomic_read(v) ((v)->counter)
/* atomic_set(v, i): plain assignment of i to v->counter; no 'cs' loop is used. */
#define atomic_set(v,i) (((v)->counter) = (i))
/*
 * atomic_add_return - atomically add @i to @v and hand back the result.
 *
 * Runs the compare-and-swap retry loop with the "ar" instruction and
 * returns the value that was stored into v->counter.
 */
static __inline__ int atomic_add_return(int i, atomic_t * v)
{
	int sum;

	sum = __CS_LOOP(v, i, "ar");
	return sum;
}
/*
 * Addition/increment helpers. Every one of these expands to a call of
 * atomic_add_return(), so each is an expression carrying the new counter
 * value (the *_negative and *_and_test forms compare that value with 0).
 */
#define atomic_add(_i, _v) atomic_add_return(_i, _v)
/* True when the counter is negative after adding _i. */
#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0)
#define atomic_inc(_v) atomic_add_return(1, _v)
#define atomic_inc_return(_v) atomic_add_return(1, _v)
/* True when the counter is zero after the increment. */
#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0)
/*
 * atomic_sub_return - atomically subtract @i from @v and hand back the
 * result.
 *
 * Runs the compare-and-swap retry loop with the "sr" instruction and
 * returns the value that was stored into v->counter.
 */
static __inline__ int atomic_sub_return(int i, atomic_t * v)
{
	int difference;

	difference = __CS_LOOP(v, i, "sr");
	return difference;
}
/*
 * Subtraction/decrement helpers. Every one of these expands to a call of
 * atomic_sub_return(), so each is an expression carrying the new counter
 * value (the *_and_test forms compare that value with 0).
 */
#define atomic_sub(_i, _v) atomic_sub_return(_i, _v)
/* True when the counter is zero after subtracting _i. */
#define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0)
#define atomic_dec(_v) atomic_sub_return(1, _v)
#define atomic_dec_return(_v) atomic_sub_return(1, _v)
/* True when the counter is zero after the decrement. */
#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0)
/*
 * atomic_clear_mask - atomically clear the bits of @mask in @v.
 *
 * The complement of @mask is combined with the counter through the "nr"
 * instruction inside the compare-and-swap retry loop; the resulting value
 * is not reported to the caller.
 */
static __inline__ void atomic_clear_mask(unsigned long mask, atomic_t * v)
{
	const unsigned long keep_bits = ~mask;

	(void) __CS_LOOP(v, keep_bits, "nr");
}
/*
 * atomic_set_mask - atomically set the bits of @mask in @v.
 *
 * @mask is combined with the counter through the "or" instruction inside
 * the compare-and-swap retry loop; the resulting value is not reported to
 * the caller.
 */
static __inline__ void atomic_set_mask(unsigned long mask, atomic_t * v)
{
	const unsigned long set_bits = mask;

	(void) __CS_LOOP(v, set_bits, "or");
}
/*
 * atomic_xchg(v, new): forwards to xchg() on the address of v->counter.
 * xchg() is not defined in this header - presumably it stores 'new' and
 * yields the previous value; confirm against its definition.
 */
#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
/*
 * atomic_cmpxchg - single compare-and-swap attempt on @v.
 *
 * @v:   counter to operate on
 * @old: value the counter is expected to hold
 * @new: value to store if the expectation holds
 *
 * Issues one 'cs' instruction (no retry loop) and returns the contents of
 * the 'old' register afterwards. Callers such as atomic_add_unless() treat
 * a return value equal to the @old they passed in as success, and use any
 * other return value as the freshly observed counter.
 */
static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new)
{
/* 'old' is a read-write register operand ("+d"); 'new' is input only. */
__asm__ __volatile__(" cs %0,%3,0(%2)\n"
: "+d" (old), "=m" (v->counter)
: "a" (v), "d" (new), "m" (v->counter)
: "cc", "memory" );
return old;
}
/*
 * atomic_add_unless - add @a to @v, unless the counter equals @u.
 *
 * @v: counter to operate on
 * @a: amount to add
 * @u: value that blocks the addition
 *
 * Starts from a plain read of the counter and keeps attempting a
 * compare-and-swap of (current -> current + a). Each failed attempt
 * supplies the value actually found, which becomes the next candidate.
 *
 * Returns non-zero if the addition was performed, 0 if the counter was
 * found to be @u.
 */
static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
{
	int cur = atomic_read(v);

	while (likely(cur != u)) {
		int seen = atomic_cmpxchg(v, cur, cur + a);

		/* Swap went through: the counter was still 'cur'. */
		if (likely(seen == cur))
			return 1;
		/* Lost a race; retry against the value we just observed. */
		cur = seen;
	}
	return 0;
}
/*
 * atomic_inc_not_zero(v): increment the counter unless it is 0; evaluates
 * to non-zero when the increment happened.
 */
#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
/* The 32-bit loop helper is not needed past this point in the header. */
#undef __CS_LOOP
#ifdef __s390x__
/*
 * atomic64_t - 64-bit atomic counter type (only defined under __s390x__).
 *
 * A struct holding a single volatile long long, aligned to 8 bytes. The
 * atomic64_* helpers below all operate on the 'counter' member.
 */
typedef struct {
volatile long long counter;
} __attribute__ ((aligned (8))) atomic64_t;
/* Static initializer: ATOMIC64_INIT(i) produces an atomic64_t whose counter is i. */
#define ATOMIC64_INIT(i) { (i) }
/*
 * __CSG_LOOP - 64-bit read-modify-write retry loop built on 'csg'.
 *
 * @ptr:       pointer to the atomic64_t to update
 * @op_val:    second operand of the operation, passed in a data register
 * @op_string: mnemonic of the 64-bit register-register instruction that
 *             combines the current value with op_val (callers in this file
 *             pass "agr", "sgr", "ngr" and "ogr")
 *
 * Sequence: load the counter into old_val (%0), copy it to new_val (%1),
 * apply "op_string %1,%4", then try to store new_val with 'csg'. 'jl 0b'
 * branches back to the copy step, so the operation is re-applied until the
 * compare-and-swap goes through.
 *
 * The memory operands are described through an atomic64_t pointer so that
 * the compiler sees the full 8-byte counter as read and written; casting
 * to the 4-byte atomic_t would describe only half of the object.
 *
 * The statement expression evaluates to new_val, i.e. the value stored.
 */
#define __CSG_LOOP(ptr, op_val, op_string) ({				\
	typeof((ptr)->counter) old_val, new_val;			\
	__asm__ __volatile__(" lg %0,0(%3)\n"				\
			     "0: lgr %1,%0\n"				\
			     op_string " %1,%4\n"			\
			     " csg %0,%1,0(%3)\n"			\
			     " jl 0b"					\
			     : "=&d" (old_val), "=&d" (new_val),	\
			       "=m" (((atomic64_t *)(ptr))->counter)	\
			     : "a" (ptr), "d" (op_val),			\
			       "m" (((atomic64_t *)(ptr))->counter)	\
			     : "cc", "memory" );			\
	new_val;							\
})
/* atomic64_read(v): plain read of v->counter (the member is volatile). */
#define atomic64_read(v) ((v)->counter)
/* atomic64_set(v, i): plain assignment of i to v->counter; no 'csg' loop is used. */
#define atomic64_set(v,i) (((v)->counter) = (i))
/*
 * atomic64_add_return - atomically add @i to @v and hand back the result.
 *
 * Runs the 64-bit compare-and-swap retry loop with the "agr" instruction
 * and returns the value that was stored into v->counter.
 */
static __inline__ long long atomic64_add_return(long long i, atomic64_t * v)
{
	long long sum;

	sum = __CSG_LOOP(v, i, "agr");
	return sum;
}
/*
 * 64-bit addition/increment helpers. Every one of these expands to a call
 * of atomic64_add_return(), so each is an expression carrying the new
 * counter value (the *_negative and *_and_test forms compare it with 0).
 */
#define atomic64_add(_i, _v) atomic64_add_return(_i, _v)
/* True when the counter is negative after adding _i. */
#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0)
#define atomic64_inc(_v) atomic64_add_return(1, _v)
#define atomic64_inc_return(_v) atomic64_add_return(1, _v)
/* True when the counter is zero after the increment. */
#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0)
/*
 * atomic64_sub_return - atomically subtract @i from @v and hand back the
 * result.
 *
 * Runs the 64-bit compare-and-swap retry loop with the "sgr" instruction
 * and returns the value that was stored into v->counter.
 */
static __inline__ long long atomic64_sub_return(long long i, atomic64_t * v)
{
	long long difference;

	difference = __CSG_LOOP(v, i, "sgr");
	return difference;
}
/*
 * 64-bit subtraction/decrement helpers. Every one of these expands to a
 * call of atomic64_sub_return(), so each is an expression carrying the new
 * counter value (the *_and_test forms compare it with 0).
 */
#define atomic64_sub(_i, _v) atomic64_sub_return(_i, _v)
/* True when the counter is zero after subtracting _i. */
#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0)
#define atomic64_dec(_v) atomic64_sub_return(1, _v)
#define atomic64_dec_return(_v) atomic64_sub_return(1, _v)
/* True when the counter is zero after the decrement. */
#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0)
/*
 * atomic64_clear_mask - atomically clear the bits of @mask in @v.
 *
 * The complement of @mask is combined with the counter through the "ngr"
 * instruction inside the 64-bit compare-and-swap retry loop; the resulting
 * value is not reported to the caller.
 */
static __inline__ void atomic64_clear_mask(unsigned long mask, atomic64_t * v)
{
	const unsigned long keep_bits = ~mask;

	(void) __CSG_LOOP(v, keep_bits, "ngr");
}
/*
 * atomic64_set_mask - atomically set the bits of @mask in @v.
 *
 * @mask is combined with the counter through the "ogr" instruction inside
 * the 64-bit compare-and-swap retry loop; the resulting value is not
 * reported to the caller.
 */
static __inline__ void atomic64_set_mask(unsigned long mask, atomic64_t * v)
{
	const unsigned long set_bits = mask;

	(void) __CSG_LOOP(v, set_bits, "ogr");
}
/*
 * atomic64_cmpxchg - single 64-bit compare-and-swap attempt on @v.
 *
 * @v:   counter to operate on
 * @old: value the counter is expected to hold
 * @new: value to store if the expectation holds
 *
 * Issues one 'csg' instruction (no retry loop) and returns the contents of
 * the 'old' register afterwards. Callers such as atomic64_add_unless()
 * treat a return value equal to the @old they passed in as success, and
 * use any other return value as the freshly observed counter.
 */
static __inline__ long long atomic64_cmpxchg(atomic64_t *v,
long long old, long long new)
{
/* 'old' is a read-write register operand ("+d"); 'new' is input only. */
__asm__ __volatile__(" csg %0,%3,0(%2)\n"
: "+d" (old), "=m" (v->counter)
: "a" (v), "d" (new), "m" (v->counter)
: "cc", "memory" );
return old;
}
/*
 * atomic64_add_unless - add @a to @v, unless the counter equals @u.
 *
 * @v: counter to operate on
 * @a: amount to add
 * @u: value that blocks the addition
 *
 * Starts from a plain read of the counter and keeps attempting a 64-bit
 * compare-and-swap of (current -> current + a). Each failed attempt
 * supplies the value actually found, which becomes the next candidate.
 *
 * Returns non-zero if the addition was performed, 0 if the counter was
 * found to be @u.
 */
static __inline__ int atomic64_add_unless(atomic64_t *v,
					  long long a, long long u)
{
	long long cur = atomic64_read(v);

	while (likely(cur != u)) {
		long long seen = atomic64_cmpxchg(v, cur, cur + a);

		/* Swap went through: the counter was still 'cur'. */
		if (likely(seen == cur))
			return 1;
		/* Lost a race; retry against the value we just observed. */
		cur = seen;
	}
	return 0;
}
/*
 * atomic64_inc_not_zero(v): increment the counter unless it is 0;
 * evaluates to non-zero when the increment happened.
 */
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
/* The 64-bit loop helper is not needed past this point in the header. */
#undef __CSG_LOOP
#endif
/*
 * Barrier hooks to be placed around atomic_inc()/atomic_dec(). In this
 * header all four expand to smp_mb(), which is defined elsewhere.
 */
#define smp_mb__before_atomic_dec() smp_mb()
#define smp_mb__after_atomic_dec() smp_mb()
#define smp_mb__before_atomic_inc() smp_mb()
#define smp_mb__after_atomic_inc() smp_mb()
#include <asm-generic/atomic.h>
#endif /* __KERNEL__ */
#endif /* __ARCH_S390_ATOMIC__ */