linux/arch/arm/lib/bitops.h
Will Deacon c32ffce0f6 ARM: 7984/1: prefetch: add prefetchw invocations for barriered atomics
After a bunch of benchmarking on the interaction between dmb and pldw,
it turns out that issuing the pldw *after* the dmb instruction can
give modest performance gains (~3% atomic_add_return improvement on a
dual A15).

This patch adds prefetchw invocations to our barriered atomic operations,
including cmpxchg, test_and_xxx and futexes.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-02-25 11:30:20 +00:00
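For context, the ordering the commit message describes (barrier first, then the write prefetch, then the exclusive load/store retry loop) looks roughly like the sketch below for an atomic add-return style sequence. This is only an illustration of the pattern under ARMv7 SMP assumptions, not the kernel's actual code; the label and register choices are arbitrary.

	.syntax	unified
	.arch	armv7-a
	.arch_extension	mp		@ pldw requires the multiprocessing extensions
	.text

	@ Sketch only: add r0 to the word at [r1] and return the new value.
add_return_sketch:
	dmb	ish			@ full barrier first...
	pldw	[r1]			@ ...then prefetch the line for writing
1:	ldrex	r2, [r1]		@ load-exclusive the old value
	add	r2, r2, r0
	strex	r3, r2, [r1]		@ r3 != 0 if exclusivity was lost
	cmp	r3, #0
	bne	1b			@ retry until the store-exclusive succeeds
	dmb	ish			@ barrier after the update
	mov	r0, r2			@ return the new value
	bx	lr

The bitops.h macros below apply the same dmb-then-pldw ordering in their testop path.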


#include <asm/unwind.h>

#if __LINUX_ARM_ARCH__ >= 6
	.macro	bitop, name, instr
ENTRY(	\name		)
UNWIND(	.fnstart	)
	ands	ip, r1, #3
	strneb	r1, [ip]		@ assert word-aligned
	mov	r2, #1
	and	r3, r0, #31		@ Get bit offset
	mov	r0, r0, lsr #5
	add	r1, r1, r0, lsl #2	@ Get word offset
#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
	.arch_extension	mp
	ALT_SMP(W(pldw)	[r1])
	ALT_UP(W(nop))
#endif
	mov	r3, r2, lsl r3		@ create mask
1:	ldrex	r2, [r1]		@ load the word exclusively
	\instr	r2, r2, r3		@ set/clear/change the bit
	strex	r0, r2, [r1]		@ try to store it back
	cmp	r0, #0
	bne	1b			@ retry if the exclusive store failed
	bx	lr
UNWIND(	.fnend		)
ENDPROC(\name		)
	.endm

	.macro	testop, name, instr, store
ENTRY(	\name		)
UNWIND(	.fnstart	)
	ands	ip, r1, #3
	strneb	r1, [ip]		@ assert word-aligned
	mov	r2, #1
	and	r3, r0, #31		@ Get bit offset
	mov	r0, r0, lsr #5
	add	r1, r1, r0, lsl #2	@ Get word offset
	mov	r3, r2, lsl r3		@ create mask
	smp_dmb
	@ prefetch for write after the barrier, which benchmarks slightly faster
#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
	.arch_extension	mp
	ALT_SMP(W(pldw)	[r1])
	ALT_UP(W(nop))
#endif
1:	ldrex	r2, [r1]
	ands	r0, r2, r3		@ save old value of bit
	\instr	r2, r2, r3		@ toggle bit
	strex	ip, r2, [r1]
	cmp	ip, #0
	bne	1b			@ retry if the exclusive store failed
	smp_dmb
	cmp	r0, #0
	movne	r0, #1			@ normalise the old bit value to 0/1
2:	bx	lr
UNWIND(	.fnend		)
ENDPROC(\name		)
	.endm
#else
	.macro	bitop, name, instr
ENTRY(	\name		)
UNWIND(	.fnstart	)
	ands	ip, r1, #3
	strneb	r1, [ip]		@ assert word-aligned
	and	r2, r0, #31
	mov	r0, r0, lsr #5
	mov	r3, #1
	mov	r3, r3, lsl r2
	save_and_disable_irqs ip
	ldr	r2, [r1, r0, lsl #2]
	\instr	r2, r2, r3
	str	r2, [r1, r0, lsl #2]
	restore_irqs ip
	mov	pc, lr
UNWIND(	.fnend		)
ENDPROC(\name		)
	.endm

/**
 * testop - implement a test_and_xxx_bit operation.
 * @instr: operational instruction
 * @store: store instruction
 *
 * Note: we can trivially conditionalise the store instruction
 * to avoid dirtying the data cache.
 */
	.macro	testop, name, instr, store
ENTRY(	\name		)
UNWIND(	.fnstart	)
	ands	ip, r1, #3
	strneb	r1, [ip]		@ assert word-aligned
	and	r3, r0, #31
	mov	r0, r0, lsr #5
	save_and_disable_irqs ip
	ldr	r2, [r1, r0, lsl #2]!	@ load word, leave its address in r1
	mov	r0, #1
	tst	r2, r0, lsl r3		@ Z set if the bit was clear
	\instr	r2, r2, r0, lsl r3	@ update the bit (may be conditional)
	\store	r2, [r1]		@ store may be conditional (see note above)
	moveq	r0, #0			@ return old bit value: 1 if set, 0 if clear
	restore_irqs ip
	mov	pc, lr
UNWIND(	.fnend		)
ENDPROC(\name		)
	.endm
#endif
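As the note above says, whether the pre-v6 testop writes the word back at all is controlled purely by the instructions the per-operation wrapper passes in. The invocations below are an illustrative sketch of that idea; the real arguments live in the small wrappers under arch/arm/lib/ (e.g. setbit.S, testsetbit.S) and may differ.

	@ Illustrative only; check the per-operation .S wrappers for the real arguments.
	bitop	_set_bit, orr			@ unconditionally OR the mask in
	testop	_test_and_set_bit, orreq, streq	@ tst leaves Z set when the bit was
						@ clear, so the OR and the store only
						@ happen when the word actually changes

Conditionalising both the modify and the store this way keeps an already-set bit from dirtying its cache line, which is exactly the optimisation the comment describes.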