[AArch64] add barriers to ool __sync builtins
2022-05-13 Sebastian Pop <spop@amazon.com> gcc/ PR target/105162 * config/aarch64/aarch64-protos.h (atomic_ool_names): Increase dimension of str array. * config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call memmodel_from_int and handle MEMMODEL_SYNC_*. (DEF0): Add __aarch64_*_sync functions. gcc/testsuite/ PR target/105162 * gcc.target/aarch64/sync-comp-swap-ool.c: New. * gcc.target/aarch64/sync-op-acquire-ool.c: New. * gcc.target/aarch64/sync-op-full-ool.c: New. * gcc.target/aarch64/target_attr_20.c: Update check. * gcc.target/aarch64/target_attr_21.c: Same. libgcc/ PR target/105162 * config/aarch64/lse.S: Define BARRIER and handle memory MODEL 5. * config/aarch64/t-lse: Add a 5th memory model for _sync functions.
This commit is contained in:
parent
010af1040b
commit
bc25483c05
@ -1065,7 +1065,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
|
||||
|
||||
struct atomic_ool_names
|
||||
{
|
||||
const char *str[5][4];
|
||||
const char *str[5][5];
|
||||
};
|
||||
|
||||
rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
|
||||
|
@ -22678,14 +22678,14 @@ aarch64_emit_unlikely_jump (rtx insn)
|
||||
add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
|
||||
}
|
||||
|
||||
/* We store the names of the various atomic helpers in a 5x4 array.
|
||||
/* We store the names of the various atomic helpers in a 5x5 array.
|
||||
Return the libcall function given MODE, MODEL and NAMES. */
|
||||
|
||||
rtx
|
||||
aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
|
||||
const atomic_ool_names *names)
|
||||
{
|
||||
memmodel model = memmodel_base (INTVAL (model_rtx));
|
||||
memmodel model = memmodel_from_int (INTVAL (model_rtx));
|
||||
int mode_idx, model_idx;
|
||||
|
||||
switch (mode)
|
||||
@ -22725,6 +22725,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
|
||||
case MEMMODEL_SEQ_CST:
|
||||
model_idx = 3;
|
||||
break;
|
||||
case MEMMODEL_SYNC_ACQUIRE:
|
||||
case MEMMODEL_SYNC_RELEASE:
|
||||
case MEMMODEL_SYNC_SEQ_CST:
|
||||
model_idx = 4;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
@ -22737,7 +22742,8 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
|
||||
{ "__aarch64_" #B #N "_relax", \
|
||||
"__aarch64_" #B #N "_acq", \
|
||||
"__aarch64_" #B #N "_rel", \
|
||||
"__aarch64_" #B #N "_acq_rel" }
|
||||
"__aarch64_" #B #N "_acq_rel", \
|
||||
"__aarch64_" #B #N "_sync" }
|
||||
|
||||
#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
|
||||
{ NULL, NULL, NULL, NULL }
|
||||
|
6
gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
Normal file
6
gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
Normal file
@ -0,0 +1,6 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -moutline-atomics" } */
|
||||
|
||||
#include "sync-comp-swap.x"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bl.*__aarch64_cas4_sync" 1 } } */
|
6
gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
Normal file
6
gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
Normal file
@ -0,0 +1,6 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=armv8-a+nolse -O2 -moutline-atomics" } */
|
||||
|
||||
#include "sync-op-acquire.x"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bl.*__aarch64_swp4_sync" 1 } } */
|
9
gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c
Normal file
9
gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c
Normal file
@ -0,0 +1,9 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=armv8-a+nolse -O2 -moutline-atomics" } */
|
||||
|
||||
#include "sync-op-full.x"
|
||||
|
||||
/* { dg-final { scan-assembler-times "bl.*__aarch64_ldadd4_sync" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "bl.*__aarch64_ldclr4_sync" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "bl.*__aarch64_ldeor4_sync" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "bl.*__aarch64_ldset4_sync" 1 } } */
|
@ -24,4 +24,4 @@ bar (void)
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "bl.*__aarch64_cas2_acq_rel" } } */
|
||||
/* { dg-final { scan-assembler-not "bl.*__aarch64_cas2_sync" } } */
|
||||
|
@ -24,4 +24,4 @@ bar (void)
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "bl.*__aarch64_cas2_acq_rel" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "bl.*__aarch64_cas2_sync" 1 } } */
|
||||
|
@ -87,24 +87,44 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
# define L
|
||||
# define M 0x000000
|
||||
# define N 0x000000
|
||||
# define BARRIER
|
||||
#elif MODEL == 2
|
||||
# define SUFF _acq
|
||||
# define A a
|
||||
# define L
|
||||
# define M 0x400000
|
||||
# define N 0x800000
|
||||
# define BARRIER
|
||||
#elif MODEL == 3
|
||||
# define SUFF _rel
|
||||
# define A
|
||||
# define L l
|
||||
# define M 0x008000
|
||||
# define N 0x400000
|
||||
# define BARRIER
|
||||
#elif MODEL == 4
|
||||
# define SUFF _acq_rel
|
||||
# define A a
|
||||
# define L l
|
||||
# define M 0x408000
|
||||
# define N 0xc00000
|
||||
# define BARRIER
|
||||
#elif MODEL == 5
|
||||
# define SUFF _sync
|
||||
#ifdef L_swp
|
||||
/* swp has _acq semantics. */
|
||||
# define A a
|
||||
# define L
|
||||
# define M 0x400000
|
||||
# define N 0x800000
|
||||
#else
|
||||
/* All other _sync functions have _seq semantics. */
|
||||
# define A a
|
||||
# define L l
|
||||
# define M 0x408000
|
||||
# define N 0xc00000
|
||||
#endif
|
||||
# define BARRIER dmb ish
|
||||
#else
|
||||
# error
|
||||
#endif
|
||||
@ -127,7 +147,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
#endif
|
||||
|
||||
#define NAME(BASE) glue4(__aarch64_, BASE, SIZE, SUFF)
|
||||
#define LDXR glue4(ld, A, xr, S)
|
||||
#if MODEL == 5
|
||||
/* Drop A for _sync functions. */
|
||||
# define LDXR glue3(ld, xr, S)
|
||||
#else
|
||||
# define LDXR glue4(ld, A, xr, S)
|
||||
#endif
|
||||
#define STXR glue4(st, L, xr, S)
|
||||
|
||||
/* Temporary registers used. Other than these, only the return value
|
||||
@ -183,10 +208,16 @@ STARTFN NAME(cas)
|
||||
bne 1f
|
||||
STXR w(tmp1), s(1), [x2]
|
||||
cbnz w(tmp1), 0b
|
||||
1: ret
|
||||
1: BARRIER
|
||||
ret
|
||||
|
||||
#else
|
||||
#define LDXP glue3(ld, A, xp)
|
||||
#if MODEL == 5
|
||||
/* Drop A for _sync functions. */
|
||||
# define LDXP glue2(ld, xp)
|
||||
#else
|
||||
# define LDXP glue3(ld, A, xp)
|
||||
#endif
|
||||
#define STXP glue3(st, L, xp)
|
||||
#ifdef HAVE_AS_LSE
|
||||
# define CASP glue3(casp, A, L) x0, x1, x2, x3, [x4]
|
||||
@ -205,7 +236,8 @@ STARTFN NAME(cas)
|
||||
bne 1f
|
||||
STXP w(tmp2), x2, x3, [x4]
|
||||
cbnz w(tmp2), 0b
|
||||
1: ret
|
||||
1: BARRIER
|
||||
ret
|
||||
|
||||
#endif
|
||||
|
||||
@ -229,6 +261,7 @@ STARTFN NAME(swp)
|
||||
0: LDXR s(0), [x1]
|
||||
STXR w(tmp1), s(tmp0), [x1]
|
||||
cbnz w(tmp1), 0b
|
||||
BARRIER
|
||||
ret
|
||||
|
||||
ENDFN NAME(swp)
|
||||
@ -273,6 +306,7 @@ STARTFN NAME(LDNM)
|
||||
OP s(tmp1), s(0), s(tmp0)
|
||||
STXR w(tmp2), s(tmp1), [x1]
|
||||
cbnz w(tmp2), 0b
|
||||
BARRIER
|
||||
ret
|
||||
|
||||
ENDFN NAME(LDNM)
|
||||
|
@ -18,13 +18,13 @@
|
||||
# along with GCC; see the file COPYING3. If not see
|
||||
# <http://www.gnu.org/licenses/>.
|
||||
|
||||
# Compare-and-swap has 5 sizes and 4 memory models.
|
||||
# Compare-and-swap has 5 sizes and 5 memory models.
|
||||
S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
|
||||
O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))
|
||||
O0 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S0)))
|
||||
|
||||
# Swap, Load-and-operate have 4 sizes and 4 memory models
|
||||
# Swap, Load-and-operate have 4 sizes and 5 memory models
|
||||
S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
|
||||
O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
|
||||
O1 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S1)))
|
||||
|
||||
LSE_OBJS := $(O0) $(O1)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user