[AArch64] Use atomic load-operate instructions for fetch-update patterns.
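
At the source level, the fetch-update patterns in question are the
__atomic_fetch_* builtins.  A minimal example of the kind of code this
patch affects (illustrative, not part of the patch):

    #include <stdint.h>

    /* With -O2 -march=armv8-a+lse this is expected to compile to a
       single LDADD instruction instead of an LDXR/STXR retry loop.  */
    uint32_t
    fetch_add_relaxed (uint32_t *p, uint32_t n)
    {
      return __atomic_fetch_add (p, n, __ATOMIC_RELAXED);
    }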

gcc/
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* config/aarch64/aarch64-protos.h
	(aarch64_atomic_ldop_supported_p): Declare.
	* config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New.
	(enum aarch64_atomic_load_op_code): New.
	(aarch64_emit_atomic_load_op): New.
	(aarch64_gen_atomic_ldop): Update to support load-operate
	patterns.
	* config/aarch64/atomics.md (atomic_<atomic_optab><mode>): Change
	to an expander.
	(aarch64_atomic_<atomic_optab><mode>): New.
	(aarch64_atomic_<atomic_optab><mode>_lse): New.
	(atomic_fetch_<atomic_optab><mode>): Change to an expander.
	(aarch64_atomic_fetch_<atomic_optab><mode>): New.
	(aarch64_atomic_fetch_<atomic_optab><mode>_lse): New.

gcc/testsuite/
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/aarch64/atomic-inst-ldadd.c: New.
	* gcc.target/aarch64/atomic-inst-ldlogic.c: New.

From-SVN: r228001
Author: Matthew Wahab <matthew.wahab@arm.com>, 2015-09-22 09:35:17 +00:00
commit 641c2f8b69 (parent 6380d2bc38)
7 changed files with 457 additions and 14 deletions

gcc/ChangeLog

@@ -1,3 +1,20 @@
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* config/aarch64/aarch64-protos.h
	(aarch64_atomic_ldop_supported_p): Declare.
	* config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New.
	(enum aarch64_atomic_load_op_code): New.
	(aarch64_emit_atomic_load_op): New.
	(aarch64_gen_atomic_ldop): Update to support load-operate
	patterns.
	* config/aarch64/atomics.md (atomic_<atomic_optab><mode>): Change
	to an expander.
	(aarch64_atomic_<atomic_optab><mode>): New.
	(aarch64_atomic_<atomic_optab><mode>_lse): New.
	(atomic_fetch_<atomic_optab><mode>): Change to an expander.
	(aarch64_atomic_fetch_<atomic_optab><mode>): New.
	(aarch64_atomic_fetch_<atomic_optab><mode>_lse): New.

2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* config/aarch64/atomics.md (UNSPECV_ATOMIC_LDOP): New.

gcc/config/aarch64/aarch64-protos.h

@@ -378,6 +378,8 @@ rtx aarch64_load_tp (rtx);
void aarch64_expand_compare_and_swap (rtx op[]);
void aarch64_split_compare_and_swap (rtx op[]);
void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
bool aarch64_atomic_ldop_supported_p (enum rtx_code);
void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx);
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);

gcc/config/aarch64/aarch64.c

@@ -10871,6 +10871,32 @@ aarch64_expand_compare_and_swap (rtx operands[])
  emit_insn (gen_rtx_SET (bval, x));
}
/* Test whether the target supports using an atomic load-operate
   instruction.  CODE is the operation.  Returns FALSE if the operation
   isn't supported by the architecture.  */
bool
aarch64_atomic_ldop_supported_p (enum rtx_code code)
{
  if (!TARGET_LSE)
    return false;

  switch (code)
    {
    case SET:
    case AND:
    case IOR:
    case XOR:
    case MINUS:
    case PLUS:
      return true;
    default:
      return false;
    }
}
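
For reference, the supported codes map onto the __atomic builtins roughly
as follows: PLUS/MINUS serve __atomic_fetch_add/sub, AND/IOR/XOR serve
__atomic_fetch_and/or/xor, and SET serves exchange.  NAND is deliberately
absent, so a NAND fetch-update keeps the exclusive-loop expansion (see
atomic_fetch_nand<mode> below).  An illustrative case:

    /* LSE has no load-NAND instruction, so this still expands to an
       LDXR/STXR loop even with -march=armv8-a+lse.  */
    int
    fetch_nand_seq_cst (int *p, int n)
    {
      return __atomic_fetch_nand (p, n, __ATOMIC_SEQ_CST);
    }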
/* Emit a barrier that is appropriate for memory model MODEL, at the end of
   a sequence implementing an atomic operation.  */
@@ -11013,26 +11039,169 @@ aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
  emit_insn (gen (dst, mem, value, model));
}
/* Operations supported by aarch64_emit_atomic_load_op.  */
enum aarch64_atomic_load_op_code
{
  AARCH64_LDOP_PLUS,  /* A + B.  */
  AARCH64_LDOP_XOR,   /* A ^ B.  */
  AARCH64_LDOP_OR,    /* A | B.  */
  AARCH64_LDOP_BIC    /* A & ~B.  */
};
/* Emit an atomic load-operate. */
static void
aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
                             machine_mode mode, rtx dst, rtx src,
                             rtx mem, rtx model)
{
  typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
  const aarch64_atomic_load_op_fn plus[] =
  {
    gen_aarch64_atomic_loadaddqi,
    gen_aarch64_atomic_loadaddhi,
    gen_aarch64_atomic_loadaddsi,
    gen_aarch64_atomic_loadadddi
  };
  const aarch64_atomic_load_op_fn eor[] =
  {
    gen_aarch64_atomic_loadeorqi,
    gen_aarch64_atomic_loadeorhi,
    gen_aarch64_atomic_loadeorsi,
    gen_aarch64_atomic_loadeordi
  };
  const aarch64_atomic_load_op_fn ior[] =
  {
    gen_aarch64_atomic_loadsetqi,
    gen_aarch64_atomic_loadsethi,
    gen_aarch64_atomic_loadsetsi,
    gen_aarch64_atomic_loadsetdi
  };
  const aarch64_atomic_load_op_fn bic[] =
  {
    gen_aarch64_atomic_loadclrqi,
    gen_aarch64_atomic_loadclrhi,
    gen_aarch64_atomic_loadclrsi,
    gen_aarch64_atomic_loadclrdi
  };
  aarch64_atomic_load_op_fn gen;
  int idx = 0;

  switch (mode)
    {
    case QImode: idx = 0; break;
    case HImode: idx = 1; break;
    case SImode: idx = 2; break;
    case DImode: idx = 3; break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
    case AARCH64_LDOP_XOR: gen = eor[idx]; break;
    case AARCH64_LDOP_OR: gen = ior[idx]; break;
    case AARCH64_LDOP_BIC: gen = bic[idx]; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (dst, mem, src, model));
}
/* Emit an atomic load-operate.  CODE is the operation.  OUT_DATA is the
   location to store the data read from memory.  MEM is the memory location
   to read and modify.  MODEL_RTX is the memory ordering to use.  VALUE is
   the second operand for the operation.  */
void
aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data,
                         rtx mem, rtx value, rtx model_rtx)
{
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  const bool short_mode = (mode < SImode);
  aarch64_atomic_load_op_code ldop_code;
  rtx src;

  if (out_data)
    out_data = gen_lowpart (mode, out_data);

  /* Make sure the value is in a register, putting it into a destination
     register if it needs to be manipulated.  */
  if (!register_operand (value, mode)
      || code == AND || code == MINUS)
    {
      src = out_data;
      emit_move_insn (src, gen_lowpart (mode, value));
    }
  else
    src = value;
  gcc_assert (register_operand (src, mode));

  /* Preprocess the data for the operation as necessary.  If the operation is
     a SET then emit a swap instruction and finish.  */
  switch (code)
    {
    case SET:
      aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
      return;

    case MINUS:
      /* Negate the value and treat it as a PLUS.  */
      {
        rtx neg_src;

        /* Resize the value if necessary.  */
        if (short_mode)
          src = gen_lowpart (wmode, src);

        neg_src = gen_rtx_NEG (wmode, src);
        emit_insn (gen_rtx_SET (src, neg_src));

        if (short_mode)
          src = gen_lowpart (mode, src);
      }
      /* Fall-through.  */
    case PLUS:
      ldop_code = AARCH64_LDOP_PLUS;
      break;

    case IOR:
      ldop_code = AARCH64_LDOP_OR;
      break;

    case XOR:
      ldop_code = AARCH64_LDOP_XOR;
      break;

    case AND:
      {
        rtx not_src;

        /* Resize the value if necessary.  */
        if (short_mode)
          src = gen_lowpart (wmode, src);

        not_src = gen_rtx_NOT (wmode, src);
        emit_insn (gen_rtx_SET (src, not_src));

        if (short_mode)
          src = gen_lowpart (mode, src);
      }
      ldop_code = AARCH64_LDOP_BIC;
      break;

    default:
      /* The operation can't be done with atomic instructions.  */
      gcc_unreachable ();
    }

  aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
}
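
A note on the MINUS and AND cases above: LSE provides no load-subtract or
load-and instruction, so the operand is negated or complemented and LDADD
or LDCLR is used instead.  The underlying identities, as a quick
stand-alone (non-atomic) check:

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      uint32_t a = 0x1234, b = 0xff;
      /* MINUS as PLUS: A - B == A + (-B) in modular arithmetic.  */
      assert (a - b == a + (0u - b));
      /* AND as BIC: LDCLR computes A & ~src, so passing src = ~B
         yields A & B.  */
      assert ((a & b) == (a & ~(~b)));
      return 0;
    }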
/* Split an atomic operation. */

gcc/config/aarch64/atomics.md

@@ -225,16 +225,37 @@
}
)
(define_insn_and_split "atomic_<atomic_optab><mode>"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 0)
(match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_ATOMIC_OP))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:ALLI 3 "=&r"))
(clobber (match_scratch:SI 4 "=&r"))]
(define_expand "atomic_<atomic_optab><mode>"
[(match_operand:ALLI 0 "aarch64_sync_memory_operand" "")
(atomic_op:ALLI
(match_operand:ALLI 1 "<atomic_op_operand>" "")
(match_operand:SI 2 "const_int_operand"))]
""
{
rtx (*gen) (rtx, rtx, rtx);
/* Use an atomic load-operate instruction when possible. */
if (aarch64_atomic_ldop_supported_p (<CODE>))
gen = gen_aarch64_atomic_<atomic_optab><mode>_lse;
else
gen = gen_aarch64_atomic_<atomic_optab><mode>;
emit_insn (gen (operands[0], operands[1], operands[2]));
DONE;
}
)
(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 0)
(match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 2 "const_int_operand")]
UNSPECV_ATOMIC_OP))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:ALLI 3 "=&r"))
(clobber (match_scratch:SI 4 "=&r"))]
""
"#"
"&& reload_completed"
@@ -246,6 +267,25 @@
}
)
(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 0)
(match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 2 "const_int_operand")]
UNSPECV_ATOMIC_OP))
(clobber (match_scratch:ALLI 3 "=&r"))]
"TARGET_LSE"
"#"
"&& reload_completed"
[(const_int 0)]
{
aarch64_gen_atomic_ldop (<CODE>, operands[3], operands[0],
operands[1], operands[2]);
DONE;
}
)
(define_insn_and_split "atomic_nand<mode>"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
@@ -268,7 +308,30 @@
}
)
(define_insn_and_split "atomic_fetch_<atomic_optab><mode>"
;; Load-operate-store, returning the updated memory data.
(define_expand "atomic_fetch_<atomic_optab><mode>"
[(match_operand:ALLI 0 "register_operand" "")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
(atomic_op:ALLI
(match_operand:ALLI 2 "<atomic_op_operand>" "")
(match_operand:SI 3 "const_int_operand"))]
""
{
rtx (*gen) (rtx, rtx, rtx, rtx);
/* Use an atomic load-operate instruction when possible. */
if (aarch64_atomic_ldop_supported_p (<CODE>))
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse;
else
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
DONE;
})
(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
(set (match_dup 1)
@@ -291,6 +354,26 @@
}
)
(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
(set (match_dup 1)
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 1)
(match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 3 "const_int_operand")]
UNSPECV_ATOMIC_LDOP))]
"TARGET_LSE"
"#"
"&& reload_completed"
[(const_int 0)]
{
aarch64_gen_atomic_ldop (<CODE>, operands[0], operands[1],
operands[2], operands[3]);
DONE;
}
)
(define_insn_and_split "atomic_fetch_nand<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))

gcc/testsuite/ChangeLog

@@ -1,3 +1,8 @@
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/aarch64/atomic-inst-ldadd.c: New.
	* gcc.target/aarch64/atomic-inst-ldlogic.c: New.

2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/aarch64/atomic-inst-ops.inc (TEST_MODEL): New.

gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c

@@ -0,0 +1,58 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+lse" } */
/* Test ARMv8.1-A Load-ADD instruction. */
#include "atomic-inst-ops.inc"
#define TEST TEST_ONE
#define LOAD_ADD(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_add (val, foo, MODEL); \
}
#define LOAD_ADD_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_add (val, foo, MODEL); \
}
#define LOAD_SUB(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_sub (val, foo, MODEL); \
}
#define LOAD_SUB_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_sub (val, foo, MODEL); \
}
TEST (load_add, LOAD_ADD)
TEST (load_add_notreturn, LOAD_ADD_NORETURN)
TEST (load_sub, LOAD_SUB)
TEST (load_sub_notreturn, LOAD_SUB_NORETURN)
/* { dg-final { scan-assembler-times "ldaddb\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddab\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddlb\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddalb\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddh\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddah\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddlh\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddalh\t" 16} } */
/* { dg-final { scan-assembler-times "ldadd\t" 16} } */
/* { dg-final { scan-assembler-times "ldadda\t" 32} } */
/* { dg-final { scan-assembler-times "ldaddl\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddal\t" 32} } */
/* { dg-final { scan-assembler-not "ldaxr\t" } } */
/* { dg-final { scan-assembler-not "stlxr\t" } } */
/* { dg-final { scan-assembler-not "dmb" } } */

gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c

@@ -0,0 +1,109 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+lse" } */
/* Test ARMv8.1-A LD<logic-op> instruction. */
#include "atomic-inst-ops.inc"
#define TEST TEST_ONE
#define LOAD_OR(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_or (val, foo, MODEL); \
}
#define LOAD_OR_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_or (val, foo, MODEL); \
}
#define LOAD_AND(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_and (val, foo, MODEL); \
}
#define LOAD_AND_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_and (val, foo, MODEL); \
}
#define LOAD_XOR(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_xor (val, foo, MODEL); \
}
#define LOAD_XOR_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_xor (val, foo, MODEL); \
}
TEST (load_or, LOAD_OR)
TEST (load_or_notreturn, LOAD_OR_NORETURN)
TEST (load_and, LOAD_AND)
TEST (load_and_notreturn, LOAD_AND_NORETURN)
TEST (load_xor, LOAD_XOR)
TEST (load_xor_notreturn, LOAD_XOR_NORETURN)
/* Load-OR. */
/* { dg-final { scan-assembler-times "ldsetb\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetab\t" 8} } */
/* { dg-final { scan-assembler-times "ldsetlb\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetalb\t" 8} } */
/* { dg-final { scan-assembler-times "ldseth\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetah\t" 8} } */
/* { dg-final { scan-assembler-times "ldsetlh\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetalh\t" 8} } */
/* { dg-final { scan-assembler-times "ldset\t" 8} } */
/* { dg-final { scan-assembler-times "ldseta\t" 16} } */
/* { dg-final { scan-assembler-times "ldsetl\t" 8} } */
/* { dg-final { scan-assembler-times "ldsetal\t" 16} } */
/* Load-AND. */
/* { dg-final { scan-assembler-times "ldclrb\t" 4} } */
/* { dg-final { scan-assembler-times "ldclrab\t" 8} } */
/* { dg-final { scan-assembler-times "ldclrlb\t" 4} } */
/* { dg-final { scan-assembler-times "ldclralb\t" 8} } */
/* { dg-final { scan-assembler-times "ldclrh\t" 4} } */
/* { dg-final { scan-assembler-times "ldclrah\t" 8} } */
/* { dg-final { scan-assembler-times "ldclrlh\t" 4} } */
/* { dg-final { scan-assembler-times "ldclralh\t" 8} } */
/* { dg-final { scan-assembler-times "ldclr\t" 8} */
/* { dg-final { scan-assembler-times "ldclra\t" 16} } */
/* { dg-final { scan-assembler-times "ldclrl\t" 8} } */
/* { dg-final { scan-assembler-times "ldclral\t" 16} } */
/* Load-XOR. */
/* { dg-final { scan-assembler-times "ldeorb\t" 4} } */
/* { dg-final { scan-assembler-times "ldeorab\t" 8} } */
/* { dg-final { scan-assembler-times "ldeorlb\t" 4} } */
/* { dg-final { scan-assembler-times "ldeoralb\t" 8} } */
/* { dg-final { scan-assembler-times "ldeorh\t" 4} } */
/* { dg-final { scan-assembler-times "ldeorah\t" 8} } */
/* { dg-final { scan-assembler-times "ldeorlh\t" 4} } */
/* { dg-final { scan-assembler-times "ldeoralh\t" 8} } */
/* { dg-final { scan-assembler-times "ldeor\t" 8} */
/* { dg-final { scan-assembler-times "ldeora\t" 16} } */
/* { dg-final { scan-assembler-times "ldeorl\t" 8} } */
/* { dg-final { scan-assembler-times "ldeoral\t" 16} } */
/* { dg-final { scan-assembler-not "ldaxr\t" } } */
/* { dg-final { scan-assembler-not "stlxr\t" } } */
/* { dg-final { scan-assembler-not "dmb" } } */