[ARC] Add support for atomic memory built-ins.

gcc/

2015-12-10  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc-protos.h (arc_expand_atomic_op): Prototype.
	(arc_split_compare_and_swap): Likewise.
	(arc_expand_compare_and_swap): Likewise.
	* config/arc/arc.c (arc_init): Check usage of the atomic option.
	(arc_pre_atomic_barrier): New function.
	(arc_post_atomic_barrier): Likewise.
	(emit_unlikely_jump): Likewise.
	(arc_expand_compare_and_swap_qh): Likewise.
	(arc_expand_compare_and_swap): Likewise.
	(arc_split_compare_and_swap): Likewise.
	(arc_expand_atomic_op): Likewise.
	* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_ATOMIC__.
	(ASM_SPEC): Enable mlock option when matomic is used.
	* config/arc/arc.md (UNSPEC_ARC_MEMBAR): Define.
	(VUNSPEC_ARC_CAS): Likewise.
	(VUNSPEC_ARC_LL): Likewise.
	(VUNSPEC_ARC_SC): Likewise.
	(VUNSPEC_ARC_EX): Likewise.
	* config/arc/arc.opt (matomic): New option.
	* config/arc/constraints.md (ATO): New constraint.
	* config/arc/predicates.md (mem_noofs_operand): New predicate.
	* doc/invoke.texi: Document -matomic.
	* config/arc/atomic.md: New file.

gcc/testsuite

2015-12-10  Claudiu Zissulescu  <claziss@synopsys.com>

	* lib/target-supports.exp (check_effective_target_arc_atomic): New
	function.
	(check_effective_target_sync_int_long): Add checks for ARC atomic
	feature.
	(check_effective_target_sync_char_short): Likewise.

From-SVN: r231509
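
For context (an illustration, not part of the committed change): with -matomic, standard GCC __atomic built-ins such as the ones below can be expanded inline on ARC700 and ARC HS cores by the new expanders and machine-description patterns introduced here.

/* Illustration only: standard GCC atomic built-ins that the new
   expanders and patterns handle when -matomic is in effect.  */

#include <stdint.h>

static int32_t counter;

int32_t
bump (void)
{
  /* Fetch-and-add; expanded via the new LLOCK/SCOND-based patterns.  */
  return __atomic_fetch_add (&counter, 1, __ATOMIC_SEQ_CST);
}

int
try_lock (int32_t *lock)
{
  int32_t expected = 0;
  /* 32-bit compare-and-swap; see arc_expand_compare_and_swap below.  */
  return __atomic_compare_exchange_n (lock, &expected, 1, /* weak */ 0,
                                      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}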

@@ -1,3 +1,29 @@
2015-12-10 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc-protos.h (arc_expand_atomic_op): Prototype.
(arc_split_compare_and_swap): Likewise.
(arc_expand_compare_and_swap): Likewise.
* config/arc/arc.c (arc_init): Check usage of the atomic option.
(arc_pre_atomic_barrier): New function.
(arc_post_atomic_barrier): Likewise.
(emit_unlikely_jump): Likewise.
(arc_expand_compare_and_swap_qh): Likewise.
(arc_expand_compare_and_swap): Likewise.
(arc_split_compare_and_swap): Likewise.
(arc_expand_atomic_op): Likewise.
* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_ATOMIC__.
(ASM_SPEC): Enable mlock option when matomic is used.
* config/arc/arc.md (UNSPEC_ARC_MEMBAR): Define.
(VUNSPEC_ARC_CAS): Likewise.
(VUNSPEC_ARC_LL): Likewise.
(VUNSPEC_ARC_SC): Likewise.
(VUNSPEC_ARC_EX): Likewise.
* config/arc/arc.opt (matomic): New option.
* config/arc/constraints.md (ATO): New constraint.
* config/arc/predicates.md (mem_noofs_operand): New predicate.
* doc/invoke.texi: Document -matomic.
* config/arc/atomic.md: New file.
2015-12-10 Richard Biener <rguenther@suse.de>
PR tree-optimization/68817


@@ -41,6 +41,10 @@ extern int arc_output_commutative_cond_exec (rtx *operands, bool);
extern bool arc_expand_movmem (rtx *operands);
extern bool prepare_move_operands (rtx *operands, machine_mode mode);
extern void emit_shift (enum rtx_code, rtx, rtx, rtx);
extern void arc_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
extern void arc_split_compare_and_swap (rtx *);
extern void arc_expand_compare_and_swap (rtx *);
#endif /* RTX_CODE */
#ifdef TREE_CODE


@@ -61,6 +61,7 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "alias.h"
/* Which cpu we're compiling for (ARC600, ARC601, ARC700). */
static const char *arc_cpu_string = "";
@@ -884,6 +885,9 @@ arc_init (void)
flag_pic = 0;
}
if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
error ("-matomic is only supported for ARC700 or ARC HS cores");
arc_init_reg_tables ();
/* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */
@@ -9650,6 +9654,359 @@ arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}
/* Emit a (pre) memory barrier around an atomic sequence according to
MODEL. */
static void
arc_pre_atomic_barrier (enum memmodel model)
{
if (need_atomic_barrier_p (model, true))
emit_insn (gen_memory_barrier ());
}
/* Emit a (post) memory barrier around an atomic sequence according to
MODEL. */
static void
arc_post_atomic_barrier (enum memmodel model)
{
if (need_atomic_barrier_p (model, false))
emit_insn (gen_memory_barrier ());
}
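As a quick orientation (illustration only, not part of the patch), and assuming the usual GCC need_atomic_barrier_p semantics, the two helpers above bracket an atomic sequence roughly as follows:

/* Illustration only: which barriers the helpers above would emit around
   an atomic operation for the common memory models (assuming the usual
   need_atomic_barrier_p behavior).  */
void
barrier_examples (int *p)
{
  __atomic_fetch_add (p, 1, __ATOMIC_RELAXED);  /* no barriers        */
  __atomic_fetch_add (p, 1, __ATOMIC_ACQUIRE);  /* post barrier only  */
  __atomic_fetch_add (p, 1, __ATOMIC_RELEASE);  /* pre barrier only   */
  __atomic_fetch_add (p, 1, __ATOMIC_SEQ_CST);  /* pre and post       */
}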
/* Emit a jump instruction that is marked as very unlikely to be taken. */
static void
emit_unlikely_jump (rtx insn)
{
int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
insn = emit_jump_insn (insn);
add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand code to perform an 8- or 16-bit compare and swap by doing a
32-bit compare and swap on the word containing the byte or
half-word.  The difference between a weak and a strong CAS is that
the weak version may simply fail.  The strong version relies on two
loops: one checks whether the SCOND op succeeded or not, the other
checks whether the 32-bit accessed location containing the 8- or
16-bit datum has been changed by another thread.  The first loop is
implemented by the atomic_compare_and_swapsi_1 pattern.  The second
loop is implemented by this routine. */
static void
arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
rtx oldval, rtx newval, rtx weak,
rtx mod_s, rtx mod_f)
{
rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
rtx addr = gen_reg_rtx (Pmode);
rtx off = gen_reg_rtx (SImode);
rtx oldv = gen_reg_rtx (SImode);
rtx newv = gen_reg_rtx (SImode);
rtx oldvalue = gen_reg_rtx (SImode);
rtx newvalue = gen_reg_rtx (SImode);
rtx res = gen_reg_rtx (SImode);
rtx resv = gen_reg_rtx (SImode);
rtx memsi, val, mask, end_label, loop_label, cc, x;
machine_mode mode;
bool is_weak = (weak != const0_rtx);
/* Truncate the address. */
emit_insn (gen_rtx_SET (addr,
gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
/* Compute the datum offset. */
emit_insn (gen_rtx_SET (off,
gen_rtx_AND (SImode, addr1, GEN_INT (3))));
if (TARGET_BIG_ENDIAN)
emit_insn (gen_rtx_SET (off,
gen_rtx_MINUS (SImode,
(GET_MODE (mem) == QImode) ?
GEN_INT (3) : GEN_INT (2), off)));
/* Normal read from truncated address. */
memsi = gen_rtx_MEM (SImode, addr);
set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
val = copy_to_reg (memsi);
/* Convert the offset to bits. */
emit_insn (gen_rtx_SET (off,
gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
/* Get the proper mask. */
if (GET_MODE (mem) == QImode)
mask = force_reg (SImode, GEN_INT (0xff));
else
mask = force_reg (SImode, GEN_INT (0xffff));
emit_insn (gen_rtx_SET (mask,
gen_rtx_ASHIFT (SImode, mask, off)));
/* Prepare the old and new values. */
emit_insn (gen_rtx_SET (val,
gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
val)));
oldval = gen_lowpart (SImode, oldval);
emit_insn (gen_rtx_SET (oldv,
gen_rtx_ASHIFT (SImode, oldval, off)));
newval = gen_lowpart_common (SImode, newval);
emit_insn (gen_rtx_SET (newv,
gen_rtx_ASHIFT (SImode, newval, off)));
emit_insn (gen_rtx_SET (oldv,
gen_rtx_AND (SImode, oldv, mask)));
emit_insn (gen_rtx_SET (newv,
gen_rtx_AND (SImode, newv, mask)));
if (!is_weak)
{
end_label = gen_label_rtx ();
loop_label = gen_label_rtx ();
emit_label (loop_label);
}
/* Make the old and new values. */
emit_insn (gen_rtx_SET (oldvalue,
gen_rtx_IOR (SImode, oldv, val)));
emit_insn (gen_rtx_SET (newvalue,
gen_rtx_IOR (SImode, newv, val)));
/* Try a 32-bit atomic compare and swap.  It clobbers the CC
register. */
emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
weak, mod_s, mod_f));
/* Regardless of the weakness of the operation, a proper boolean
result needs to be provided. */
x = gen_rtx_REG (CC_Zmode, CC_REG);
x = gen_rtx_EQ (SImode, x, const0_rtx);
emit_insn (gen_rtx_SET (bool_result, x));
if (!is_weak)
{
/* Check the result: if the atomic op succeeded, jump to the end
label. */
x = gen_rtx_REG (CC_Zmode, CC_REG);
x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
emit_jump_insn (gen_rtx_SET (pc_rtx, x));
/* Wait for the right moment when the accessed 32-bit location
is stable. */
emit_insn (gen_rtx_SET (resv,
gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
res)));
mode = SELECT_CC_MODE (NE, resv, val);
cc = gen_rtx_REG (mode, CC_REG);
emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));
/* Set the new value of the 32-bit location, properly masked. */
emit_insn (gen_rtx_SET (val, resv));
/* Try again if the location is unstable.  Fall through if only the
SCOND op failed. */
x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
emit_label (end_label);
}
/* End: properly return the result for the given mode. */
emit_insn (gen_rtx_SET (res,
gen_rtx_AND (SImode, res, mask)));
emit_insn (gen_rtx_SET (res,
gen_rtx_LSHIFTRT (SImode, res, off)));
emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}
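The routine above, restated in ordinary C (illustration only, not part of the patch; the names are hypothetical and a little-endian layout is assumed): a strong 8-bit CAS built from a 32-bit CAS on the containing word, retrying only while bytes outside the mask are disturbed.

#include <stdint.h>

/* Hypothetical sketch of the word-masking technique expanded by
   arc_expand_compare_and_swap_qh, written with the generic __atomic
   built-ins; little-endian byte offsets assumed.  */
int
byte_cas_sketch (uint8_t *p, uint8_t expected, uint8_t desired)
{
  uint32_t *wp = (uint32_t *) ((uintptr_t) p & ~(uintptr_t) 3);
  unsigned shift = ((uintptr_t) p & 3) * 8;
  uint32_t mask = (uint32_t) 0xff << shift;
  uint32_t word = __atomic_load_n (wp, __ATOMIC_RELAXED);

  for (;;)
    {
      uint32_t oldv = (word & ~mask) | ((uint32_t) expected << shift);
      uint32_t newv = (word & ~mask) | ((uint32_t) desired << shift);
      if (__atomic_compare_exchange_n (wp, &word, newv, /* weak */ 0,
                                       __ATOMIC_SEQ_CST, __ATOMIC_RELAXED))
        return 1;               /* swap succeeded */
      if (((word ^ oldv) & mask) != 0)
        return 0;               /* our byte really differed: CAS failed */
      /* Only the surrounding bytes changed; retry with the fresh word.  */
    }
}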
/* Helper function used by "atomic_compare_and_swap" expand
pattern. */
void
arc_expand_compare_and_swap (rtx operands[])
{
rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
machine_mode mode;
bval = operands[0];
rval = operands[1];
mem = operands[2];
oldval = operands[3];
newval = operands[4];
is_weak = operands[5];
mod_s = operands[6];
mod_f = operands[7];
mode = GET_MODE (mem);
if (reg_overlap_mentioned_p (rval, oldval))
oldval = copy_to_reg (oldval);
if (mode == SImode)
{
emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
is_weak, mod_s, mod_f));
x = gen_rtx_REG (CC_Zmode, CC_REG);
x = gen_rtx_EQ (SImode, x, const0_rtx);
emit_insn (gen_rtx_SET (bval, x));
}
else
{
arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
is_weak, mod_s, mod_f);
}
}
/* Helper function used by the "atomic_compare_and_swapsi_1"
pattern. */
void
arc_split_compare_and_swap (rtx operands[])
{
rtx rval, mem, oldval, newval;
machine_mode mode;
enum memmodel mod_s, mod_f;
bool is_weak;
rtx label1, label2, x, cond;
rval = operands[0];
mem = operands[1];
oldval = operands[2];
newval = operands[3];
is_weak = (operands[4] != const0_rtx);
mod_s = (enum memmodel) INTVAL (operands[5]);
mod_f = (enum memmodel) INTVAL (operands[6]);
mode = GET_MODE (mem);
/* ARC atomic ops work only with 32-bit aligned memories. */
gcc_assert (mode == SImode);
arc_pre_atomic_barrier (mod_s);
label1 = NULL_RTX;
if (!is_weak)
{
label1 = gen_label_rtx ();
emit_label (label1);
}
label2 = gen_label_rtx ();
/* Load exclusive. */
emit_insn (gen_arc_load_exclusivesi (rval, mem));
/* Check if it is oldval. */
mode = SELECT_CC_MODE (NE, rval, oldval);
cond = gen_rtx_REG (mode, CC_REG);
emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode, rval, oldval)));
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
/* Exclusively store new item. Store clobbers CC reg. */
emit_insn (gen_arc_store_exclusivesi (mem, newval));
if (!is_weak)
{
/* Check the result of the store. */
cond = gen_rtx_REG (CC_Zmode, CC_REG);
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
if (mod_f != MEMMODEL_RELAXED)
emit_label (label2);
arc_post_atomic_barrier (mod_s);
if (mod_f == MEMMODEL_RELAXED)
emit_label (label2);
}
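For readers unfamiliar with load-locked/store-conditional sequences, the splitter above emits, in effect, the loop below (illustration only; __llock and __scond are hypothetical stand-ins for the ARC LLOCK and SCOND instructions, not real built-ins, and barriers are omitted).

/* Hypothetical stand-ins for the LLOCK and SCOND instructions; declared
   only so the sketch is self-contained.  Assume __scond returns nonzero
   when the conditional store succeeded.  */
extern int __llock (int *mem);
extern int __scond (int *mem, int newval);

/* Shape of the strong SImode compare-and-swap emitted by
   arc_split_compare_and_swap.  */
int
strong_cas_shape (int *mem, int oldval, int newval, int *rval)
{
  do
    {
      *rval = __llock (mem);    /* load-locked                      */
      if (*rval != oldval)
        return 0;               /* current value differs: CAS fails */
    }
  while (!__scond (mem, newval)); /* retry if the reservation was lost */
  return 1;
}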
/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
to perform. MEM is the memory on which to operate. VAL is the second
operand of the binary operator. BEFORE and AFTER are optional locations to
return the value of MEM either before or after the operation. MODEL_RTX
is a CONST_INT containing the memory model to use. */
void
arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
rtx orig_before, rtx orig_after, rtx model_rtx)
{
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
machine_mode mode = GET_MODE (mem);
rtx label, x, cond;
rtx before = orig_before, after = orig_after;
/* ARC atomic ops work only with 32-bit aligned memories. */
gcc_assert (mode == SImode);
arc_pre_atomic_barrier (model);
label = gen_label_rtx ();
emit_label (label);
label = gen_rtx_LABEL_REF (VOIDmode, label);
if (before == NULL_RTX)
before = gen_reg_rtx (mode);
if (after == NULL_RTX)
after = gen_reg_rtx (mode);
/* Load exclusive. */
emit_insn (gen_arc_load_exclusivesi (before, mem));
switch (code)
{
case NOT:
x = gen_rtx_AND (mode, before, val);
emit_insn (gen_rtx_SET (after, x));
x = gen_rtx_NOT (mode, after);
emit_insn (gen_rtx_SET (after, x));
break;
case MINUS:
if (CONST_INT_P (val))
{
val = GEN_INT (-INTVAL (val));
code = PLUS;
}
/* FALLTHRU. */
default:
x = gen_rtx_fmt_ee (code, mode, before, val);
emit_insn (gen_rtx_SET (after, x));
break;
}
/* Exclusively store new item. Store clobbers CC reg. */
emit_insn (gen_arc_store_exclusivesi (mem, after));
/* Check the result of the store. */
cond = gen_rtx_REG (CC_Zmode, CC_REG);
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
label, pc_rtx);
emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
arc_post_atomic_barrier (model);
}
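In terms of source-level built-ins (illustration only, not part of the patch), this is the expander behind the fetch-and-operate family; note how the MINUS and NOT cases above correspond to subtraction of a constant and to the NAND variant.

/* Illustration only: fetch-and-operate built-ins routed through
   arc_expand_atomic_op (via the patterns in the new atomic.md).  */
void
fetch_op_examples (int *p)
{
  __atomic_fetch_add (p, 5, __ATOMIC_SEQ_CST);     /* PLUS                   */
  __atomic_fetch_sub (p, 5, __ATOMIC_SEQ_CST);     /* MINUS: the constant 5
                                                      is rewritten as PLUS -5 */
  __atomic_fetch_and (p, 0xff, __ATOMIC_SEQ_CST);  /* AND                    */
  __atomic_fetch_or  (p, 0x01, __ATOMIC_SEQ_CST);  /* IOR                    */
  __atomic_fetch_nand (p, 0xff, __ATOMIC_SEQ_CST); /* NOT case: ~(*p & val)  */
}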
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-arc.h"


@@ -88,6 +88,10 @@ along with GCC; see the file COPYING3. If not see
{ \
builtin_define ("__HS__"); \
} \
if (TARGET_ATOMIC) \
{ \
builtin_define ("__ARC_ATOMIC__"); \
} \
if (TARGET_NORM) \
{ \
builtin_define ("__ARC_NORM__");\
@@ -153,7 +157,7 @@ along with GCC; see the file COPYING3. If not see
%{mcpu=ARC700|!mcpu=*:%{mrtsc}} \
%{mcpu=ARCHS:-mHS} \
%{mcpu=ARCEM:-mEM} \
"
%{matomic:-mlock}"
#if DEFAULT_LIBC == LIBC_UCLIBC
/* Note that the default is to link against dynamic libraries, if they are


@@ -128,6 +128,12 @@
(VUNSPEC_UNIMP_S 28) ; blockage insn for unimp_s generation
(VUNSPEC_NOP 29) ; volatile NOP
(UNSPEC_ARC_MEMBAR 30)
(VUNSPEC_ARC_CAS 31)
(VUNSPEC_ARC_LL 32)
(VUNSPEC_ARC_SC 33)
(VUNSPEC_ARC_EX 34)
(R0_REG 0)
(R1_REG 1)
(R2_REG 2)
@@ -5531,3 +5537,6 @@
(include "fpx.md")
(include "simdext.md")
;; include atomic extensions
(include "atomic.md")


@@ -414,3 +414,6 @@ Target Joined
mmac_
Target Joined
matomic
Target Report Mask(ATOMIC)
Enable atomic instructions.


@@ -421,3 +421,9 @@
An unsigned 6-bit integer constant, up to 62."
(and (match_code "const_int")
(match_test "UNSIGNED_INT6 (ival - 1)")))
;; Memory constraint used for atomic ops.
(define_memory_constraint "ATO"
"A memory with only a base register"
(match_operand 0 "mem_noofs_operand"))


@@ -813,3 +813,7 @@
(define_predicate "short_const_int_operand"
(and (match_operand 0 "const_int_operand")
(match_test "satisfies_constraint_C16 (op)")))
(define_predicate "mem_noofs_operand"
(and (match_code "mem")
(match_code "reg" "0")))


@@ -538,7 +538,7 @@ Objective-C and Objective-C++ Dialects}.
@gccoptlist{-mbarrel-shifter @gol
-mcpu=@var{cpu} -mA6 -mARC600 -mA7 -mARC700 @gol
-mdpfp -mdpfp-compact -mdpfp-fast -mno-dpfp-lrsr @gol
-mea -mno-mpy -mmul32x16 -mmul64 @gol
-mea -mno-mpy -mmul32x16 -mmul64 -matomic @gol
-mnorm -mspfp -mspfp-compact -mspfp-fast -msimd -msoft-float -mswap @gol
-mcrc -mdsp-packa -mdvbf -mlock -mmac-d16 -mmac-24 -mrtsc -mswape @gol
-mtelephony -mxy -misize -mannotate-align -marclinux -marclinux_prof @gol
@@ -12970,6 +12970,12 @@ can be overridden by FPX options; @samp{mspfp}, @samp{mspfp-compact}, or
@opindex mswap
Generate swap instructions.
@item -matomic
@opindex matomic
This enables the Locked Load/Store Conditional extension to implement
atomic memory built-in functions.  Not available for ARC 6xx or ARC
EM cores.
@item -mdiv-rem
@opindex mdiv-rem
Enable DIV/REM instructions for ARCv2 cores.


@@ -1,3 +1,11 @@
2015-12-10 Claudiu Zissulescu <claziss@synopsys.com>
* lib/target-supports.exp (check_effective_target_arc_atomic): New
function.
(check_effective_target_sync_int_long): Add checks for ARC atomic
feature.
(check_effective_target_sync_char_short): Likewise.
2015-12-10 Richard Biener <rguenther@suse.de>
PR tree-optimization/68817


@@ -2608,6 +2608,15 @@ proc check_effective_target_aarch64_little_endian { } {
}]
}
# Return 1 if this is a compiler supporting ARC atomic operations
proc check_effective_target_arc_atomic { } {
return [check_no_compiler_messages arc_atomic assembly {
#if !defined(__ARC_ATOMIC__)
#error FOO
#endif
}]
}
# Return 1 if this is an arm target using 32-bit instructions
proc check_effective_target_arm32 { } {
if { ![istarget arm*-*-*] } {
@@ -5581,6 +5590,7 @@ proc check_effective_target_sync_int_long { } {
|| [istarget crisv32-*-*] || [istarget cris-*-*]
|| ([istarget sparc*-*-*] && [check_effective_target_sparc_v9])
|| [istarget spu-*-*]
|| ([istarget arc*-*-*] && [check_effective_target_arc_atomic])
|| [check_effective_target_mips_llsc] } {
set et_sync_int_long_saved 1
}
@@ -5612,6 +5622,7 @@ proc check_effective_target_sync_char_short { } {
|| [istarget crisv32-*-*] || [istarget cris-*-*]
|| ([istarget sparc*-*-*] && [check_effective_target_sparc_v9])
|| [istarget spu-*-*]
|| ([istarget arc*-*-*] && [check_effective_target_arc_atomic])
|| [check_effective_target_mips_llsc] } {
set et_sync_char_short_saved 1
}