[ARC] Add basic support for double load and store instructions

gcc/
2016-01-25  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
	(arc_init): Check validity of the mll64 option.
	(arc_save_restore): Use double load/store instructions.
	(arc_expand_movmem): Likewise.
	(arc_split_move): Don't split if we have double load/store
	instructions.  Change return type to void.
	(arc_process_double_reg_moves): Change function to return boolean
	instead of a sequence of instructions.
	(arc_dwarf_register_span): New function.
	* config/arc/arc-protos.h (arc_split_move): Change prototype.
	* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
	(*movdf_insn): Likewise.
	* config/arc/arc.opt (mll64): New option.
	* config/arc/predicates.md (even_register_operand): New predicate.
	* doc/invoke.texi (ARC Options): Add mll64 documentation.

From-SVN: r232788
Author: Claudiu Zissulescu <claziss@synopsys.com>, 2016-01-25 12:15:58 +01:00
commit d34a0fdc03 (parent 02ef53f288)
8 changed files with 191 additions and 86 deletions
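What the new option buys at the source level can be shown with a minimal C sketch (illustrative only, not part of the commit):

/* With -mll64 on an ARC HS target, a 64-bit copy like this can be
   emitted as a single ldd/std pair instead of two 32-bit ld/st
   pairs, provided the value is allocated to an even-odd register
   pair.  */
void
copy64 (unsigned long long *dst, const unsigned long long *src)
{
  *dst = *src;
}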

@@ -1,3 +1,22 @@
2016-01-25  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
	(arc_init): Check validity of the mll64 option.
	(arc_save_restore): Use double load/store instructions.
	(arc_expand_movmem): Likewise.
	(arc_split_move): Don't split if we have double load/store
	instructions.  Change return type to void.
	(arc_process_double_reg_moves): Change function to return boolean
	instead of a sequence of instructions.
	(arc_dwarf_register_span): New function.
	* config/arc/arc-protos.h (arc_split_move): Change prototype.
	* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
	(*movdf_insn): Likewise.
	* config/arc/arc.opt (mll64): New option.
	* config/arc/predicates.md (even_register_operand): New predicate.
	* doc/invoke.texi (ARC Options): Add mll64 documentation.

2016-01-25  Richard Biener  <rguenther@suse.de>

	PR lto/69393

@@ -104,7 +104,7 @@ extern void arc_toggle_unalign (void);
extern void split_addsi (rtx *);
extern void split_subsi (rtx *);
extern void arc_pad_return (void);
extern rtx arc_split_move (rtx *);
extern void arc_split_move (rtx *);
extern int arc_verify_short (rtx_insn *insn, int unalign, int);
extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
extern rtx arc_regno_use_in (unsigned int, rtx);

@@ -420,6 +420,9 @@ static void arc_finalize_pic (void);
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
/* Try to keep the (mov:DF _, reg) as early as possible so
that the d<add/sub/mul>h-lr insns appear together and can
use the peephole2 pattern. */
@@ -736,6 +739,10 @@ arc_init (void)
if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
error ("-matomic is only supported for ARC700 or ARC HS cores");
/* ll64 ops only available for HS. */
if (TARGET_LL64 && !TARGET_HS)
error ("-mll64 is only supported for ARC HS cores");
arc_init_reg_tables ();
/* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */
@@ -2175,9 +2182,26 @@ arc_save_restore (rtx base_reg,
for (regno = 0; regno <= 31; regno++)
{
if ((gmask & (1L << regno)) != 0)
enum machine_mode mode = SImode;
bool found = false;
if (TARGET_LL64
&& (regno % 2 == 0)
&& ((gmask & (1L << regno)) != 0)
&& ((gmask & (1L << (regno+1))) != 0))
{
rtx reg = gen_rtx_REG (SImode, regno);
found = true;
mode = DImode;
}
else if ((gmask & (1L << regno)) != 0)
{
found = true;
mode = SImode;
}
if (found)
{
rtx reg = gen_rtx_REG (mode, regno);
rtx addr, mem;
int cfa_adjust = *first_offset;
@@ -2193,7 +2217,7 @@ arc_save_restore (rtx base_reg,
gcc_assert (SMALL_INT (offset));
addr = plus_constant (Pmode, base_reg, offset);
}
mem = gen_frame_mem (SImode, addr);
mem = gen_frame_mem (mode, addr);
if (epilogue_p)
{
rtx insn =
@@ -2212,6 +2236,11 @@ arc_save_restore (rtx base_reg,
else
frame_move_inc (mem, reg, base_reg, addr);
offset += UNITS_PER_WORD;
if (mode == DImode)
{
offset += UNITS_PER_WORD;
++regno;
}
} /* if */
} /* for */
}/* if */
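The pairing test added above can be read in isolation; a standalone sketch (the function name is illustrative, not from the commit):

#include <stdbool.h>

/* A register may start a 64-bit save/restore only if it is even and
   both it and its odd successor are present in the save mask.  */
static bool
can_use_double_op (unsigned long gmask, int regno)
{
  return (regno % 2 == 0)
	 && ((gmask & (1UL << regno)) != 0)
	 && ((gmask & (1UL << (regno + 1))) != 0);
}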
@@ -6986,9 +7015,8 @@ force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
return addr;
}
/* Like move_by_pieces, but take account of load latency,
and actual offset ranges.
Return true on success. */
/* Like move_by_pieces, but take account of load latency, and actual
offset ranges. Return true on success. */
bool
arc_expand_movmem (rtx *operands)
@@ -7009,14 +7037,23 @@ arc_expand_movmem (rtx *operands)
size = INTVAL (operands[2]);
/* move_by_pieces_ninsns is static, so we can't use it. */
if (align >= 4)
n_pieces = (size + 2) / 4U + (size & 1);
{
if (TARGET_LL64)
n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);
else
n_pieces = (size + 2) / 4U + (size & 1);
}
else if (align == 2)
n_pieces = (size + 1) / 2U;
else
n_pieces = size;
if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
return false;
if (piece > 4)
/* Force 32-bit aligned and larger data to use 64-bit transfers, if
possible. */
if (TARGET_LL64 && (piece >= 4) && (size >= 8))
piece = 8;
else if (piece > 4)
piece = 4;
dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
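A worked check of the piece-count formulas above (illustrative, assumes align >= 4):

#include <stdio.h>

/* For size = 10, the LL64 path needs one 8-byte and one 2-byte
   transfer (2 pieces); the 32-bit path needs two 4-byte and one
   2-byte transfer (3 pieces).  */
int
main (void)
{
  unsigned size = 10;
  unsigned ll64 = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);
  unsigned base = (size + 2) / 4U + (size & 1);
  printf ("ll64: %u, base: %u\n", ll64, base); /* prints "ll64: 2, base: 3" */
  return 0;
}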
@@ -7027,8 +7064,8 @@
rtx tmp;
machine_mode mode;
if (piece > size)
piece = size & -size;
while (piece > size)
piece >>= 1;
mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
/* If we don't re-use temporaries, the scheduler gets carried away,
and the register pressure gets unnecessarily high. */
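The change from "size & -size" to the halving loop is easiest to see with a concrete remainder (illustrative comparison):

#include <stdio.h>

/* For a 6-byte remainder with piece = 8, the old expression picked
   the lowest set bit (2 bytes); the halving loop picks the largest
   power of two not exceeding the remainder (4 bytes), saving one
   transfer.  */
int
main (void)
{
  unsigned size = 6, piece = 8;
  unsigned old_piece = size & -size;	/* 2 */
  while (piece > size)
    piece >>= 1;			/* 4 */
  printf ("old: %u, new: %u\n", old_piece, piece);
  return 0;
}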
@@ -8463,12 +8500,11 @@ split_subsi (rtx *operands)
Operand 0: destination register
Operand 1: source register */
static rtx
static bool
arc_process_double_reg_moves (rtx *operands)
{
rtx dest = operands[0];
rtx src = operands[1];
rtx val;
enum usesDxState { none, srcDx, destDx, maxDx };
enum usesDxState state = none;
@@ -8483,9 +8519,7 @@ arc_process_double_reg_moves (rtx *operands)
}
if (state == none)
return NULL_RTX;
start_sequence ();
return false;
if (state == srcDx)
{
@@ -8532,30 +8566,36 @@
else
gcc_unreachable ();
val = get_insns ();
end_sequence ();
return val;
return true;
}
/* Operands 0..1 are the operands of a 64 bit move instruction.
Split it into two moves, unless a single double load/store
instruction can be used. */
rtx
void
arc_split_move (rtx *operands)
{
machine_mode mode = GET_MODE (operands[0]);
int i;
int swap = 0;
rtx xop[4];
rtx val;
if (TARGET_DPFP)
{
val = arc_process_double_reg_moves (operands);
if (val)
return val;
if (arc_process_double_reg_moves (operands))
return;
}
if (TARGET_LL64
&& ((memory_operand (operands[0], mode)
&& even_register_operand (operands[1], mode))
|| (memory_operand (operands[1], mode)
&& even_register_operand (operands[0], mode))))
{
emit_move_insn (operands[0], operands[1]);
return;
}
for (i = 0; i < 2; i++)
{
if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
@@ -8603,18 +8643,10 @@ arc_split_move (rtx *operands)
swap = 2;
gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
}
operands[2+swap] = xop[0];
operands[3+swap] = xop[1];
operands[4-swap] = xop[2];
operands[5-swap] = xop[3];
start_sequence ();
emit_insn (gen_rtx_SET (operands[2], operands[3]));
emit_insn (gen_rtx_SET (operands[4], operands[5]));
val = get_insns ();
end_sequence ();
emit_move_insn (xop[0 + swap], xop[1 + swap]);
emit_move_insn (xop[2 - swap], xop[3 - swap]);
return val;
}
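The ordering problem that the swap logic above solves can be modeled in isolation (illustrative sketch, names not from the commit):

#include <assert.h>

/* When the destination pair's low word aliases the source pair's
   high word, the high word must be moved first, otherwise the first
   move clobbers its own source.  */
static void
move_pair (unsigned regs[], int dst, int src)
{
  if (dst == src + 1)
    {
      regs[dst + 1] = regs[src + 1];	/* high word first */
      regs[dst] = regs[src];
    }
  else
    {
      regs[dst] = regs[src];		/* low word first */
      regs[dst + 1] = regs[src + 1];
    }
}

int
main (void)
{
  unsigned regs[4] = { 0, 0x1111, 0x2222, 0 };
  move_pair (regs, 2, 1);	/* dst r2:r3 overlaps src r1:r2 */
  assert (regs[2] == 0x1111 && regs[3] == 0x2222);
  return 0;
}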
/* Select between the instruction output templates s_tmpl (for short INSNs)
@@ -9329,6 +9361,28 @@ arc_no_speculation_in_delay_slots_p ()
return true;
}
/* Return a parallel of registers to represent where to find the
register pieces if required, otherwise NULL_RTX. */
static rtx
arc_dwarf_register_span (rtx rtl)
{
enum machine_mode mode = GET_MODE (rtl);
unsigned regno;
rtx p;
if (GET_MODE_SIZE (mode) != 8)
return NULL_RTX;
p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
regno = REGNO (rtl);
XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
return p;
}
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-arc.h"

@@ -97,6 +97,10 @@ along with GCC; see the file COPYING3. If not see
builtin_define ("__ARC_NORM__");\
builtin_define ("__Xnorm"); \
} \
if (TARGET_LL64) \
{ \
builtin_define ("__ARC_LL64__");\
} \
if (TARGET_MUL64_SET) \
builtin_define ("__ARC_MUL64__");\
if (TARGET_MULMAC_32BY16_SET) \
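User code can key off the new predefine; a hypothetical use (not part of the commit):

/* Pick a wider copy chunk when the toolchain advertises double
   load/store support.  */
#ifdef __ARC_LL64__
#define COPY_CHUNK 8	/* ldd/std available */
#else
#define COPY_CHUNK 4
#endif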

@@ -984,7 +984,7 @@
}")
(define_insn_and_split "*movdi_insn"
[(set (match_operand:DI 0 "move_dest_operand" "=w,w,r,m")
[(set (match_operand:DI 0 "move_dest_operand" "=w, w,r,m")
(match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))]
"register_operand (operands[0], DImode)
|| register_operand (operands[1], DImode)"
@@ -993,50 +993,36 @@
switch (which_alternative)
{
default:
case 0 :
/* We normally copy the low-numbered register first. However, if
the first register operand 0 is the same as the second register of
operand 1, we must copy in the opposite order. */
if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
return \"mov%? %R0,%R1\;mov%? %0,%1\";
else
return \"mov%? %0,%1\;mov%? %R0,%R1\";
case 1 :
return \"mov%? %L0,%L1\;mov%? %H0,%H1\";
case 2 :
/* If the low-address word is used in the address, we must load it
last. Otherwise, load it first. Note that we cannot have
auto-increment in that case since the address register is known to be
dead. */
if (refers_to_regno_p (REGNO (operands[0]), operands[1]))
return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
else switch (GET_CODE (XEXP(operands[1], 0)))
{
case POST_MODIFY: case POST_INC: case POST_DEC:
return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\";
case PRE_MODIFY: case PRE_INC: case PRE_DEC:
return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\";
default:
return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\";
}
case 3 :
switch (GET_CODE (XEXP(operands[0], 0)))
{
case POST_MODIFY: case POST_INC: case POST_DEC:
return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\";
case PRE_MODIFY: case PRE_INC: case PRE_DEC:
return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\";
default:
return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\";
}
return \"#\";
case 2:
if (TARGET_LL64
&& ((even_register_operand (operands[0], DImode)
&& memory_operand (operands[1], DImode))
|| (memory_operand (operands[0], DImode)
&& even_register_operand (operands[1], DImode))))
return \"ldd%U1%V1 %0,%1%&\";
return \"#\";
case 3:
if (TARGET_LL64
&& ((even_register_operand (operands[0], DImode)
&& memory_operand (operands[1], DImode))
|| (memory_operand (operands[0], DImode)
&& even_register_operand (operands[1], DImode))))
return \"std%U0%V0 %1,%0\";
return \"#\";
}
}"
"&& reload_completed && optimize"
[(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
"arc_split_move (operands);"
"reload_completed"
[(const_int 0)]
{
arc_split_move (operands);
DONE;
}
[(set_attr "type" "move,move,load,store")
;; ??? The ld/st values could be 4 if it's [reg,bignum].
(set_attr "length" "8,16,16,16")])
(set_attr "length" "8,16,*,*")])
;; Floating point move insns.
@@ -1066,23 +1052,46 @@
""
"if (prepare_move_operands (operands, DFmode)) DONE;")
(define_insn "*movdf_insn"
(define_insn_and_split "*movdf_insn"
[(set (match_operand:DF 0 "move_dest_operand" "=D,r,c,c,r,m")
(match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))]
"register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
"#"
"*
{
switch (which_alternative)
{
default:
return \"#\";
case 4:
if (TARGET_LL64
&& ((even_register_operand (operands[0], DFmode)
&& memory_operand (operands[1], DFmode))
|| (memory_operand (operands[0], DFmode)
&& even_register_operand (operands[1], DFmode))))
return \"ldd%U1%V1 %0,%1%&\";
return \"#\";
case 5:
if (TARGET_LL64
&& ((even_register_operand (operands[0], DFmode)
&& memory_operand (operands[1], DFmode))
|| (memory_operand (operands[0], DFmode)
&& even_register_operand (operands[1], DFmode))))
return \"std%U0%V0 %1,%0\";
return \"#\";
}
}"
"reload_completed"
[(const_int 0)]
{
arc_split_move (operands);
DONE;
}
[(set_attr "type" "move,move,move,move,load,store")
(set_attr "predicable" "no,no,yes,yes,no,no")
;; ??? The ld/st values could be 16 if it's [reg,bignum].
(set_attr "length" "4,16,8,16,16,16")])
(define_split
[(set (match_operand:DF 0 "move_dest_operand" "")
(match_operand:DF 1 "move_double_src_operand" ""))]
"reload_completed"
[(match_dup 2)]
"operands[2] = arc_split_move (operands);")
(define_insn_and_split "*movdf_insn_nolrsr"
[(set (match_operand:DF 0 "register_operand" "=r")
(match_operand:DF 1 "arc_double_register_operand" "D"))

@@ -409,3 +409,7 @@ Target Joined
matomic
Target Report Mask(ATOMIC)
Enable atomic instructions.
mll64
Target Report Mask(LL64)
Enable double load/store instructions for ARC HS.

@@ -783,4 +783,15 @@
(match_code "reg" "0")))
(define_predicate "any_mem_operand"
(match_code "mem"))
;; Special predicate to match an even-odd double register pair.
(define_predicate "even_register_operand"
(match_code "reg")
{
if ((GET_MODE (op) != mode) && (mode != VOIDmode))
return 0;
return (REG_P (op) && ((REGNO (op) >= FIRST_PSEUDO_REGISTER)
|| ((REGNO (op) & 1) == 0)));
})

@@ -599,7 +599,7 @@ Objective-C and Objective-C++ Dialects}.
-mmixed-code -mq-class -mRcq -mRcw -msize-level=@var{level} @gol
-mtune=@var{cpu} -mmultcost=@var{num} @gol
-munalign-prob-threshold=@var{probability} -mmpy-option=@var{multo} @gol
-mdiv-rem -mcode-density}
-mdiv-rem -mcode-density -mll64}
@emph{ARM Options}
@gccoptlist{-mapcs-frame -mno-apcs-frame @gol
@@ -13259,6 +13259,10 @@ Enable DIV/REM instructions for ARCv2 cores.
@opindex mcode-density
Enable code density instructions for ARC EM, default on for ARC HS.
@item -mll64
@opindex mll64
Enable double load/store operations for ARC HS cores.
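A hedged usage sketch; the CPU selection flag below is an assumption, not taken from this commit:

/* Assuming -mcpu=archs selects an ARC HS core, building with
	arc-elf32-gcc -mcpu=archs -mll64 -O2 store64.c
   allows the compiler to use a single std for this 64-bit store.  */
void
store64 (unsigned long long *p, unsigned long long v)
{
  *p = v;
}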
@item -mmpy-option=@var{multo}
@opindex mmpy-option
Compile ARCv2 code with a multiplier design option. @samp{wlh1} is