[ARC] Add basic support for double load and store instructions
gcc/ 2016-01-25 Claudiu Zissulescu <claziss@synopsys.com> * config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define. (arc_init): Check validity of mll64 option. (arc_save_restore): Use double load/store instruction. (arc_expand_movmem): Likewise. (arc_split_move): Don't split if we have double load/store instructions. Change return type to void; emit the moves directly. (arc_process_double_reg_moves): Change function to return boolean instead of a sequence of instructions. (arc_dwarf_register_span): New function. * config/arc/arc-protos.h (arc_split_move): Change prototype. * config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__. * config/arc/arc.md (*movdi_insn): Emit ldd/std instructions. (*movdf_insn): Likewise. * config/arc/arc.opt (mll64): New option. * config/arc/predicates.md (even_register_operand): New predicate. * doc/invoke.texi (ARC Options): Add mll64 documentation. From-SVN: r232788
This commit is contained in:
parent
02ef53f288
commit
d34a0fdc03
@ -1,3 +1,22 @@
|
||||
2016-01-25 Claudiu Zissulescu <claziss@synopsys.com>
|
||||
|
||||
* config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
|
||||
(arc_init): Check validity of mll64 option.
|
||||
(arc_save_restore): Use double load/store instruction.
|
||||
(arc_expand_movmem): Likewise.
|
||||
(arc_split_move): Don't split if we have double load/store
|
||||
instructions. Change return type to void; emit the moves directly.
|
||||
(arc_process_double_reg_moves): Change function to return boolean
|
||||
instead of a sequence of instructions.
|
||||
(arc_dwarf_register_span): New function.
|
||||
* config/arc/arc-protos.h (arc_split_move): Change prototype.
|
||||
* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
|
||||
* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
|
||||
(*movdf_insn): Likewise.
|
||||
* config/arc/arc.opt (mll64): New option.
|
||||
* config/arc/predicates.md (even_register_operand): New predicate.
|
||||
* doc/invoke.texi (ARC Options): Add mll64 documentation.
|
||||
|
||||
2016-01-25 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR lto/69393
|
||||
|
@ -104,7 +104,7 @@ extern void arc_toggle_unalign (void);
|
||||
extern void split_addsi (rtx *);
|
||||
extern void split_subsi (rtx *);
|
||||
extern void arc_pad_return (void);
|
||||
extern rtx arc_split_move (rtx *);
|
||||
extern void arc_split_move (rtx *);
|
||||
extern int arc_verify_short (rtx_insn *insn, int unalign, int);
|
||||
extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
|
||||
extern rtx arc_regno_use_in (unsigned int, rtx);
|
||||
|
@ -420,6 +420,9 @@ static void arc_finalize_pic (void);
|
||||
#undef TARGET_ASM_ALIGNED_SI_OP
|
||||
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
|
||||
|
||||
#undef TARGET_DWARF_REGISTER_SPAN
|
||||
#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
|
||||
|
||||
/* Try to keep the (mov:DF _, reg) as early as possible so
|
||||
that the d<add/sub/mul>h-lr insns appear together and can
|
||||
use the peephole2 pattern. */
|
||||
@ -736,6 +739,10 @@ arc_init (void)
|
||||
if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
|
||||
error ("-matomic is only supported for ARC700 or ARC HS cores");
|
||||
|
||||
/* ll64 ops only available for HS. */
|
||||
if (TARGET_LL64 && !TARGET_HS)
|
||||
error ("-mll64 is only supported for ARC HS cores");
|
||||
|
||||
arc_init_reg_tables ();
|
||||
|
||||
/* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */
|
||||
@ -2175,9 +2182,26 @@ arc_save_restore (rtx base_reg,
|
||||
|
||||
for (regno = 0; regno <= 31; regno++)
|
||||
{
|
||||
if ((gmask & (1L << regno)) != 0)
|
||||
enum machine_mode mode = SImode;
|
||||
bool found = false;
|
||||
|
||||
if (TARGET_LL64
|
||||
&& (regno % 2 == 0)
|
||||
&& ((gmask & (1L << regno)) != 0)
|
||||
&& ((gmask & (1L << (regno+1))) != 0))
|
||||
{
|
||||
rtx reg = gen_rtx_REG (SImode, regno);
|
||||
found = true;
|
||||
mode = DImode;
|
||||
}
|
||||
else if ((gmask & (1L << regno)) != 0)
|
||||
{
|
||||
found = true;
|
||||
mode = SImode;
|
||||
}
|
||||
|
||||
if (found)
|
||||
{
|
||||
rtx reg = gen_rtx_REG (mode, regno);
|
||||
rtx addr, mem;
|
||||
int cfa_adjust = *first_offset;
|
||||
|
||||
@ -2193,7 +2217,7 @@ arc_save_restore (rtx base_reg,
|
||||
gcc_assert (SMALL_INT (offset));
|
||||
addr = plus_constant (Pmode, base_reg, offset);
|
||||
}
|
||||
mem = gen_frame_mem (SImode, addr);
|
||||
mem = gen_frame_mem (mode, addr);
|
||||
if (epilogue_p)
|
||||
{
|
||||
rtx insn =
|
||||
@ -2212,6 +2236,11 @@ arc_save_restore (rtx base_reg,
|
||||
else
|
||||
frame_move_inc (mem, reg, base_reg, addr);
|
||||
offset += UNITS_PER_WORD;
|
||||
if (mode == DImode)
|
||||
{
|
||||
offset += UNITS_PER_WORD;
|
||||
++regno;
|
||||
}
|
||||
} /* if */
|
||||
} /* for */
|
||||
}/* if */
|
||||
@ -6986,9 +7015,8 @@ force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
|
||||
return addr;
|
||||
}
|
||||
|
||||
/* Like move_by_pieces, but take account of load latency,
|
||||
and actual offset ranges.
|
||||
Return true on success. */
|
||||
/* Like move_by_pieces, but take account of load latency, and actual
|
||||
offset ranges. Return true on success. */
|
||||
|
||||
bool
|
||||
arc_expand_movmem (rtx *operands)
|
||||
@ -7009,14 +7037,23 @@ arc_expand_movmem (rtx *operands)
|
||||
size = INTVAL (operands[2]);
|
||||
/* move_by_pieces_ninsns is static, so we can't use it. */
|
||||
if (align >= 4)
|
||||
n_pieces = (size + 2) / 4U + (size & 1);
|
||||
{
|
||||
if (TARGET_LL64)
|
||||
n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);
|
||||
else
|
||||
n_pieces = (size + 2) / 4U + (size & 1);
|
||||
}
|
||||
else if (align == 2)
|
||||
n_pieces = (size + 1) / 2U;
|
||||
else
|
||||
n_pieces = size;
|
||||
if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
|
||||
return false;
|
||||
if (piece > 4)
|
||||
/* Force 32 bit aligned and larger datum to use 64 bit transfers, if
|
||||
possible. */
|
||||
if (TARGET_LL64 && (piece >= 4) && (size >= 8))
|
||||
piece = 8;
|
||||
else if (piece > 4)
|
||||
piece = 4;
|
||||
dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
|
||||
src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
|
||||
@ -7027,8 +7064,8 @@ arc_expand_movmem (rtx *operands)
|
||||
rtx tmp;
|
||||
machine_mode mode;
|
||||
|
||||
if (piece > size)
|
||||
piece = size & -size;
|
||||
while (piece > size)
|
||||
piece >>= 1;
|
||||
mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
|
||||
/* If we don't re-use temporaries, the scheduler gets carried away,
|
||||
and the register pressure gets unnecessarily high. */
|
||||
@ -8463,12 +8500,11 @@ split_subsi (rtx *operands)
|
||||
Operand 0: destination register
|
||||
Operand 1: source register */
|
||||
|
||||
static rtx
|
||||
static bool
|
||||
arc_process_double_reg_moves (rtx *operands)
|
||||
{
|
||||
rtx dest = operands[0];
|
||||
rtx src = operands[1];
|
||||
rtx val;
|
||||
|
||||
enum usesDxState { none, srcDx, destDx, maxDx };
|
||||
enum usesDxState state = none;
|
||||
@ -8483,9 +8519,7 @@ arc_process_double_reg_moves (rtx *operands)
|
||||
}
|
||||
|
||||
if (state == none)
|
||||
return NULL_RTX;
|
||||
|
||||
start_sequence ();
|
||||
return false;
|
||||
|
||||
if (state == srcDx)
|
||||
{
|
||||
@ -8532,30 +8566,36 @@ arc_process_double_reg_moves (rtx *operands)
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
val = get_insns ();
|
||||
end_sequence ();
|
||||
return val;
|
||||
return true;
|
||||
}
|
||||
|
||||
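/* Operands 0..1 are the operands of a 64 bit move instruction.
|
||||
Unless arc_process_double_reg_moves already handled it, emit it as a single move when a double load/store applies (TARGET_LL64), otherwise split it into two moves and emit them. */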
|
||||
|
||||
rtx
|
||||
void
|
||||
arc_split_move (rtx *operands)
|
||||
{
|
||||
machine_mode mode = GET_MODE (operands[0]);
|
||||
int i;
|
||||
int swap = 0;
|
||||
rtx xop[4];
|
||||
rtx val;
|
||||
|
||||
if (TARGET_DPFP)
|
||||
{
|
||||
val = arc_process_double_reg_moves (operands);
|
||||
if (val)
|
||||
return val;
|
||||
if (arc_process_double_reg_moves (operands))
|
||||
return;
|
||||
}
|
||||
|
||||
if (TARGET_LL64
|
||||
&& ((memory_operand (operands[0], mode)
|
||||
&& even_register_operand (operands[1], mode))
|
||||
|| (memory_operand (operands[1], mode)
|
||||
&& even_register_operand (operands[0], mode))))
|
||||
{
|
||||
emit_move_insn (operands[0], operands[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
|
||||
@ -8603,18 +8643,10 @@ arc_split_move (rtx *operands)
|
||||
swap = 2;
|
||||
gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
|
||||
}
|
||||
operands[2+swap] = xop[0];
|
||||
operands[3+swap] = xop[1];
|
||||
operands[4-swap] = xop[2];
|
||||
operands[5-swap] = xop[3];
|
||||
|
||||
start_sequence ();
|
||||
emit_insn (gen_rtx_SET (operands[2], operands[3]));
|
||||
emit_insn (gen_rtx_SET (operands[4], operands[5]));
|
||||
val = get_insns ();
|
||||
end_sequence ();
|
||||
emit_move_insn (xop[0 + swap], xop[1 + swap]);
|
||||
emit_move_insn (xop[2 - swap], xop[3 - swap]);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/* Select between the instruction output templates s_tmpl (for short INSNs)
|
||||
@ -9329,6 +9361,28 @@ arc_no_speculation_in_delay_slots_p ()
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Implement TARGET_DWARF_REGISTER_SPAN.  Return a PARALLEL of the two
|
||||
SImode registers that hold the pieces of the 8-byte value in RTL, or
NULL_RTX when RTL is not 8 bytes wide.  */
|
||||
|
||||
static rtx
|
||||
arc_dwarf_register_span (rtx rtl)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (rtl);
|
||||
unsigned regno;
|
||||
rtx p;
|
||||
|
||||
/* Values that are not 8 bytes wide get no span.  */
if (GET_MODE_SIZE (mode) != 8)
|
||||
return NULL_RTX;
|
||||
|
||||
/* Describe the value as the register pair REGNO, REGNO + 1, each
   viewed in SImode.  */
p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
|
||||
regno = REGNO (rtl);
|
||||
XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
|
||||
XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
#include "gt-arc.h"
|
||||
|
@ -97,6 +97,10 @@ along with GCC; see the file COPYING3. If not see
|
||||
builtin_define ("__ARC_NORM__");\
|
||||
builtin_define ("__Xnorm"); \
|
||||
} \
|
||||
if (TARGET_LL64) \
|
||||
{ \
|
||||
builtin_define ("__ARC_LL64__");\
|
||||
} \
|
||||
if (TARGET_MUL64_SET) \
|
||||
builtin_define ("__ARC_MUL64__");\
|
||||
if (TARGET_MULMAC_32BY16_SET) \
|
||||
|
@ -984,7 +984,7 @@
|
||||
}")
|
||||
|
||||
(define_insn_and_split "*movdi_insn"
|
||||
[(set (match_operand:DI 0 "move_dest_operand" "=w,w,r,m")
|
||||
[(set (match_operand:DI 0 "move_dest_operand" "=w, w,r,m")
|
||||
(match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))]
|
||||
"register_operand (operands[0], DImode)
|
||||
|| register_operand (operands[1], DImode)"
|
||||
@ -993,50 +993,36 @@
|
||||
switch (which_alternative)
|
||||
{
|
||||
default:
|
||||
case 0 :
|
||||
/* We normally copy the low-numbered register first. However, if
|
||||
the first register operand 0 is the same as the second register of
|
||||
operand 1, we must copy in the opposite order. */
|
||||
if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
|
||||
return \"mov%? %R0,%R1\;mov%? %0,%1\";
|
||||
else
|
||||
return \"mov%? %0,%1\;mov%? %R0,%R1\";
|
||||
case 1 :
|
||||
return \"mov%? %L0,%L1\;mov%? %H0,%H1\";
|
||||
case 2 :
|
||||
/* If the low-address word is used in the address, we must load it
|
||||
last. Otherwise, load it first. Note that we cannot have
|
||||
auto-increment in that case since the address register is known to be
|
||||
dead. */
|
||||
if (refers_to_regno_p (REGNO (operands[0]), operands[1]))
|
||||
return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
|
||||
else switch (GET_CODE (XEXP(operands[1], 0)))
|
||||
{
|
||||
case POST_MODIFY: case POST_INC: case POST_DEC:
|
||||
return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\";
|
||||
case PRE_MODIFY: case PRE_INC: case PRE_DEC:
|
||||
return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\";
|
||||
default:
|
||||
return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\";
|
||||
}
|
||||
case 3 :
|
||||
switch (GET_CODE (XEXP(operands[0], 0)))
|
||||
{
|
||||
case POST_MODIFY: case POST_INC: case POST_DEC:
|
||||
return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\";
|
||||
case PRE_MODIFY: case PRE_INC: case PRE_DEC:
|
||||
return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\";
|
||||
default:
|
||||
return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\";
|
||||
}
|
||||
return \"#\";
|
||||
|
||||
case 2:
|
||||
if (TARGET_LL64
|
||||
&& ((even_register_operand (operands[0], DImode)
|
||||
&& memory_operand (operands[1], DImode))
|
||||
|| (memory_operand (operands[0], DImode)
|
||||
&& even_register_operand (operands[1], DImode))))
|
||||
return \"ldd%U1%V1 %0,%1%&\";
|
||||
return \"#\";
|
||||
|
||||
case 3:
|
||||
if (TARGET_LL64
|
||||
&& ((even_register_operand (operands[0], DImode)
|
||||
&& memory_operand (operands[1], DImode))
|
||||
|| (memory_operand (operands[0], DImode)
|
||||
&& even_register_operand (operands[1], DImode))))
|
||||
return \"std%U0%V0 %1,%0\";
|
||||
return \"#\";
|
||||
}
|
||||
}"
|
||||
"&& reload_completed && optimize"
|
||||
[(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
|
||||
"arc_split_move (operands);"
|
||||
"reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
arc_split_move (operands);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "move,move,load,store")
|
||||
;; ??? The ld/st values could be 4 if it's [reg,bignum].
|
||||
(set_attr "length" "8,16,16,16")])
|
||||
(set_attr "length" "8,16,*,*")])
|
||||
|
||||
|
||||
;; Floating point move insns.
|
||||
@ -1066,23 +1052,46 @@
|
||||
""
|
||||
"if (prepare_move_operands (operands, DFmode)) DONE;")
|
||||
|
||||
(define_insn "*movdf_insn"
|
||||
(define_insn_and_split "*movdf_insn"
|
||||
[(set (match_operand:DF 0 "move_dest_operand" "=D,r,c,c,r,m")
|
||||
(match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))]
|
||||
"register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
|
||||
"#"
|
||||
"*
|
||||
{
|
||||
switch (which_alternative)
|
||||
{
|
||||
default:
|
||||
return \"#\";
|
||||
case 4:
|
||||
if (TARGET_LL64
|
||||
&& ((even_register_operand (operands[0], DFmode)
|
||||
&& memory_operand (operands[1], DFmode))
|
||||
|| (memory_operand (operands[0], DFmode)
|
||||
&& even_register_operand (operands[1], DFmode))))
|
||||
return \"ldd%U1%V1 %0,%1%&\";
|
||||
return \"#\";
|
||||
|
||||
case 5:
|
||||
if (TARGET_LL64
|
||||
&& ((even_register_operand (operands[0], DFmode)
|
||||
&& memory_operand (operands[1], DFmode))
|
||||
|| (memory_operand (operands[0], DFmode)
|
||||
&& even_register_operand (operands[1], DFmode))))
|
||||
return \"std%U0%V0 %1,%0\";
|
||||
return \"#\";
|
||||
}
|
||||
}"
|
||||
"reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
arc_split_move (operands);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "move,move,move,move,load,store")
|
||||
(set_attr "predicable" "no,no,yes,yes,no,no")
|
||||
;; ??? The ld/st values could be 16 if it's [reg,bignum].
|
||||
(set_attr "length" "4,16,8,16,16,16")])
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:DF 0 "move_dest_operand" "")
|
||||
(match_operand:DF 1 "move_double_src_operand" ""))]
|
||||
"reload_completed"
|
||||
[(match_dup 2)]
|
||||
"operands[2] = arc_split_move (operands);")
|
||||
|
||||
(define_insn_and_split "*movdf_insn_nolrsr"
|
||||
[(set (match_operand:DF 0 "register_operand" "=r")
|
||||
(match_operand:DF 1 "arc_double_register_operand" "D"))
|
||||
|
@ -409,3 +409,7 @@ Target Joined
|
||||
matomic
|
||||
Target Report Mask(ATOMIC)
|
||||
Enable atomic instructions.
|
||||
|
||||
mll64
|
||||
Target Report Mask(LL64)
|
||||
Enable double load/store instructions for ARC HS.
|
||||
|
@ -784,3 +784,14 @@
|
||||
|
||||
(define_predicate "any_mem_operand"
|
||||
(match_code "mem"))
|
||||
|
||||
; Special predicate to match a register that can start an even-odd double register pair: any pseudo register, or an even-numbered hard register.
|
||||
(define_predicate "even_register_operand"
|
||||
(match_code "reg")
|
||||
{
|
||||
if ((GET_MODE (op) != mode) && (mode != VOIDmode))
|
||||
return 0;
|
||||
|
||||
return (REG_P (op) && ((REGNO (op) >= FIRST_PSEUDO_REGISTER)
|
||||
|| ((REGNO (op) & 1) == 0)));
|
||||
})
|
||||
|
@ -599,7 +599,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-mmixed-code -mq-class -mRcq -mRcw -msize-level=@var{level} @gol
|
||||
-mtune=@var{cpu} -mmultcost=@var{num} @gol
|
||||
-munalign-prob-threshold=@var{probability} -mmpy-option=@var{multo} @gol
|
||||
-mdiv-rem -mcode-density}
|
||||
-mdiv-rem -mcode-density -mll64}
|
||||
|
||||
@emph{ARM Options}
|
||||
@gccoptlist{-mapcs-frame -mno-apcs-frame @gol
|
||||
@ -13259,6 +13259,10 @@ Enable DIV/REM instructions for ARCv2 cores.
|
||||
@opindex mcode-density
|
||||
Enable code density instructions for ARC EM, default on for ARC HS.
|
||||
|
||||
@item -mll64
|
||||
@opindex mll64
|
||||
Enable double load/store operations for ARC HS cores.
|
||||
|
||||
@item -mmpy-option=@var{multo}
|
||||
@opindex mmpy-option
|
||||
Compile ARCv2 code with a multiplier design option. @samp{wlh1} is
|
||||
|
Loading…
x
Reference in New Issue
Block a user