arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on initial store.
* arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on initial store. * thumb2.md (thumb2_storewb_pairsi): New pattern. From-SVN: r202279
This commit is contained in:
parent
5922847b1c
commit
5a200acb05
|
@ -1,3 +1,9 @@
|
||||||
|
2013-09-05 Richard Earnshaw <rearnsha@arm.com>
|
||||||
|
|
||||||
|
* arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on
|
||||||
|
initial store.
|
||||||
|
* thumb2.md (thumb2_storewb_pairsi): New pattern.
|
||||||
|
|
||||||
2013-09-05 Yufeng Zhang <yufeng.zhang@arm.com>
|
2013-09-05 Yufeng Zhang <yufeng.zhang@arm.com>
|
||||||
|
|
||||||
* config/aarch64/aarch64-option-extensions.def: Add
|
* config/aarch64/aarch64-option-extensions.def: Add
|
||||||
|
|
|
@ -16814,123 +16814,165 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generate and emit a pattern that will be recognized as STRD pattern. If even
|
/* Generate and emit a sequence of insns equivalent to PUSH, but using
|
||||||
number of registers are being pushed, multiple STRD patterns are created for
|
STR and STRD. If an even number of registers are being pushed, one
|
||||||
all register pairs. If odd number of registers are pushed, emit a
|
or more STRD patterns are created for each register pair. If an
|
||||||
combination of STRDs and STR for the prologue saves. */
|
odd number of registers are pushed, emit an initial STR followed by
|
||||||
|
as many STRD instructions as are needed. This works best when the
|
||||||
|
stack is initially 64-bit aligned (the normal case), since it
|
||||||
|
ensures that each STRD is also 64-bit aligned. */
|
||||||
static void
|
static void
|
||||||
thumb2_emit_strd_push (unsigned long saved_regs_mask)
|
thumb2_emit_strd_push (unsigned long saved_regs_mask)
|
||||||
{
|
{
|
||||||
int num_regs = 0;
|
int num_regs = 0;
|
||||||
int i, j;
|
int i;
|
||||||
|
int regno;
|
||||||
rtx par = NULL_RTX;
|
rtx par = NULL_RTX;
|
||||||
rtx insn = NULL_RTX;
|
|
||||||
rtx dwarf = NULL_RTX;
|
rtx dwarf = NULL_RTX;
|
||||||
rtx tmp, reg, tmp1;
|
rtx tmp;
|
||||||
|
bool first = true;
|
||||||
|
|
||||||
for (i = 0; i <= LAST_ARM_REGNUM; i++)
|
num_regs = bit_count (saved_regs_mask);
|
||||||
if (saved_regs_mask & (1 << i))
|
|
||||||
num_regs++;
|
|
||||||
|
|
||||||
gcc_assert (num_regs && num_regs <= 16);
|
|
||||||
|
|
||||||
/* Pre-decrement the stack pointer, based on there being num_regs 4-byte
|
|
||||||
registers to push. */
|
|
||||||
tmp = gen_rtx_SET (VOIDmode,
|
|
||||||
stack_pointer_rtx,
|
|
||||||
plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
|
|
||||||
RTX_FRAME_RELATED_P (tmp) = 1;
|
|
||||||
insn = emit_insn (tmp);
|
|
||||||
|
|
||||||
/* Create sequence for DWARF info. */
|
|
||||||
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
|
|
||||||
|
|
||||||
/* RTLs cannot be shared, hence create new copy for dwarf. */
|
|
||||||
tmp1 = gen_rtx_SET (VOIDmode,
|
|
||||||
stack_pointer_rtx,
|
|
||||||
plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
|
|
||||||
RTX_FRAME_RELATED_P (tmp1) = 1;
|
|
||||||
XVECEXP (dwarf, 0, 0) = tmp1;
|
|
||||||
|
|
||||||
|
/* Must be at least one register to save, and can't save SP or PC. */
|
||||||
|
gcc_assert (num_regs > 0 && num_regs <= 14);
|
||||||
gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
|
gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
|
||||||
gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
|
gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
|
||||||
|
|
||||||
/* Var j iterates over all the registers to gather all the registers in
|
/* Create sequence for DWARF info. All the frame-related data for
|
||||||
saved_regs_mask. Var i gives index of register R_j in stack frame.
|
debugging is held in this wrapper. */
|
||||||
A PARALLEL RTX of register-pair is created here, so that pattern for
|
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
|
||||||
STRD can be matched. If num_regs is odd, 1st register will be pushed
|
|
||||||
using STR and remaining registers will be pushed with STRD in pairs.
|
|
||||||
If num_regs is even, all registers are pushed with STRD in pairs.
|
|
||||||
Hence, skip first element for odd num_regs. */
|
|
||||||
for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
|
|
||||||
if (saved_regs_mask & (1 << j))
|
|
||||||
{
|
|
||||||
/* Create RTX for store. New RTX is created for dwarf as
|
|
||||||
they are not sharable. */
|
|
||||||
reg = gen_rtx_REG (SImode, j);
|
|
||||||
tmp = gen_rtx_SET (SImode,
|
|
||||||
gen_frame_mem
|
|
||||||
(SImode,
|
|
||||||
plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
|
|
||||||
reg);
|
|
||||||
|
|
||||||
tmp1 = gen_rtx_SET (SImode,
|
/* Describe the stack adjustment. */
|
||||||
gen_frame_mem
|
tmp = gen_rtx_SET (VOIDmode,
|
||||||
(SImode,
|
stack_pointer_rtx,
|
||||||
plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
|
plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
|
||||||
reg);
|
RTX_FRAME_RELATED_P (tmp) = 1;
|
||||||
RTX_FRAME_RELATED_P (tmp) = 1;
|
XVECEXP (dwarf, 0, 0) = tmp;
|
||||||
RTX_FRAME_RELATED_P (tmp1) = 1;
|
|
||||||
|
|
||||||
if (((i - (num_regs % 2)) % 2) == 1)
|
/* Find the first register. */
|
||||||
/* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
|
for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
|
||||||
be created. Hence create it first. The STRD pattern we are
|
;
|
||||||
generating is :
|
|
||||||
[ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
|
|
||||||
(SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
|
|
||||||
where the target registers need not be consecutive. */
|
|
||||||
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
|
|
||||||
|
|
||||||
/* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is
|
i = 0;
|
||||||
even, the reg_j is added as 0th element and if it is odd, reg_i is
|
|
||||||
added as 1st element of STRD pattern shown above. */
|
|
||||||
XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
|
|
||||||
XVECEXP (dwarf, 0, (i + 1)) = tmp1;
|
|
||||||
|
|
||||||
if (((i - (num_regs % 2)) % 2) == 0)
|
/* If there's an odd number of registers to push, start off by
|
||||||
/* When (i - (num_regs % 2)) is even, RTXs for both the registers
|
pushing a single register. This ensures that subsequent strd
|
||||||
to be loaded are generated in above given STRD pattern, and the
|
operations are dword aligned (assuming that SP was originally
|
||||||
pattern can be emitted now. */
|
64-bit aligned). */
|
||||||
emit_insn (par);
|
if ((num_regs & 1) != 0)
|
||||||
|
|
||||||
i--;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((num_regs % 2) == 1)
|
|
||||||
{
|
{
|
||||||
/* If odd number of registers are pushed, generate STR pattern to store
|
rtx reg, mem, insn;
|
||||||
lone register. */
|
|
||||||
for (; (saved_regs_mask & (1 << j)) == 0; j--);
|
|
||||||
|
|
||||||
tmp1 = gen_frame_mem (SImode, plus_constant (Pmode,
|
reg = gen_rtx_REG (SImode, regno);
|
||||||
stack_pointer_rtx, 4 * i));
|
if (num_regs == 1)
|
||||||
reg = gen_rtx_REG (SImode, j);
|
mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
|
||||||
tmp = gen_rtx_SET (SImode, tmp1, reg);
|
stack_pointer_rtx));
|
||||||
|
else
|
||||||
|
mem = gen_frame_mem (Pmode,
|
||||||
|
gen_rtx_PRE_MODIFY
|
||||||
|
(Pmode, stack_pointer_rtx,
|
||||||
|
plus_constant (Pmode, stack_pointer_rtx,
|
||||||
|
-4 * num_regs)));
|
||||||
|
|
||||||
|
tmp = gen_rtx_SET (VOIDmode, mem, reg);
|
||||||
RTX_FRAME_RELATED_P (tmp) = 1;
|
RTX_FRAME_RELATED_P (tmp) = 1;
|
||||||
|
insn = emit_insn (tmp);
|
||||||
emit_insn (tmp);
|
RTX_FRAME_RELATED_P (insn) = 1;
|
||||||
|
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
||||||
tmp1 = gen_rtx_SET (SImode,
|
tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
|
||||||
gen_frame_mem
|
reg);
|
||||||
(SImode,
|
RTX_FRAME_RELATED_P (tmp) = 1;
|
||||||
plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
|
i++;
|
||||||
reg);
|
regno++;
|
||||||
RTX_FRAME_RELATED_P (tmp1) = 1;
|
XVECEXP (dwarf, 0, i) = tmp;
|
||||||
XVECEXP (dwarf, 0, (i + 1)) = tmp1;
|
first = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
while (i < num_regs)
|
||||||
RTX_FRAME_RELATED_P (insn) = 1;
|
if (saved_regs_mask & (1 << regno))
|
||||||
|
{
|
||||||
|
rtx reg1, reg2, mem1, mem2;
|
||||||
|
rtx tmp0, tmp1, tmp2;
|
||||||
|
int regno2;
|
||||||
|
|
||||||
|
/* Find the register to pair with this one. */
|
||||||
|
for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
|
||||||
|
regno2++)
|
||||||
|
;
|
||||||
|
|
||||||
|
reg1 = gen_rtx_REG (SImode, regno);
|
||||||
|
reg2 = gen_rtx_REG (SImode, regno2);
|
||||||
|
|
||||||
|
if (first)
|
||||||
|
{
|
||||||
|
rtx insn;
|
||||||
|
|
||||||
|
first = false;
|
||||||
|
mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
|
||||||
|
stack_pointer_rtx,
|
||||||
|
-4 * num_regs));
|
||||||
|
mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
|
||||||
|
stack_pointer_rtx,
|
||||||
|
-4 * (num_regs - 1)));
|
||||||
|
tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
|
||||||
|
plus_constant (Pmode, stack_pointer_rtx,
|
||||||
|
-4 * (num_regs)));
|
||||||
|
tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
|
||||||
|
tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
|
||||||
|
RTX_FRAME_RELATED_P (tmp0) = 1;
|
||||||
|
RTX_FRAME_RELATED_P (tmp1) = 1;
|
||||||
|
RTX_FRAME_RELATED_P (tmp2) = 1;
|
||||||
|
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
|
||||||
|
XVECEXP (par, 0, 0) = tmp0;
|
||||||
|
XVECEXP (par, 0, 1) = tmp1;
|
||||||
|
XVECEXP (par, 0, 2) = tmp2;
|
||||||
|
insn = emit_insn (par);
|
||||||
|
RTX_FRAME_RELATED_P (insn) = 1;
|
||||||
|
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
|
||||||
|
stack_pointer_rtx,
|
||||||
|
4 * i));
|
||||||
|
mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
|
||||||
|
stack_pointer_rtx,
|
||||||
|
4 * (i + 1)));
|
||||||
|
tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
|
||||||
|
tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
|
||||||
|
RTX_FRAME_RELATED_P (tmp1) = 1;
|
||||||
|
RTX_FRAME_RELATED_P (tmp2) = 1;
|
||||||
|
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
|
||||||
|
XVECEXP (par, 0, 0) = tmp1;
|
||||||
|
XVECEXP (par, 0, 1) = tmp2;
|
||||||
|
emit_insn (par);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create unwind information. This is an approximation. */
|
||||||
|
tmp1 = gen_rtx_SET (VOIDmode,
|
||||||
|
gen_frame_mem (Pmode,
|
||||||
|
plus_constant (Pmode,
|
||||||
|
stack_pointer_rtx,
|
||||||
|
4 * i)),
|
||||||
|
reg1);
|
||||||
|
tmp2 = gen_rtx_SET (VOIDmode,
|
||||||
|
gen_frame_mem (Pmode,
|
||||||
|
plus_constant (Pmode,
|
||||||
|
stack_pointer_rtx,
|
||||||
|
4 * (i + 1))),
|
||||||
|
reg2);
|
||||||
|
|
||||||
|
RTX_FRAME_RELATED_P (tmp1) = 1;
|
||||||
|
RTX_FRAME_RELATED_P (tmp2) = 1;
|
||||||
|
XVECEXP (dwarf, 0, i + 1) = tmp1;
|
||||||
|
XVECEXP (dwarf, 0, i + 2) = tmp2;
|
||||||
|
i += 2;
|
||||||
|
regno = regno2 + 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
regno++;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -325,6 +325,21 @@
|
||||||
(set_attr "neg_pool_range" "*,*,*,250")]
|
(set_attr "neg_pool_range" "*,*,*,250")]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
(define_insn "*thumb2_storewb_pairsi"
|
||||||
|
[(set (match_operand:SI 0 "register_operand" "=&kr")
|
||||||
|
(plus:SI (match_operand:SI 1 "register_operand" "0")
|
||||||
|
(match_operand:SI 2 "const_int_operand" "n")))
|
||||||
|
(set (mem:SI (plus:SI (match_dup 0) (match_dup 2)))
|
||||||
|
(match_operand:SI 3 "register_operand" "r"))
|
||||||
|
(set (mem:SI (plus:SI (match_dup 0)
|
||||||
|
(match_operand:SI 5 "const_int_operand" "n")))
|
||||||
|
(match_operand:SI 4 "register_operand" "r"))]
|
||||||
|
"TARGET_THUMB2
|
||||||
|
&& INTVAL (operands[5]) == INTVAL (operands[2]) + 4"
|
||||||
|
"strd\\t%3, %4, [%0, %2]!"
|
||||||
|
[(set_attr "type" "store2")]
|
||||||
|
)
|
||||||
|
|
||||||
(define_insn "*thumb2_cmpsi_neg_shiftsi"
|
(define_insn "*thumb2_cmpsi_neg_shiftsi"
|
||||||
[(set (reg:CC CC_REGNUM)
|
[(set (reg:CC CC_REGNUM)
|
||||||
(compare:CC (match_operand:SI 0 "s_register_operand" "r")
|
(compare:CC (match_operand:SI 0 "s_register_operand" "r")
|
||||||
|
|
Loading…
Reference in New Issue