arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on initial store.

* arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on
	initial store.
	* thumb2.md (thumb2_storewb_pairsi): New pattern.

From-SVN: r202279
This commit is contained in:
Richard Earnshaw 2013-09-05 12:37:29 +00:00 committed by Richard Earnshaw
parent 5922847b1c
commit 5a200acb05
3 changed files with 160 additions and 97 deletions

View File

@ -1,3 +1,9 @@
2013-09-05 Richard Earnshaw <rearnsha@arm.com>
* arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on
initial store.
* thumb2.md (thumb2_storewb_pairsi): New pattern.
2013-09-05 Yufeng Zhang <yufeng.zhang@arm.com> 2013-09-05 Yufeng Zhang <yufeng.zhang@arm.com>
* config/aarch64/aarch64-option-extensions.def: Add * config/aarch64/aarch64-option-extensions.def: Add

View File

@ -16814,123 +16814,165 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
} }
} }
/* Generate and emit a pattern that will be recognized as STRD pattern. If even /* Generate and emit a sequence of insns equivalent to PUSH, but using
number of registers are being pushed, multiple STRD patterns are created for STR and STRD. If an even number of registers are being pushed, one
all register pairs. If odd number of registers are pushed, emit a or more STRD patterns are created for each register pair. If an
combination of STRDs and STR for the prologue saves. */ odd number of registers are pushed, emit an initial STR followed by
as many STRD instructions as are needed. This works best when the
stack is initially 64-bit aligned (the normal case), since it
ensures that each STRD is also 64-bit aligned. */
static void static void
thumb2_emit_strd_push (unsigned long saved_regs_mask) thumb2_emit_strd_push (unsigned long saved_regs_mask)
{ {
int num_regs = 0; int num_regs = 0;
int i, j; int i;
int regno;
rtx par = NULL_RTX; rtx par = NULL_RTX;
rtx insn = NULL_RTX;
rtx dwarf = NULL_RTX; rtx dwarf = NULL_RTX;
rtx tmp, reg, tmp1; rtx tmp;
bool first = true;
for (i = 0; i <= LAST_ARM_REGNUM; i++) num_regs = bit_count (saved_regs_mask);
if (saved_regs_mask & (1 << i))
num_regs++;
gcc_assert (num_regs && num_regs <= 16);
/* Pre-decrement the stack pointer, based on there being num_regs 4-byte
registers to push. */
tmp = gen_rtx_SET (VOIDmode,
stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
RTX_FRAME_RELATED_P (tmp) = 1;
insn = emit_insn (tmp);
/* Create sequence for DWARF info. */
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
/* RTLs cannot be shared, hence create new copy for dwarf. */
tmp1 = gen_rtx_SET (VOIDmode,
stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
RTX_FRAME_RELATED_P (tmp1) = 1;
XVECEXP (dwarf, 0, 0) = tmp1;
/* Must be at least one register to save, and can't save SP or PC. */
gcc_assert (num_regs > 0 && num_regs <= 14);
gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
/* Var j iterates over all the registers to gather all the registers in /* Create sequence for DWARF info. All the frame-related data for
saved_regs_mask. Var i gives index of register R_j in stack frame. debugging is held in this wrapper. */
A PARALLEL RTX of register-pair is created here, so that pattern for dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
STRD can be matched. If num_regs is odd, 1st register will be pushed
using STR and remaining registers will be pushed with STRD in pairs.
If num_regs is even, all registers are pushed with STRD in pairs.
Hence, skip first element for odd num_regs. */
for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
if (saved_regs_mask & (1 << j))
{
/* Create RTX for store. New RTX is created for dwarf as
they are not sharable. */
reg = gen_rtx_REG (SImode, j);
tmp = gen_rtx_SET (SImode,
gen_frame_mem
(SImode,
plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
reg);
tmp1 = gen_rtx_SET (SImode, /* Describe the stack adjustment. */
gen_frame_mem tmp = gen_rtx_SET (VOIDmode,
(SImode, stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx, 4 * i)), plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
reg); RTX_FRAME_RELATED_P (tmp) = 1;
RTX_FRAME_RELATED_P (tmp) = 1; XVECEXP (dwarf, 0, 0) = tmp;
RTX_FRAME_RELATED_P (tmp1) = 1;
if (((i - (num_regs % 2)) % 2) == 1) /* Find the first register. */
/* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
be created. Hence create it first. The STRD pattern we are ;
generating is :
[ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
(SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
where the target registers need not be consecutive. */
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
/* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is i = 0;
even, the reg_j is added as 0th element and if it is odd, reg_i is
added as 1st element of STRD pattern shown above. */
XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
XVECEXP (dwarf, 0, (i + 1)) = tmp1;
if (((i - (num_regs % 2)) % 2) == 0) /* If there's an odd number of registers to push, start off by
/* When (i - (num_regs % 2)) is even, RTXs for both the registers pushing a single register. This ensures that subsequent strd
to be loaded are generated in above given STRD pattern, and the operations are dword aligned (assuming that SP was originally
pattern can be emitted now. */ 64-bit aligned). */
emit_insn (par); if ((num_regs & 1) != 0)
i--;
}
if ((num_regs % 2) == 1)
{ {
/* If odd number of registers are pushed, generate STR pattern to store rtx reg, mem, insn;
lone register. */
for (; (saved_regs_mask & (1 << j)) == 0; j--);
tmp1 = gen_frame_mem (SImode, plus_constant (Pmode, reg = gen_rtx_REG (SImode, regno);
stack_pointer_rtx, 4 * i)); if (num_regs == 1)
reg = gen_rtx_REG (SImode, j); mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
tmp = gen_rtx_SET (SImode, tmp1, reg); stack_pointer_rtx));
else
mem = gen_frame_mem (Pmode,
gen_rtx_PRE_MODIFY
(Pmode, stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
-4 * num_regs)));
tmp = gen_rtx_SET (VOIDmode, mem, reg);
RTX_FRAME_RELATED_P (tmp) = 1; RTX_FRAME_RELATED_P (tmp) = 1;
insn = emit_insn (tmp);
emit_insn (tmp); RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
tmp1 = gen_rtx_SET (SImode, tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
gen_frame_mem reg);
(SImode, RTX_FRAME_RELATED_P (tmp) = 1;
plus_constant (Pmode, stack_pointer_rtx, 4 * i)), i++;
reg); regno++;
RTX_FRAME_RELATED_P (tmp1) = 1; XVECEXP (dwarf, 0, i) = tmp;
XVECEXP (dwarf, 0, (i + 1)) = tmp1; first = false;
} }
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); while (i < num_regs)
RTX_FRAME_RELATED_P (insn) = 1; if (saved_regs_mask & (1 << regno))
{
rtx reg1, reg2, mem1, mem2;
rtx tmp0, tmp1, tmp2;
int regno2;
/* Find the register to pair with this one. */
for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
regno2++)
;
reg1 = gen_rtx_REG (SImode, regno);
reg2 = gen_rtx_REG (SImode, regno2);
if (first)
{
rtx insn;
first = false;
mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
-4 * num_regs));
mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
-4 * (num_regs - 1)));
tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
-4 * (num_regs)));
tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
RTX_FRAME_RELATED_P (tmp0) = 1;
RTX_FRAME_RELATED_P (tmp1) = 1;
RTX_FRAME_RELATED_P (tmp2) = 1;
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
XVECEXP (par, 0, 0) = tmp0;
XVECEXP (par, 0, 1) = tmp1;
XVECEXP (par, 0, 2) = tmp2;
insn = emit_insn (par);
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
}
else
{
mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
4 * i));
mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
4 * (i + 1)));
tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
RTX_FRAME_RELATED_P (tmp1) = 1;
RTX_FRAME_RELATED_P (tmp2) = 1;
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
XVECEXP (par, 0, 0) = tmp1;
XVECEXP (par, 0, 1) = tmp2;
emit_insn (par);
}
/* Create unwind information. This is an approximation. */
tmp1 = gen_rtx_SET (VOIDmode,
gen_frame_mem (Pmode,
plus_constant (Pmode,
stack_pointer_rtx,
4 * i)),
reg1);
tmp2 = gen_rtx_SET (VOIDmode,
gen_frame_mem (Pmode,
plus_constant (Pmode,
stack_pointer_rtx,
4 * (i + 1))),
reg2);
RTX_FRAME_RELATED_P (tmp1) = 1;
RTX_FRAME_RELATED_P (tmp2) = 1;
XVECEXP (dwarf, 0, i + 1) = tmp1;
XVECEXP (dwarf, 0, i + 2) = tmp2;
i += 2;
regno = regno2 + 1;
}
else
regno++;
return; return;
} }

View File

@ -325,6 +325,21 @@
(set_attr "neg_pool_range" "*,*,*,250")] (set_attr "neg_pool_range" "*,*,*,250")]
) )
;; Store a pair of SImode registers with writeback of the base register.
;;
;; The pattern is a parallel of three sets:
;;   1. operand 0 (the base register) becomes operand 1 plus the constant
;;      operand 2; operand 1 is tied to operand 0 by its "0" constraint.
;;   2. operand 3 is stored to the word at base + operand 2.
;;   3. operand 4 is stored to the word at base + operand 5.
;; The insn condition requires TARGET_THUMB2 and operand 5 == operand 2 + 4,
;; so the two stored words are adjacent.  The output template is a single
;; STRD using the "[%0, %2]!" (pre-indexed, writeback) addressing form.
;;
;; NOTE(review): the condition does not itself check that operand 2 is an
;; offset STRD can encode; presumably the code generating this parallel
;; only produces encodable offsets -- confirm against the caller.
(define_insn "*thumb2_storewb_pairsi"
[(set (match_operand:SI 0 "register_operand" "=&kr")
(plus:SI (match_operand:SI 1 "register_operand" "0")
(match_operand:SI 2 "const_int_operand" "n")))
(set (mem:SI (plus:SI (match_dup 0) (match_dup 2)))
(match_operand:SI 3 "register_operand" "r"))
(set (mem:SI (plus:SI (match_dup 0)
(match_operand:SI 5 "const_int_operand" "n")))
(match_operand:SI 4 "register_operand" "r"))]
"TARGET_THUMB2
&& INTVAL (operands[5]) == INTVAL (operands[2]) + 4"
"strd\\t%3, %4, [%0, %2]!"
[(set_attr "type" "store2")]
)
(define_insn "*thumb2_cmpsi_neg_shiftsi" (define_insn "*thumb2_cmpsi_neg_shiftsi"
[(set (reg:CC CC_REGNUM) [(set (reg:CC CC_REGNUM)
(compare:CC (match_operand:SI 0 "s_register_operand" "r") (compare:CC (match_operand:SI 0 "s_register_operand" "r")