[AArch64] Expand DImode constant stores to two SImode stores when profitable

* config/aarch64/aarch64.md (mov<mode>): Call
	aarch64_split_dimode_const_store on DImode constant stores.
	* config/aarch64/aarch64-protos.h (aarch64_split_dimode_const_store):
	New prototype.
	* config/aarch64/aarch64.c (aarch64_split_dimode_const_store): New
	function.

	* gcc.target/aarch64/store_repeating_constant_1.c: New test.
	* gcc.target/aarch64/store_repeating_constant_2.c: Likewise.

From-SVN: r242551
This commit is contained in:
Kyrylo Tkachov 2016-11-17 14:25:30 +00:00 committed by Kyrylo Tkachov
parent 54e63f0028
commit 141a3ccff1
7 changed files with 103 additions and 0 deletions

View File

@ -1,3 +1,12 @@
2016-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64.md (mov<mode>): Call
aarch64_split_dimode_const_store on DImode constant stores.
* config/aarch64/aarch64-protos.h (aarch64_split_dimode_const_store):
New prototype.
* config/aarch64/aarch64.c (aarch64_split_dimode_const_store): New
function.
2016-11-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Richard Biener <rguenther@suse.de>

View File

@ -337,6 +337,7 @@ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
struct simd_immediate_info *);
bool aarch64_split_dimode_const_store (rtx, rtx);
bool aarch64_symbolic_address_p (rtx);
bool aarch64_uimm12_shift (HOST_WIDE_INT);
bool aarch64_use_return_insn_p (void);

View File

@ -13211,6 +13211,63 @@ aarch64_expand_movmem (rtx *operands)
return true;
}
/* Try to emit the DImode store of CONST_INT SRC to MEM DST as two SImode
   stores of one W-register.  This is only attempted when the upper and
   lower 32 bits of SRC are the same value: the immediate is then
   synthesised once instead of in both halves of an X-register, and the
   two stores are candidates for merging into an STP.
   Return true if the two stores have been emitted; return false, having
   emitted nothing, if the split does not apply or is not worthwhile.  */
bool
aarch64_split_dimode_const_store (rtx dst, rtx src)
{
  rtx low_half = gen_lowpart (SImode, src);
  rtx high_half = gen_highpart_mode (SImode, DImode, src);

  /* Nothing can be shared unless both 32-bit halves match.  */
  if (!rtx_equal_p (low_half, high_half))
    return false;

  /* Cost of building the full 64-bit constant versus just one half.
     NOTE(review): presumably these are instruction counts, going by the
     "save two instructions" reasoning below -- confirm against
     aarch64_internal_mov_immediate.  */
  const unsigned int full_cost
    = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
  const unsigned int half_cost
    = aarch64_internal_mov_immediate (NULL_RTX, low_half, false, SImode);

  /* The motivating case is turning:
       MOV	x1, 49370
       MOVK	x1, 0x140, lsl 16
       MOVK	x1, 0xc0da, lsl 32
       MOVK	x1, 0x140, lsl 48
       STR	x1, [x0]
     into:
       MOV	w1, 49370
       MOVK	w1, 0x140, lsl 16
       STP	w1, w1, [x0]
     When optimizing for speed, require the half-width constant to be at
     least two cheaper than the full one.  When optimizing for size, any
     saving at all is accepted.  */
  const bool worthwhile = (optimize_function_for_size_p (cfun)
                           ? full_cost > half_cost
                           : full_cost > half_cost + 1);
  if (!worthwhile)
    return false;

  /* The low word lives at offset 0 of DST; give up unless that address
     is acceptable as a store-pair operand.  */
  rtx low_mem = adjust_address (dst, SImode, 0);
  if (!aarch64_mem_pair_operand (low_mem, SImode))
    return false;

  rtx value_reg = gen_reg_rtx (SImode);
  aarch64_expand_mov_immediate (value_reg, low_half);
  rtx high_mem = aarch64_move_pointer (low_mem, GET_MODE_SIZE (SImode));

  /* Emit two ordinary stores instead of an explicit store pair, since a
     pair may not always be profitable.  The sched-fusion logic decides
     whether to merge them.  */
  emit_move_insn (low_mem, value_reg);
  emit_move_insn (high_mem, value_reg);

  return true;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
static unsigned HOST_WIDE_INT

View File

@ -1011,6 +1011,11 @@
(match_operand:GPI 1 "general_operand" ""))]
""
"
if (MEM_P (operands[0]) && CONST_INT_P (operands[1])
&& <MODE>mode == DImode
&& aarch64_split_dimode_const_store (operands[0], operands[1]))
DONE;
if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
operands[1] = force_reg (<MODE>mode, operands[1]);

View File

@ -1,3 +1,8 @@
2016-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/store_repeating_constant_1.c: New test.
* gcc.target/aarch64/store_repeating_constant_2.c: Likewise.
2016-11-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Richard Biener <rguenther@suse.de>

View File

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mtune=generic" } */
void
foo (unsigned long long *a)
{
  /* 64-bit constant whose two 32-bit halves are both 0x0140c0da.  */
  *a = 0x0140c0da0140c0daULL;
}
/* { dg-final { scan-assembler-times "movk\\tw.*" 1 } } */
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]+.*" 1 } } */

View File

@ -0,0 +1,15 @@
/* { dg-do compile } */
/* { dg-options "-Os" } */
/* Check that for -Os we synthesize only the bottom half and then
   store it twice with an STP rather than synthesizing it separately
   in each half of an X-reg. */
void
foo (unsigned long long *a)
{
  /* 64-bit constant whose two 32-bit halves are both 0x0000c0da.  */
  *a = 0xc0da0000c0daULL;
}
/* { dg-final { scan-assembler-times "mov\\tw.*" 1 } } */
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]+.*" 1 } } */