[AArch64] Canonicalise SVE predicate constants

This patch makes sure that we build all SVE predicate constants as
VNx16BI before RA, to encourage similar constants to be reused
between modes.  This is also useful for the ACLE, where the single
predicate type svbool_t is always a VNx16BI.
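
As a purely illustrative ACLE example (not part of this patch, and the
function names are invented): every intrinsic takes the same svbool_t
governing predicate regardless of element size, so the predicate type
itself always corresponds to VNx16BI:

    #include <arm_sve.h>

    /* The same svbool_t (VNx16BI) type governs both the .S operation
       and the .D operation.  */
    svfloat32_t add_f32 (svbool_t pg, svfloat32_t x, svfloat32_t y)
    {
      return svadd_x (pg, x, y);
    }

    svfloat64_t add_f64 (svbool_t pg, svfloat64_t x, svfloat64_t y)
    {
      return svadd_x (pg, x, y);
    }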

Also, and again to encourage reuse, the patch makes us use a .B PTRUE
for all ptrue-predicated operations, rather than (for example) using
a .S PTRUE for 32-bit operations and a .D PTRUE for 64-bit operations.

The only current case in which a .H, .S or .D operation needs to be
predicated by a "strict" .H/.S/.D PTRUE is the PTEST in a conditional
branch, which an earlier patch fixed to use an appropriate VNx16BI
constant.
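
For example (an illustrative sketch rather than actual output from this
patch; register allocation and scheduling will differ), instead of
materialising one all-true predicate per element size:

	ptrue	p0.s, all
	ptrue	p1.d, all
	fadd	z0.s, p0/m, z0.s, z1.s
	fadd	z2.d, p1/m, z2.d, z3.d

we can now share a single byte predicate, since a predicated .S or .D
operation reads only the low bit of each element's predicate field:

	ptrue	p0.b, all
	fadd	z0.s, p0/m, z0.s, z1.s
	fadd	z2.d, p0/m, z2.d, z3.d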

2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/aarch64.c (aarch64_target_reg): New function.
	(aarch64_emit_set_immediate): Likewise.
	(aarch64_ptrue_reg): Build a VNx16BI constant and then bitcast it.
	(aarch64_pfalse_reg): Likewise.
	(aarch64_convert_sve_data_to_pred): New function.
	(aarch64_sve_move_pred_via_while): Take an optional target register
	and the required register mode.
	(aarch64_expand_sve_const_pred_1): New function.
	(aarch64_expand_sve_const_pred): Likewise.
	(aarch64_expand_mov_immediate): Build an all-true predicate
	if the significant bits of the immediate are all true.  Use
	aarch64_expand_sve_const_pred for all compile-time predicate constants.
	(aarch64_mov_operand_p): Force predicate constants to be VNx16BI
	before register allocation.
	* config/aarch64/aarch64-sve.md (*vec_duplicate<mode>_reg): Use
	a VNx16BI PTRUE when splitting the memory alternative.
	(vec_duplicate<mode>): Update accordingly.
	(*pred_cmp<cmp_op><mode>): Rename to...
	(@aarch64_pred_cmp<cmp_op><mode>): ...this.

gcc/testsuite/
	* gcc.target/aarch64/sve/spill_4.c: Expect all ptrues to be .Bs.
	* gcc.target/aarch64/sve/single_1.c: Likewise.
	* gcc.target/aarch64/sve/single_2.c: Likewise.
	* gcc.target/aarch64/sve/single_3.c: Likewise.
	* gcc.target/aarch64/sve/single_4.c: Likewise.

From-SVN: r274415

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -846,7 +846,7 @@
   [(set (match_operand:SVE_ALL 0 "register_operand")
 	(vec_duplicate:SVE_ALL
 	  (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
-   (clobber (scratch:<VPRED>))])]
+   (clobber (scratch:VNx16BI))])]
   "TARGET_SVE"
   {
     if (MEM_P (operands[1]))
@@ -867,7 +867,7 @@
   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
 	(vec_duplicate:SVE_ALL
 	  (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
-   (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
+   (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
   "TARGET_SVE"
   "@
    mov\t%0.<Vetype>, %<vwcore>1
@@ -877,9 +877,10 @@
   [(const_int 0)]
   {
     if (GET_CODE (operands[2]) == SCRATCH)
-      operands[2] = gen_reg_rtx (<VPRED>mode);
-    emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
-    emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
+      operands[2] = gen_reg_rtx (VNx16BImode);
+    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
+    rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
+    emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
 				   CONST0_RTX (<MODE>mode)));
     DONE;
   }
@@ -2971,7 +2972,7 @@
 )
 
 ;; Predicated integer comparisons.
-(define_insn "*pred_cmp<cmp_op><mode>"
+(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
 	(and:<VPRED>
 	  (SVE_INT_CMP:<VPRED>

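An aside on the rename above: an "@" prefix on a pattern name makes the
generators emit "parameterized" helpers, so that aarch64.c can look the
pattern up by rtx code and mode instead of hard-coding one gen_* function
per variant.  A sketch of the intended usage (as in
aarch64_convert_sve_data_to_pred below; the VNx16QImode argument here is
just an example):

    /* "@aarch64_pred_cmp<cmp_op><mode>" provides, among other helpers:  */
    insn_code icode = code_for_aarch64_pred_cmp (NE, VNx16QImode);
    /* ...which can then be used with create_*_operand and expand_insn.  */
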
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2546,6 +2546,36 @@ aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
 }
 
+/* Return TARGET if it is nonnull and a register of mode MODE.
+   Otherwise, return a fresh register of mode MODE if we can,
+   or TARGET reinterpreted as MODE if we can't.  */
+
+static rtx
+aarch64_target_reg (rtx target, machine_mode mode)
+{
+  if (target && REG_P (target) && GET_MODE (target) == mode)
+    return target;
+  if (!can_create_pseudo_p ())
+    {
+      gcc_assert (target);
+      return gen_lowpart (mode, target);
+    }
+  return gen_reg_rtx (mode);
+}
+
+/* Return a register that contains the constant in BUILDER, given that
+   the constant is a legitimate move operand.  Use TARGET as the register
+   if it is nonnull and convenient.  */
+
+static rtx
+aarch64_emit_set_immediate (rtx target, rtx_vector_builder &builder)
+{
+  rtx src = builder.build ();
+  target = aarch64_target_reg (target, GET_MODE (src));
+  emit_insn (gen_rtx_SET (target, src));
+  return target;
+}
+
 static rtx
 aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
 {
@@ -2721,7 +2751,8 @@ rtx
 aarch64_ptrue_reg (machine_mode mode)
 {
   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
-  return force_reg (mode, CONSTM1_RTX (mode));
+  rtx reg = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
+  return gen_lowpart (mode, reg);
 }
 
 /* Return an all-false predicate register of mode MODE.  */
@@ -2730,7 +2761,26 @@ rtx
 aarch64_pfalse_reg (machine_mode mode)
 {
   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
-  return force_reg (mode, CONST0_RTX (mode));
+  rtx reg = force_reg (VNx16BImode, CONST0_RTX (VNx16BImode));
+  return gen_lowpart (mode, reg);
+}
+
+/* Use a comparison to convert integer vector SRC into MODE, which is
+   the corresponding SVE predicate mode.  Use TARGET for the result
+   if it's nonnull and convenient.  */
+
+static rtx
+aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
+{
+  machine_mode src_mode = GET_MODE (src);
+  insn_code icode = code_for_aarch64_pred_cmp (NE, src_mode);
+  expand_operand ops[4];
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], CONSTM1_RTX (mode), mode);
+  create_input_operand (&ops[2], src, src_mode);
+  create_input_operand (&ops[3], CONST0_RTX (src_mode), src_mode);
+  expand_insn (icode, 4, ops);
+  return ops[0].value;
 }
 
 /* Return true if we can move VALUE into a register using a single
@@ -3633,15 +3683,80 @@ aarch64_expand_sve_const_vector (rtx target, rtx src)
   return target;
 }
 
-/* Use WHILE to set predicate register DEST so that the first VL bits
-   are set and the rest are clear.  */
+/* Use WHILE to set a predicate register of mode MODE in which the first
+   VL bits are set and the rest are clear.  Use TARGET for the register
+   if it's nonnull and convenient.  */
 
-static void
-aarch64_sve_move_pred_via_while (rtx dest, unsigned int vl)
+static rtx
+aarch64_sve_move_pred_via_while (rtx target, machine_mode mode,
+				 unsigned int vl)
 {
   rtx limit = force_reg (DImode, gen_int_mode (vl, DImode));
-  emit_insn (gen_while_ult (DImode, GET_MODE (dest),
-			    dest, const0_rtx, limit));
+  target = aarch64_target_reg (target, mode);
+  emit_insn (gen_while_ult (DImode, mode, target, const0_rtx, limit));
+  return target;
+}
+
+/* Subroutine of aarch64_expand_sve_const_pred.  Try to load the VNx16BI
+   constant in BUILDER into an SVE predicate register.  Return the register
+   on success, otherwise return null.  Use TARGET for the register if
+   nonnull and convenient.  */
+
+static rtx
+aarch64_expand_sve_const_pred_1 (rtx target, rtx_vector_builder &builder)
+{
+  if (builder.encoded_nelts () == 1)
+    /* A PFALSE or a PTRUE .B ALL.  */
+    return aarch64_emit_set_immediate (target, builder);
+
+  unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
+  if (int vl = aarch64_partial_ptrue_length (builder, elt_size))
+    {
+      /* If we can load the constant using PTRUE, use it as-is.  */
+      machine_mode mode = aarch64_sve_pred_mode (elt_size).require ();
+      if (aarch64_svpattern_for_vl (mode, vl) != AARCH64_NUM_SVPATTERNS)
+	return aarch64_emit_set_immediate (target, builder);
+
+      /* Otherwise use WHILE to set the first VL bits.  */
+      return aarch64_sve_move_pred_via_while (target, mode, vl);
+    }
+
+  return NULL_RTX;
+}
+
+/* Return an SVE predicate register that contains the VNx16BImode
+   constant in BUILDER, without going through the move expanders.
+
+   The returned register can have whatever mode seems most natural
+   given the contents of BUILDER.  Use TARGET for the result if
+   convenient.  */
+
+static rtx
+aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder)
+{
+  /* Try loading the constant using pure predicate operations.  */
+  if (rtx res = aarch64_expand_sve_const_pred_1 (target, builder))
+    return res;
+
+  /* Try forcing the constant to memory.  */
+  if (builder.full_nelts ().is_constant ())
+    if (rtx mem = force_const_mem (VNx16BImode, builder.build ()))
+      {
+	target = aarch64_target_reg (target, VNx16BImode);
+	emit_move_insn (target, mem);
+	return target;
+      }
+
+  /* The last resort is to load the constant as an integer and then
+     compare it against zero.  Use -1 for set bits in order to increase
+     the chances of using SVE DUPM or an Advanced SIMD byte mask.  */
+  rtx_vector_builder int_builder (VNx16QImode, builder.npatterns (),
+				  builder.nelts_per_pattern ());
+  for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
+    int_builder.quick_push (INTVAL (builder.elt (i))
+			    ? constm1_rtx : const0_rtx);
+  return aarch64_convert_sve_data_to_pred (target, VNx16BImode,
+					   int_builder.build ());
 }
 
 /* Set DEST to immediate IMM.  */
@@ -3770,6 +3885,32 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
   if (!CONST_INT_P (imm))
     {
+      if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+	{
+	  /* Only the low bit of each .H, .S and .D element is defined,
+	     so we can set the upper bits to whatever we like.  If the
+	     predicate is all-true in MODE, prefer to set all the undefined
+	     bits as well, so that we can share a single .B predicate for
+	     all modes.  */
+	  if (imm == CONSTM1_RTX (mode))
+	    imm = CONSTM1_RTX (VNx16BImode);
+
+	  /* All methods for constructing predicate modes wider than VNx16BI
+	     will set the upper bits of each element to zero.  Expose this
+	     by moving such constants as a VNx16BI, so that all bits are
+	     significant and so that constants for different modes can be
+	     shared.  The wider constant will still be available as a
+	     REG_EQUAL note.  */
+	  rtx_vector_builder builder;
+	  if (aarch64_get_sve_pred_bits (builder, imm))
+	    {
+	      rtx res = aarch64_expand_sve_const_pred (dest, builder);
+	      if (dest != res)
+		emit_move_insn (dest, gen_lowpart (mode, res));
+	      return;
+	    }
+	}
+
       if (GET_CODE (imm) == HIGH
 	  || aarch64_simd_valid_immediate (imm, NULL))
 	{
@@ -3777,19 +3918,6 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	  return;
 	}
 
-      rtx_vector_builder builder;
-      if (GET_MODE_CLASS (GET_MODE (imm)) == MODE_VECTOR_BOOL
-	  && aarch64_get_sve_pred_bits (builder, imm))
-	{
-	  unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
-	  int vl = aarch64_partial_ptrue_length (builder, elt_size);
-	  if (vl > 0)
-	    {
-	      aarch64_sve_move_pred_via_while (dest, vl);
-	      return;
-	    }
-	}
-
       if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode))
 	if (rtx res = aarch64_expand_sve_const_vector (dest, imm))
 	  {
@@ -15178,7 +15306,17 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
     return true;
 
   if (VECTOR_MODE_P (GET_MODE (x)))
-    return aarch64_simd_valid_immediate (x, NULL);
+    {
+      /* Require predicate constants to be VNx16BI before RA, so that we
+	 force everything to have a canonical form.  */
+      if (!lra_in_progress
+	  && !reload_completed
+	  && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL
+	  && GET_MODE (x) != VNx16BImode)
+	return false;
+      return aarch64_simd_valid_immediate (x, NULL);
+    }
 
   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
     return true;

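To make the new expansion strategy concrete, here is a sketch of the main
paths through aarch64_expand_sve_const_pred (the constants, registers and
exact output are hypothetical; there is also a constant-pool fallback for
fixed-length modes):

    /* 1. Constants with a PTRUE encoding are emitted directly:
	      ptrue   p0.s, vl8	      // first 8 .S elements true

       2. A leading run of VL set bits with no matching PTRUE pattern
	  (e.g. VL == 11) uses a WHILE instead:
	      mov     x0, 11
	      whilelo p0.b, xzr, x0

       3. Anything else is loaded as a -1/0 byte mask and compared
	  against zero via aarch64_convert_sve_data_to_pred:
	      ptrue   p1.b, all
	      cmpne   p0.b, p1/z, z0.b, #0     // z0 holds the mask  */
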
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c
--- a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c
@@ -40,10 +40,7 @@ TEST_LOOP (double, 3.0)
 /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl16\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl8\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl4\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 } } */
 /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c
--- a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c
@@ -16,10 +16,7 @@
 /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl32\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl16\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl8\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 } } */
 /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c
--- a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c
@@ -16,10 +16,7 @@
 /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl64\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl32\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl16\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 } } */
 /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c
--- a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c
@@ -16,10 +16,7 @@
 /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl128\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl64\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl32\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 } } */
 /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c b/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c
--- a/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c
@@ -24,9 +24,10 @@ TEST_LOOP (uint16_t, 0x1234);
 TEST_LOOP (uint32_t, 0x12345);
 TEST_LOOP (uint64_t, 0x123456);
 
 /* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.b,} 6 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s,} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d,} 3 } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.h,} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.s,} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.d,} } } */
 /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, w[0-9]+\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s,} 3 } } */
 /* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d,} 3 } } */