arm: Implement auto-vectorized MVE comparisons with vectors of boolean predicates
We make use of qualifier_predicate to describe MVE builtins prototypes, restricting to auto-vectorizable vcmp* and vpsel builtins, as they are exercised by the tests added earlier in the series. Special handling is needed for mve_vpselq because it has a v2di variant, which has no natural VPR.P0 representation: we keep HImode for it. The vector_compare expansion code is updated to use the right VxBI mode instead of HI for the result. We extend the existing thumb2_movhi_vfp and thumb2_movhi_fp16 patterns to use the new MVE_7_HI iterator which covers HI and the new VxBI modes, in conjunction with the new DB constraint for a constant vector of booleans. This patch also adds tests derived from the one provided in PR target/101325: there is a compile-only test because I did not have access to anything that could execute MVE code until recently. I have been able to add an executable test since QEMU supports MVE. Instead of adding arm_v8_1m_mve_hw, I update arm_mve_hw so that it uses add_options_for_arm_v8_1m_mve_fp, like arm_neon_hw does. This ensures arm_mve_hw passes even if the toolchain does not generate MVE code by default. Most of the work of this patch series was carried out while I was working at STMicroelectronics as a Linaro assignee. 2022-02-22 Christophe Lyon <christophe.lyon@arm.com> Richard Sandiford <richard.sandiford@arm.com> gcc/ PR target/100757 PR target/101325 * config/arm/arm-builtins.cc (BINOP_PRED_UNONE_UNONE_QUALIFIERS) (BINOP_PRED_NONE_NONE_QUALIFIERS) (TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS) (TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS): New. * config/arm/arm-protos.h (mve_bool_vec_to_const): New. * config/arm/arm.cc (arm_hard_regno_mode_ok): Handle new VxBI modes. (arm_mode_to_pred_mode): New. (arm_expand_vector_compare): Use the right VxBI mode instead of HI. (arm_expand_vcond): Likewise. (simd_valid_immediate): Handle MODE_VECTOR_BOOL. (mve_bool_vec_to_const): New. (neon_make_constant): Call mve_bool_vec_to_const when needed. 
* config/arm/arm_mve_builtins.def (vcmpneq_, vcmphiq_, vcmpcsq_) (vcmpltq_, vcmpleq_, vcmpgtq_, vcmpgeq_, vcmpeqq_, vcmpneq_f) (vcmpltq_f, vcmpleq_f, vcmpgtq_f, vcmpgeq_f, vcmpeqq_f, vpselq_u) (vpselq_s, vpselq_f): Use new predicated qualifiers. * config/arm/constraints.md (DB): New. * config/arm/iterators.md (MVE_7, MVE_7_HI): New mode iterators. (MVE_VPRED, MVE_vpred): New attribute iterators. * config/arm/mve.md (@mve_vcmp<mve_cmp_op>q_<mode>) (@mve_vcmp<mve_cmp_op>q_f<mode>, @mve_vpselq_<supf><mode>) (@mve_vpselq_f<mode>): Use MVE_VPRED instead of HI. (@mve_vpselq_<supf>v2di): Define separately. (mov<mode>): New expander for VxBI modes. * config/arm/vfp.md (thumb2_movhi_vfp, thumb2_movhi_fp16): Use MVE_7_HI iterator and add support for DB constraint. gcc/testsuite/ PR target/100757 PR target/101325 * gcc.dg/rtl/arm/mve-vxbi.c: New test. * gcc.target/arm/simd/pr101325.c: New. * gcc.target/arm/simd/pr101325-2.c: New. * lib/target-supports.exp (check_effective_target_arm_mve_hw): Use add_options_for_arm_v8_1m_mve_fp.
This commit is contained in:
parent
884f77b422
commit
91224cf625
|
@ -420,6 +420,12 @@ arm_binop_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
|||
#define BINOP_UNONE_UNONE_UNONE_QUALIFIERS \
|
||||
(arm_binop_unone_unone_unone_qualifiers)
|
||||
|
||||
/* Qualifier table for MVE binary builtins: one qualifier_predicate entry
   followed by two qualifier_unsigned entries.
   NOTE(review): entry 0 presumably describes the builtin's result, as the
   naming of the neighbouring tables suggests -- confirm.  */
static enum arm_type_qualifiers
|
||||
arm_binop_pred_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_predicate, qualifier_unsigned, qualifier_unsigned };
|
||||
#define BINOP_PRED_UNONE_UNONE_QUALIFIERS \
|
||||
(arm_binop_pred_unone_unone_qualifiers)
|
||||
|
||||
static enum arm_type_qualifiers
|
||||
arm_binop_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_unsigned, qualifier_none, qualifier_immediate };
|
||||
|
@ -438,6 +444,12 @@ arm_binop_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
|||
#define BINOP_UNONE_NONE_NONE_QUALIFIERS \
|
||||
(arm_binop_unone_none_none_qualifiers)
|
||||
|
||||
/* Qualifier table for MVE binary builtins: one qualifier_predicate entry
   followed by two qualifier_none entries.
   NOTE(review): entry 0 presumably describes the builtin's result, as the
   naming of the neighbouring tables suggests -- confirm.  */
static enum arm_type_qualifiers
|
||||
arm_binop_pred_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_predicate, qualifier_none, qualifier_none };
|
||||
#define BINOP_PRED_NONE_NONE_QUALIFIERS \
|
||||
(arm_binop_pred_none_none_qualifiers)
|
||||
|
||||
static enum arm_type_qualifiers
|
||||
arm_binop_unone_unone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_unsigned, qualifier_unsigned, qualifier_none };
|
||||
|
@ -509,6 +521,12 @@ arm_ternop_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
|||
#define TERNOP_NONE_NONE_NONE_UNONE_QUALIFIERS \
|
||||
(arm_ternop_none_none_none_unone_qualifiers)
|
||||
|
||||
/* Qualifier table for MVE ternary builtins: three qualifier_none entries
   followed by one qualifier_predicate entry.
   NOTE(review): the trailing predicate is presumably the last operand of
   the builtin (e.g. the selector of vpselq) -- confirm.  */
static enum arm_type_qualifiers
|
||||
arm_ternop_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_none, qualifier_none, qualifier_none, qualifier_predicate };
|
||||
#define TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS \
|
||||
(arm_ternop_none_none_none_pred_qualifiers)
|
||||
|
||||
static enum arm_type_qualifiers
|
||||
arm_ternop_none_none_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_none, qualifier_none, qualifier_immediate, qualifier_unsigned };
|
||||
|
@ -528,6 +546,13 @@ arm_ternop_unone_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
|||
#define TERNOP_UNONE_UNONE_UNONE_UNONE_QUALIFIERS \
|
||||
(arm_ternop_unone_unone_unone_unone_qualifiers)
|
||||
|
||||
/* Qualifier table for MVE ternary builtins: three qualifier_unsigned
   entries followed by one qualifier_predicate entry.
   NOTE(review): the trailing predicate is presumably the last operand of
   the builtin (e.g. the selector of vpselq) -- confirm.  */
static enum arm_type_qualifiers
|
||||
arm_ternop_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
|
||||
qualifier_predicate };
|
||||
#define TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS \
|
||||
(arm_ternop_unone_unone_unone_pred_qualifiers)
|
||||
|
||||
static enum arm_type_qualifiers
|
||||
arm_ternop_none_none_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
|
||||
|
|
|
@ -101,6 +101,7 @@ extern char *neon_output_shift_immediate (const char *, char, rtx *,
|
|||
machine_mode, int, bool);
|
||||
extern void neon_pairwise_reduce (rtx, rtx, machine_mode,
|
||||
rtx (*) (rtx, rtx, rtx));
|
||||
extern rtx mve_bool_vec_to_const (rtx const_vec);
|
||||
extern rtx neon_make_constant (rtx, bool generate = true);
|
||||
extern tree arm_builtin_vectorized_function (unsigned int, tree, tree);
|
||||
extern void neon_expand_vector_init (rtx, rtx);
|
||||
|
|
|
@ -12802,7 +12802,10 @@ simd_valid_immediate (rtx op, machine_mode mode, int inverse,
|
|||
innersize = GET_MODE_UNIT_SIZE (mode);
|
||||
|
||||
/* Only support 128-bit vectors for MVE. */
|
||||
if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
|
||||
if (TARGET_HAVE_MVE
|
||||
&& (!vector
|
||||
|| (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
|
||||
|| n_elts * innersize != 16))
|
||||
return -1;
|
||||
|
||||
/* Vectors of float constants. */
|
||||
|
@ -13167,6 +13170,29 @@ neon_vdup_constant (rtx vals, bool generate)
|
|||
return gen_vec_duplicate (mode, x);
|
||||
}
|
||||
|
||||
/* Return a HI representation of CONST_VEC suitable for MVE predicates. */
|
||||
rtx
|
||||
mve_bool_vec_to_const (rtx const_vec)
|
||||
{
|
||||
int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
|
||||
int repeat = 16 / n_elts;
|
||||
int i;
|
||||
int hi_val = 0;
|
||||
|
||||
for (i = 0; i < n_elts; i++)
|
||||
{
|
||||
rtx el = CONST_VECTOR_ELT (const_vec, i);
|
||||
unsigned HOST_WIDE_INT elpart;
|
||||
|
||||
gcc_assert (CONST_INT_P (el));
|
||||
elpart = INTVAL (el);
|
||||
|
||||
for (int j = 0; j < repeat; j++)
|
||||
hi_val |= elpart << (i * repeat + j);
|
||||
}
|
||||
return gen_int_mode (hi_val, HImode);
|
||||
}
|
||||
|
||||
/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
|
||||
constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
|
||||
into a register.
|
||||
|
@ -13207,6 +13233,8 @@ neon_make_constant (rtx vals, bool generate)
|
|||
&& simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
|
||||
/* Load using VMOV. On Cortex-A8 this takes one cycle. */
|
||||
return const_vec;
|
||||
else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
|
||||
return mve_bool_vec_to_const (const_vec);
|
||||
else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
|
||||
/* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
|
||||
pipeline cycle; creating the constant takes one or two ARM
|
||||
|
@ -25365,7 +25393,10 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|
|||
return false;
|
||||
|
||||
if (IS_VPR_REGNUM (regno))
|
||||
return mode == HImode;
|
||||
return mode == HImode
|
||||
|| mode == V16BImode
|
||||
|| mode == V8BImode
|
||||
|| mode == V4BImode;
|
||||
|
||||
if (TARGET_THUMB1)
|
||||
/* For the Thumb we only allow values bigger than SImode in
|
||||
|
@ -31053,6 +31084,19 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
|
|||
arm_post_atomic_barrier (model);
|
||||
}
|
||||
|
||||
/* Return the mode for the MVE vector of predicates corresponding to MODE. */
|
||||
machine_mode
|
||||
arm_mode_to_pred_mode (machine_mode mode)
|
||||
{
|
||||
switch (GET_MODE_NUNITS (mode))
|
||||
{
|
||||
case 16: return V16BImode;
|
||||
case 8: return V8BImode;
|
||||
case 4: return V4BImode;
|
||||
}
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Expand code to compare vectors OP0 and OP1 using condition CODE.
|
||||
If CAN_INVERT, store either the result or its inverse in TARGET
|
||||
and return true if TARGET contains the inverse. If !CAN_INVERT,
|
||||
|
@ -31136,7 +31180,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
|
|||
if (vcond_mve)
|
||||
vpr_p0 = target;
|
||||
else
|
||||
vpr_p0 = gen_reg_rtx (HImode);
|
||||
vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
|
||||
|
||||
switch (GET_MODE_CLASS (cmp_mode))
|
||||
{
|
||||
|
@ -31178,7 +31222,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
|
|||
if (vcond_mve)
|
||||
vpr_p0 = target;
|
||||
else
|
||||
vpr_p0 = gen_reg_rtx (HImode);
|
||||
vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
|
||||
|
||||
emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
|
||||
if (!vcond_mve)
|
||||
|
@ -31205,7 +31249,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
|
|||
if (vcond_mve)
|
||||
vpr_p0 = target;
|
||||
else
|
||||
vpr_p0 = gen_reg_rtx (HImode);
|
||||
vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
|
||||
|
||||
emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, force_reg (cmp_mode, op1), op0));
|
||||
if (!vcond_mve)
|
||||
|
@ -31258,7 +31302,7 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
|
|||
if (TARGET_HAVE_MVE)
|
||||
{
|
||||
vcond_mve=true;
|
||||
mask = gen_reg_rtx (HImode);
|
||||
mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode));
|
||||
}
|
||||
else
|
||||
mask = gen_reg_rtx (cmp_result_mode);
|
||||
|
|
|
@ -89,7 +89,7 @@ VAR3 (BINOP_UNONE_UNONE_IMM, vshrq_n_u, v16qi, v8hi, v4si)
|
|||
VAR3 (BINOP_NONE_NONE_IMM, vshrq_n_s, v16qi, v8hi, v4si)
|
||||
VAR1 (BINOP_NONE_NONE_UNONE, vaddlvq_p_s, v4si)
|
||||
VAR1 (BINOP_UNONE_UNONE_UNONE, vaddlvq_p_u, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_PRED_NONE_NONE, vcmpneq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_NONE_NONE_NONE, vshlq_s, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_NONE, vshlq_u, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_UNONE, vsubq_u, v16qi, v8hi, v4si)
|
||||
|
@ -117,9 +117,9 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vhsubq_n_u, v16qi, v8hi, v4si)
|
|||
VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_u, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_n_u, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_UNONE, veorq_u, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_PRED_UNONE_UNONE, vcmphiq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_n_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_PRED_UNONE_UNONE, vcmpcsq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si)
|
||||
|
@ -143,15 +143,15 @@ VAR3 (BINOP_UNONE_UNONE_IMM, vshlq_n_u, v16qi, v8hi, v4si)
|
|||
VAR3 (BINOP_UNONE_UNONE_IMM, vrshrq_n_u, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_UNONE_IMM, vqshlq_n_u, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_n_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_PRED_NONE_NONE, vcmpltq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_n_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_PRED_NONE_NONE, vcmpleq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_n_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_PRED_NONE_NONE, vcmpgtq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpgeq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_PRED_NONE_NONE, vcmpgeq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpgeq_n_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpeqq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_PRED_NONE_NONE, vcmpeqq_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_NONE, vcmpeqq_n_, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_UNONE_NONE_IMM, vqshluq_n_s, v16qi, v8hi, v4si)
|
||||
VAR3 (BINOP_NONE_NONE_UNONE, vaddvq_p_s, v16qi, v8hi, v4si)
|
||||
|
@ -219,17 +219,17 @@ VAR2 (BINOP_UNONE_UNONE_IMM, vshllbq_n_u, v16qi, v8hi)
|
|||
VAR2 (BINOP_UNONE_UNONE_IMM, vorrq_n_u, v8hi, v4si)
|
||||
VAR2 (BINOP_UNONE_UNONE_IMM, vbicq_n_u, v8hi, v4si)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpneq_n_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpneq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_PRED_NONE_NONE, vcmpneq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpltq_n_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpltq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_PRED_NONE_NONE, vcmpltq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpleq_n_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpleq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_PRED_NONE_NONE, vcmpleq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpgtq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_PRED_NONE_NONE, vcmpgtq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpgeq_n_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpgeq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_PRED_NONE_NONE, vcmpgeq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpeqq_n_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_UNONE_NONE_NONE, vcmpeqq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_PRED_NONE_NONE, vcmpeqq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_NONE_NONE_NONE, vsubq_f, v8hf, v4sf)
|
||||
VAR2 (BINOP_NONE_NONE_NONE, vqmovntq_s, v8hi, v4si)
|
||||
VAR2 (BINOP_NONE_NONE_NONE, vqmovnbq_s, v8hi, v4si)
|
||||
|
@ -295,8 +295,8 @@ VAR2 (TERNOP_UNONE_UNONE_NONE_UNONE, vcvtaq_m_u, v8hi, v4si)
|
|||
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vcvtaq_m_s, v8hi, v4si)
|
||||
VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vshlcq_vec_u, v16qi, v8hi, v4si)
|
||||
VAR3 (TERNOP_NONE_NONE_UNONE_IMM, vshlcq_vec_s, v16qi, v8hi, v4si)
|
||||
VAR4 (TERNOP_UNONE_UNONE_UNONE_UNONE, vpselq_u, v16qi, v8hi, v4si, v2di)
|
||||
VAR4 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_s, v16qi, v8hi, v4si, v2di)
|
||||
VAR4 (TERNOP_UNONE_UNONE_UNONE_PRED, vpselq_u, v16qi, v8hi, v4si, v2di)
|
||||
VAR4 (TERNOP_NONE_NONE_NONE_PRED, vpselq_s, v16qi, v8hi, v4si, v2di)
|
||||
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrev64q_m_u, v16qi, v8hi, v4si)
|
||||
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmvnq_m_u, v16qi, v8hi, v4si)
|
||||
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlasq_n_u, v16qi, v8hi, v4si)
|
||||
|
@ -426,7 +426,7 @@ VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrev64q_m_f, v8hf, v4sf)
|
|||
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrev32q_m_s, v16qi, v8hi)
|
||||
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vqmovntq_m_s, v8hi, v4si)
|
||||
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vqmovnbq_m_s, v8hi, v4si)
|
||||
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_f, v8hf, v4sf)
|
||||
VAR2 (TERNOP_NONE_NONE_NONE_PRED, vpselq_f, v8hf, v4sf)
|
||||
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vnegq_m_f, v8hf, v4sf)
|
||||
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmovntq_m_s, v8hi, v4si)
|
||||
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmovnbq_m_s, v8hi, v4si)
|
||||
|
|
|
@ -312,6 +312,12 @@
|
|||
(and (match_code "const_vector")
|
||||
(match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)")))
|
||||
|
||||
(define_constraint "DB"
|
||||
"@internal
|
||||
In ARM/Thumb-2 state with MVE a constant vector of booleans."
|
||||
(and (match_code "const_vector")
|
||||
(match_test "TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL")))
|
||||
|
||||
(define_constraint "Da"
|
||||
"@internal
|
||||
In ARM/Thumb-2 state a const_int, const_double or const_vector that can
|
||||
|
|
|
@ -272,6 +272,8 @@
|
|||
(define_mode_iterator MVE_2 [V16QI V8HI V4SI])
|
||||
(define_mode_iterator MVE_5 [V8HI V4SI])
|
||||
(define_mode_iterator MVE_6 [V8HI V4SI])
|
||||
(define_mode_iterator MVE_7 [V16BI V8BI V4BI])
|
||||
(define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI])
|
||||
|
||||
;;----------------------------------------------------------------------------
|
||||
;; Code iterators
|
||||
|
@ -946,6 +948,10 @@
|
|||
(V8HF "u16") (V4SF "32")])
|
||||
(define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w")
|
||||
(V8HF "=w") (V4SF "=&w")])
|
||||
(define_mode_attr MVE_VPRED [(V16QI "V16BI") (V8HI "V8BI") (V4SI "V4BI")
|
||||
(V2DI "HI") (V8HF "V8BI") (V4SF "V4BI")])
|
||||
(define_mode_attr MVE_vpred [(V16QI "v16bi") (V8HI "v8bi") (V4SI "v4bi")
|
||||
(V2DI "hi") (V8HF "v8bi") (V4SF "v4bi")])
|
||||
|
||||
;;----------------------------------------------------------------------------
|
||||
;; Code attributes
|
||||
|
|
|
@ -839,8 +839,8 @@
|
|||
;;
|
||||
(define_insn "@mve_vcmp<mve_cmp_op>q_<mode>"
|
||||
[
|
||||
(set (match_operand:HI 0 "vpr_register_operand" "=Up")
|
||||
(MVE_COMPARISONS:HI (match_operand:MVE_2 1 "s_register_operand" "w")
|
||||
(set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
|
||||
(MVE_COMPARISONS:<MVE_VPRED> (match_operand:MVE_2 1 "s_register_operand" "w")
|
||||
(match_operand:MVE_2 2 "s_register_operand" "w")))
|
||||
]
|
||||
"TARGET_HAVE_MVE"
|
||||
|
@ -1929,8 +1929,8 @@
|
|||
;;
|
||||
(define_insn "@mve_vcmp<mve_cmp_op>q_f<mode>"
|
||||
[
|
||||
(set (match_operand:HI 0 "vpr_register_operand" "=Up")
|
||||
(MVE_FP_COMPARISONS:HI (match_operand:MVE_0 1 "s_register_operand" "w")
|
||||
(set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
|
||||
(MVE_FP_COMPARISONS:<MVE_VPRED> (match_operand:MVE_0 1 "s_register_operand" "w")
|
||||
(match_operand:MVE_0 2 "s_register_operand" "w")))
|
||||
]
|
||||
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
|
||||
|
@ -3324,7 +3324,7 @@
|
|||
(set (match_operand:MVE_1 0 "s_register_operand" "=w")
|
||||
(unspec:MVE_1 [(match_operand:MVE_1 1 "s_register_operand" "w")
|
||||
(match_operand:MVE_1 2 "s_register_operand" "w")
|
||||
(match_operand:HI 3 "vpr_register_operand" "Up")]
|
||||
(match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
|
||||
VPSELQ))
|
||||
]
|
||||
"TARGET_HAVE_MVE"
|
||||
|
@ -4419,7 +4419,7 @@
|
|||
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
|
||||
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
|
||||
(match_operand:MVE_0 2 "s_register_operand" "w")
|
||||
(match_operand:HI 3 "vpr_register_operand" "Up")]
|
||||
(match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
|
||||
VPSELQ_F))
|
||||
]
|
||||
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
|
||||
|
@ -10516,3 +10516,14 @@
|
|||
"vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
|
||||
[(set_attr "type" "mve_load")]
|
||||
)
|
||||
|
||||
;; Expander for VxBI moves
|
||||
(define_expand "mov<mode>"
|
||||
[(set (match_operand:MVE_7 0 "nonimmediate_operand")
|
||||
(match_operand:MVE_7 1 "general_operand"))]
|
||||
"TARGET_HAVE_MVE"
|
||||
{
|
||||
if (!register_operand (operands[0], <MODE>mode))
|
||||
operands[1] = force_reg (<MODE>mode, operands[1]);
|
||||
}
|
||||
)
|
||||
|
|
|
@ -73,21 +73,26 @@
|
|||
|
||||
(define_insn "*thumb2_movhi_vfp"
|
||||
[(set
|
||||
(match_operand:HI 0 "nonimmediate_operand"
|
||||
(match_operand:MVE_7_HI 0 "nonimmediate_operand"
|
||||
"=rk, r, l, r, m, r, *t, r, *t, Up, r")
|
||||
(match_operand:HI 1 "general_operand"
|
||||
"rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
|
||||
(match_operand:MVE_7_HI 1 "general_operand"
|
||||
"rk, IDB, Py, n, r, m, r, *t, *t, r, Up"))]
|
||||
"TARGET_THUMB2 && TARGET_VFP_BASE
|
||||
&& !TARGET_VFP_FP16INST
|
||||
&& (register_operand (operands[0], HImode)
|
||||
|| register_operand (operands[1], HImode))"
|
||||
&& (register_operand (operands[0], <MODE>mode)
|
||||
|| register_operand (operands[1], <MODE>mode))"
|
||||
{
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
return "mov%?\t%0, %1\t%@ movhi";
|
||||
case 1:
|
||||
if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL)
|
||||
/* Turn the constant vector of booleans into its HImode integer form.  */
operands[1] = mve_bool_vec_to_const (operands[1]);
|
||||
else
|
||||
operands[1] = gen_lowpart (HImode, operands[1]);
|
||||
return "mov%?\t%0, %1\t%@ movhi";
|
||||
case 3:
|
||||
return "movw%?\t%0, %L1\t%@ movhi";
|
||||
case 4:
|
||||
|
@ -173,20 +178,25 @@
|
|||
|
||||
(define_insn "*thumb2_movhi_fp16"
|
||||
[(set
|
||||
(match_operand:HI 0 "nonimmediate_operand"
|
||||
(match_operand:MVE_7_HI 0 "nonimmediate_operand"
|
||||
"=rk, r, l, r, m, r, *t, r, *t, Up, r")
|
||||
(match_operand:HI 1 "general_operand"
|
||||
"rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
|
||||
(match_operand:MVE_7_HI 1 "general_operand"
|
||||
"rk, IDB, Py, n, r, m, r, *t, *t, r, Up"))]
|
||||
"TARGET_THUMB2 && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)
|
||||
&& (register_operand (operands[0], HImode)
|
||||
|| register_operand (operands[1], HImode))"
|
||||
&& (register_operand (operands[0], <MODE>mode)
|
||||
|| register_operand (operands[1], <MODE>mode))"
|
||||
{
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
return "mov%?\t%0, %1\t%@ movhi";
|
||||
case 1:
|
||||
if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL)
|
||||
/* Turn the constant vector of booleans into its HImode integer form.  */
operands[1] = mve_bool_vec_to_const (operands[1]);
|
||||
else
|
||||
operands[1] = gen_lowpart (HImode, operands[1]);
|
||||
return "mov%?\t%0, %1\t%@ movhi";
|
||||
case 3:
|
||||
return "movw%?\t%0, %L1\t%@ movhi";
|
||||
case 4:
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
/* { dg-do compile { target arm*-*-* } } */
|
||||
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
|
||||
/* { dg-add-options arm_v8_1m_mve } */
|
||||
/* { dg-additional-options "-O2" } */
|
||||
|
||||
void __RTL (startwith ("ira")) foo (void *ptr)
|
||||
{
|
||||
(function "foo"
|
||||
(param "ptr"
|
||||
(DECL_RTL (reg/v:SI <0> [ ptr ]))
|
||||
(DECL_RTL_INCOMING (reg:SI r0 [ ptr ]))
|
||||
) ;; param "ptr"
|
||||
(insn-chain
|
||||
(block 2
|
||||
(edge-from entry (flags "FALLTHRU"))
|
||||
(cnote 5 [bb 2] NOTE_INSN_BASIC_BLOCK)
|
||||
(insn 7 (set (reg:V4BI <1>)
|
||||
(const_vector:V4BI [(const_int 1)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 1)])) (nil))
|
||||
(insn 8 (set (mem:V4BI (reg:SI <0>) [1 ptr+0 S2 A16]) (reg:V4BI <1>)))
|
||||
(edge-to exit (flags "FALLTHRU"))
|
||||
) ;; block 2
|
||||
) ;; insn-chain
|
||||
) ;; function
|
||||
}
|
||||
|
||||
void __RTL (startwith ("ira")) foo2 (void *ptr)
|
||||
{
|
||||
(function "foo"
|
||||
(param "ptr"
|
||||
(DECL_RTL (reg/v:SI <0> [ ptr ]))
|
||||
(DECL_RTL_INCOMING (reg:SI r0 [ ptr ]))
|
||||
) ;; param "ptr"
|
||||
(insn-chain
|
||||
(block 2
|
||||
(edge-from entry (flags "FALLTHRU"))
|
||||
(cnote 5 [bb 2] NOTE_INSN_BASIC_BLOCK)
|
||||
(insn 7 (set (reg:V8BI <1>)
|
||||
(const_vector:V8BI [(const_int 1)
|
||||
(const_int 0)
|
||||
(const_int 1)
|
||||
(const_int 1)
|
||||
(const_int 1)
|
||||
(const_int 1)
|
||||
(const_int 0)
|
||||
(const_int 1)])) (nil))
|
||||
(insn 8 (set (mem:V8BI (reg:SI <0>) [1 ptr+0 S2 A16]) (reg:V8BI <1>)))
|
||||
(edge-to exit (flags "FALLTHRU"))
|
||||
) ;; block 2
|
||||
) ;; insn-chain
|
||||
) ;; function
|
||||
}
|
||||
|
||||
void __RTL (startwith ("ira")) foo3 (void *ptr)
|
||||
{
|
||||
(function "foo"
|
||||
(param "ptr"
|
||||
(DECL_RTL (reg/v:SI <0> [ ptr ]))
|
||||
(DECL_RTL_INCOMING (reg:SI r0 [ ptr ]))
|
||||
) ;; param "ptr"
|
||||
(insn-chain
|
||||
(block 2
|
||||
(edge-from entry (flags "FALLTHRU"))
|
||||
(cnote 5 [bb 2] NOTE_INSN_BASIC_BLOCK)
|
||||
(insn 7 (set (reg:V16BI <1>)
|
||||
(const_vector:V16BI [(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)
|
||||
(const_int 0)])) (nil))
|
||||
(insn 8 (set (mem:V16BI (reg:SI <0>) [1 ptr+0 S2 A16]) (reg:V16BI <1>)))
|
||||
(edge-to exit (flags "FALLTHRU"))
|
||||
) ;; block 2
|
||||
) ;; insn-chain
|
||||
) ;; function
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target arm_mve_hw } */
|
||||
/* { dg-options "-O3" } */
|
||||
/* { dg-add-options arm_v8_1m_mve } */
|
||||
|
||||
#include <arm_mve.h>
|
||||
|
||||
|
||||
__attribute((noipa))
|
||||
unsigned foo(int8x16_t v, int8x16_t w)
|
||||
{
|
||||
return vcmpeqq (v, w);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
if (foo (vdupq_n_s8(0), vdupq_n_s8(0)) != 0xffffU)
|
||||
__builtin_abort ();
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
|
||||
/* { dg-add-options arm_v8_1m_mve } */
|
||||
/* { dg-additional-options "-O3" } */
|
||||
|
||||
#include <arm_mve.h>
|
||||
|
||||
unsigned foo(int8x16_t v, int8x16_t w)
|
||||
{
|
||||
return vcmpeqq (v, w);
|
||||
}
|
||||
/* { dg-final { scan-assembler {\tvcmp.i8 eq} } } */
|
||||
/* { dg-final { scan-assembler {\tvmrs\tr[0-9]+, P0} } } */
|
||||
/* { dg-final { scan-assembler {\tuxth} } } */
|
|
@ -5038,6 +5038,7 @@ proc check_effective_target_arm_cmse_hw { } {
|
|||
}
|
||||
} "-mcmse"]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports executing MVE instructions, 0
|
||||
# otherwise.
|
||||
|
||||
|
@ -5053,7 +5054,7 @@ proc check_effective_target_arm_mve_hw {} {
|
|||
: "0" (a), "r" (b));
|
||||
return (a != 2);
|
||||
}
|
||||
} ""]
|
||||
} [add_options_for_arm_v8_1m_mve_fp ""]]
|
||||
}
|
||||
|
||||
# Return 1 if this is an ARM target where ARMv8-M Security Extensions with
|
||||
|
|
Loading…
Reference in New Issue