[AArch64] PR/64134: Make aarch64_expand_vector_init use 'ins' more often
gcc/:
PR target/64134
* config/aarch64/aarch64.c (aarch64_expand_vector_init): Load constant and overwrite variable parts if <= 1/2 the elements are variable.

gcc/testsuite/:
PR target/64134
* gcc.target/aarch64/vec_init_1.c: New test.

From-SVN: r222229
This commit is contained in:
parent
86f25864e7
commit
8b66a2d43b
|
@ -1,3 +1,9 @@
|
|||
2015-04-20 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
PR target/64134
|
||||
* config/aarch64/aarch64.c (aarch64_expand_vector_init): Load constant
|
||||
and overwrite variable parts if <= 1/2 the elements are variable.
|
||||
|
||||
2015-04-19 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
PR rtl-optimization/65805
|
||||
|
|
|
@ -8769,22 +8769,19 @@ aarch64_expand_vector_init (rtx target, rtx vals)
|
|||
machine_mode mode = GET_MODE (target);
|
||||
machine_mode inner_mode = GET_MODE_INNER (mode);
|
||||
int n_elts = GET_MODE_NUNITS (mode);
|
||||
int n_var = 0, one_var = -1;
|
||||
int n_var = 0;
|
||||
rtx any_const = NULL_RTX;
|
||||
bool all_same = true;
|
||||
rtx x, mem;
|
||||
int i;
|
||||
|
||||
x = XVECEXP (vals, 0, 0);
|
||||
if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
|
||||
n_var = 1, one_var = 0;
|
||||
|
||||
for (i = 1; i < n_elts; ++i)
|
||||
for (int i = 0; i < n_elts; ++i)
|
||||
{
|
||||
x = XVECEXP (vals, 0, i);
|
||||
rtx x = XVECEXP (vals, 0, i);
|
||||
if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
|
||||
++n_var, one_var = i;
|
||||
++n_var;
|
||||
else
|
||||
any_const = x;
|
||||
|
||||
if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
|
||||
if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
|
||||
all_same = false;
|
||||
}
|
||||
|
||||
|
@ -8801,36 +8798,60 @@ aarch64_expand_vector_init (rtx target, rtx vals)
|
|||
/* Splat a single non-constant element if we can. */
|
||||
if (all_same)
|
||||
{
|
||||
x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
|
||||
rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
|
||||
aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
|
||||
return;
|
||||
}
|
||||
|
||||
/* One field is non-constant. Load constant then overwrite varying
|
||||
field. This is more efficient than using the stack. */
|
||||
if (n_var == 1)
|
||||
/* Half the fields (or less) are non-constant. Load constant then overwrite
|
||||
varying fields. Hope that this is more efficient than using the stack. */
|
||||
if (n_var <= n_elts/2)
|
||||
{
|
||||
rtx copy = copy_rtx (vals);
|
||||
rtx index = GEN_INT (one_var);
|
||||
enum insn_code icode;
|
||||
|
||||
/* Load constant part of vector, substitute neighboring value for
|
||||
varying element. */
|
||||
XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
|
||||
/* Load constant part of vector. We really don't care what goes into the
|
||||
parts we will overwrite, but we're more likely to be able to load the
|
||||
constant efficiently if it has fewer, larger, repeating parts
|
||||
(see aarch64_simd_valid_immediate). */
|
||||
for (int i = 0; i < n_elts; i++)
|
||||
{
|
||||
rtx x = XVECEXP (vals, 0, i);
|
||||
if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
|
||||
continue;
|
||||
rtx subst = any_const;
|
||||
for (int bit = n_elts / 2; bit > 0; bit /= 2)
|
||||
{
|
||||
/* Look in the copied vector, as more elements are const. */
|
||||
rtx test = XVECEXP (copy, 0, i ^ bit);
|
||||
if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
|
||||
{
|
||||
subst = test;
|
||||
break;
|
||||
}
|
||||
}
|
||||
XVECEXP (copy, 0, i) = subst;
|
||||
}
|
||||
aarch64_expand_vector_init (target, copy);
|
||||
|
||||
/* Insert variable. */
|
||||
x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
|
||||
icode = optab_handler (vec_set_optab, mode);
|
||||
/* Insert variables. */
|
||||
enum insn_code icode = optab_handler (vec_set_optab, mode);
|
||||
gcc_assert (icode != CODE_FOR_nothing);
|
||||
emit_insn (GEN_FCN (icode) (target, x, index));
|
||||
|
||||
for (int i = 0; i < n_elts; i++)
|
||||
{
|
||||
rtx x = XVECEXP (vals, 0, i);
|
||||
if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
|
||||
continue;
|
||||
x = copy_to_mode_reg (inner_mode, x);
|
||||
emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Construct the vector in memory one field at a time
|
||||
and load the whole vector. */
|
||||
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
|
||||
for (i = 0; i < n_elts; i++)
|
||||
rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
|
||||
for (int i = 0; i < n_elts; i++)
|
||||
emit_move_insn (adjust_address_nv (mem, inner_mode,
|
||||
i * GET_MODE_SIZE (inner_mode)),
|
||||
XVECEXP (vals, 0, i));
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2015-04-20 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
PR target/64134
|
||||
* gcc.target/aarch64/vec_init_1.c: New test.
|
||||
|
||||
2015-04-20 Yvan Roux <yvan.roux@linaro.org>
|
||||
|
||||
* gcc.target/arm/pr65729.c: Restrict to hard float ABI compliant
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-inline" } */
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
typedef float float16x4_t __attribute__ ((vector_size ((16))));
|
||||
|
||||
float a;
|
||||
float b;
|
||||
|
||||
/* Return a four-element vector whose first two lanes are the constant 0 and whose last two lanes are read from the globals a and b, so exactly half of the elements are non-constant.  */
float16x4_t
|
||||
make_vector ()
|
||||
{
|
||||
return (float16x4_t) { 0, 0, a, b };
|
||||
}
|
||||
|
||||
/* Store known values in the globals a and b, build the vector with make_vector, and abort if any lane differs from what was requested.  Returns 0 on success.  */
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
a = 4.0;
|
||||
b = 3.0;
|
||||
float16x4_t vec = make_vector ();
|
||||
/* Lanes 0 and 1 must hold the constant 0; lanes 2 and 3 must hold the current values of a and b.  */
if (vec[0] != 0 || vec[1] != 0 || vec[2] != a || vec[3] != b)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "ins\\t" 2 } } */
|
||||
/* What we want to check is that make_vector does not stp the whole vector
|
||||
to the stack. Unfortunately here we scan the body of main() too, which may
|
||||
be a bit fragile - the test is currently passing only because of the option
|
||||
-fomit-frame-pointer which avoids use of stp in the prologue to main(). */
|
||||
/* { dg-final { scan-assembler-not "stp\\t" } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
Loading…
Reference in New Issue