PR target/83663 - Revert r255946
gcc/ 2018-01-08 Vidya Praveen <vidyapraveen@arm.com> PR target/83663 - Revert r255946 * config/aarch64/aarch64.c (aarch64_expand_vector_init): Modify code generation for cases where splatting a value is not useful. * simplify-rtx.c (simplify_ternary_operation): Simplify vec_merge across a vec_duplicate and a paradoxical subreg forming a vector mode to a vec_concat. gcc/testsuite/ 2018-01-08 Vidya Praveen <vidyapraveen@arm.com> PR target/83663 - Revert r255946 * gcc.target/aarch64/vect-slp-dup.c: New. From-SVN: r256346
This commit is contained in:
parent
a00a42220e
commit
647c61f1c4
@ -1,3 +1,12 @@
|
||||
2018-01-08 Vidya Praveen <vidyapraveen@arm.com>
|
||||
|
||||
PR target/83663 - Revert r255946
|
||||
* config/aarch64/aarch64.c (aarch64_expand_vector_init): Modify code
|
||||
generation for cases where splatting a value is not useful.
|
||||
* simplify-rtx.c (simplify_ternary_operation): Simplify vec_merge
|
||||
across a vec_duplicate and a paradoxical subreg forming a vector
|
||||
mode to a vec_concat.
|
||||
|
||||
2018-01-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
* config/arm/t-aprofile (MULTILIB_MATCHES): Add mapping rules for
|
||||
|
@ -12129,51 +12129,9 @@ aarch64_expand_vector_init (rtx target, rtx vals)
|
||||
maxv = matches[i][1];
|
||||
}
|
||||
|
||||
/* Create a duplicate of the most common element, unless all elements
|
||||
are equally useless to us, in which case just immediately set the
|
||||
vector register using the first element. */
|
||||
|
||||
if (maxv == 1)
|
||||
{
|
||||
/* For vectors of two 64-bit elements, we can do even better. */
|
||||
if (n_elts == 2
|
||||
&& (inner_mode == E_DImode
|
||||
|| inner_mode == E_DFmode))
|
||||
|
||||
{
|
||||
rtx x0 = XVECEXP (vals, 0, 0);
|
||||
rtx x1 = XVECEXP (vals, 0, 1);
|
||||
/* Combine can pick up this case, but handling it directly
|
||||
here leaves clearer RTL.
|
||||
|
||||
This is load_pair_lanes<mode>, and also gives us a clean-up
|
||||
for store_pair_lanes<mode>. */
|
||||
if (memory_operand (x0, inner_mode)
|
||||
&& memory_operand (x1, inner_mode)
|
||||
&& !STRICT_ALIGNMENT
|
||||
&& rtx_equal_p (XEXP (x1, 0),
|
||||
plus_constant (Pmode,
|
||||
XEXP (x0, 0),
|
||||
GET_MODE_SIZE (inner_mode))))
|
||||
{
|
||||
rtx t;
|
||||
if (inner_mode == DFmode)
|
||||
t = gen_load_pair_lanesdf (target, x0, x1);
|
||||
else
|
||||
t = gen_load_pair_lanesdi (target, x0, x1);
|
||||
emit_insn (t);
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
|
||||
aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
|
||||
maxelement = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
|
||||
aarch64_emit_move (target, gen_vec_duplicate (mode, x));
|
||||
}
|
||||
/* Create a duplicate of the most common element. */
|
||||
rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
|
||||
aarch64_emit_move (target, gen_vec_duplicate (mode, x));
|
||||
|
||||
/* Insert the rest. */
|
||||
for (int i = 0; i < n_elts; i++)
|
||||
|
@ -5888,57 +5888,6 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
|
||||
return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
|
||||
}
|
||||
|
||||
/* Replace:
|
||||
|
||||
(vec_merge:outer (vec_duplicate:outer x:inner)
|
||||
(subreg:outer y:inner 0)
|
||||
(const_int N))
|
||||
|
||||
with (vec_concat:outer x:inner y:inner) if N == 1,
|
||||
or (vec_concat:outer y:inner x:inner) if N == 2.
|
||||
We assume that degenerate cases (N == 0 or N == 3), which
|
||||
represent taking all elements from either input, are handled
|
||||
elsewhere.
|
||||
|
||||
Implicitly, this means we have a paradoxical subreg, but such
|
||||
a check is cheap, so make it anyway.
|
||||
|
||||
Only applies for vectors of two elements. */
|
||||
|
||||
if ((GET_CODE (op0) == VEC_DUPLICATE
|
||||
|| GET_CODE (op1) == VEC_DUPLICATE)
|
||||
&& GET_MODE (op0) == GET_MODE (op1)
|
||||
&& known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
|
||||
&& known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
|
||||
&& IN_RANGE (sel, 1, 2))
|
||||
{
|
||||
rtx newop0 = op0, newop1 = op1;
|
||||
|
||||
/* Canonicalize locally such that the VEC_DUPLICATE is always
|
||||
the first operand. */
|
||||
if (GET_CODE (newop1) == VEC_DUPLICATE)
|
||||
{
|
||||
std::swap (newop0, newop1);
|
||||
/* If we swap the operand order, we also need to swap
|
||||
the selector mask. */
|
||||
sel = sel == 1 ? 2 : 1;
|
||||
}
|
||||
|
||||
if (GET_CODE (newop1) == SUBREG
|
||||
&& paradoxical_subreg_p (newop1)
|
||||
&& subreg_lowpart_p (newop1)
|
||||
&& GET_MODE (SUBREG_REG (newop1))
|
||||
== GET_MODE (XEXP (newop0, 0)))
|
||||
{
|
||||
newop0 = XEXP (newop0, 0);
|
||||
newop1 = SUBREG_REG (newop1);
|
||||
if (sel == 2)
|
||||
std::swap (newop0, newop1);
|
||||
return simplify_gen_binary (VEC_CONCAT, mode,
|
||||
newop0, newop1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Replace (vec_merge (vec_duplicate x) (vec_duplicate y)
|
||||
(const_int n))
|
||||
with (vec_concat x y) or (vec_concat y x) depending on value
|
||||
|
@ -1,3 +1,8 @@
|
||||
2018-01-08 Vidya Praveen <vidyapraveen@arm.com>
|
||||
|
||||
PR target/83663 - Revert r255946
|
||||
* gcc.target/aarch64/vect-slp-dup.c: New.
|
||||
|
||||
2018-01-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
* gcc.target/arm/multilib.exp: Add fp16, dotprod and armv8.3-a
|
||||
|
@ -1,20 +0,0 @@
|
||||
/* { dg-do compile } */
|
||||
|
||||
/* { dg-options "-O3 -ftree-vectorize -fno-vect-cost-model" } */
|
||||
|
||||
void bar (double);
|
||||
|
||||
void
|
||||
foo (double *restrict in, double *restrict in2,
|
||||
double *restrict out1, double *restrict out2)
|
||||
{
|
||||
for (int i = 0; i < 1024; i++)
|
||||
{
|
||||
out1[i] = in[i] + 2.0 * in[i+128];
|
||||
out1[i+1] = in[i+1] + 2.0 * in2[i];
|
||||
bar (in[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "dup\tv\[0-9\]+.2d, v\[0-9\]+" } } */
|
||||
|
Loading…
Reference in New Issue
Block a user