tree-optimization/95273 - more vectorizable_shift massaging

Covering all bases in vectorizable_shift is hard - this makes sure
to appropriately handle the case of PR95356 without breaking others.

2020-05-28  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/95273
	PR tree-optimization/95356
	* tree-vect-stmts.c (vectorizable_shift): Adjust when and to
	what we set the vector type of the shift operand SLP node
	again.

	* gcc.target/i386/pr95356.c: New testcase.
This commit is contained in:
Richard Biener 2020-05-28 10:27:48 +02:00
parent dd019ef073
commit 09df85393c
2 changed files with 130 additions and 1 deletions

View File

@ -0,0 +1,125 @@
/* { dg-do compile } */
/* { dg-options "-O3 -mavx512dq" } */
/* Declared by hand; this file includes no headers.  */
extern void abort (void);
/* Shared working array.  The functions below shift a[0..3] in place;
   the remaining elements are not referenced in this file.  */
long long a[16];
/* Shift a[0..3] left in place, each element by a different constant
   amount: 2, 3, 4 and 5 respectively.  All four elements are loaded
   into locals before any of them is stored back.
   NOTE(review): the straight-line load/shift/store shape looks like a
   deliberate SLP vectorization candidate for vectorizable_shift; keep
   the statement pattern intact when editing -- confirm against the PR.  */
__attribute__((noinline, noclone)) void
f1 (void)
{
long long a0, a1, a2, a3;
a0 = a[0];
a1 = a[1];
a2 = a[2];
a3 = a[3];
/* Four distinct constant shift counts.  */
a0 = a0 << 2;
a1 = a1 << 3;
a2 = a2 << 4;
a3 = a3 << 5;
a[0] = a0;
a[1] = a1;
a[2] = a2;
a[3] = a3;
}
/* Shift a[0..3] left in place, every element by the same constant
   amount 2.  All four elements are loaded into locals before any of
   them is stored back.
   NOTE(review): the straight-line load/shift/store shape looks like a
   deliberate SLP vectorization candidate for vectorizable_shift; keep
   the statement pattern intact when editing -- confirm against the PR.  */
__attribute__((noinline, noclone)) void
f2 (void)
{
long long a0, a1, a2, a3;
a0 = a[0];
a1 = a[1];
a2 = a[2];
a3 = a[3];
/* One uniform constant shift count for all four values.  */
a0 = a0 << 2;
a1 = a1 << 2;
a2 = a2 << 2;
a3 = a3 << 2;
a[0] = a0;
a[1] = a1;
a[2] = a2;
a[3] = a3;
}
/* Shift a[0..3] left in place: a[0] by the run-time amount X, the other
   three elements by the constant 2.  X has type int while the shifted
   values are long long.  X must be non-negative and smaller than the
   width of long long, otherwise the shift is undefined.
   NOTE(review): mixing one variable shift count (in the first position)
   with constants looks like a deliberate SLP case for
   vectorizable_shift; keep the statement pattern intact when editing
   -- confirm against the PR.  */
__attribute__((noinline, noclone)) void
f2a (int x)
{
long long a0, a1, a2, a3;
a0 = a[0];
a1 = a[1];
a2 = a[2];
a3 = a[3];
/* Only the first value uses the variable count.  */
a0 = a0 << x;
a1 = a1 << 2;
a2 = a2 << 2;
a3 = a3 << 2;
a[0] = a0;
a[1] = a1;
a[2] = a2;
a[3] = a3;
}
/* Shift a[0..3] left in place: a[2] by the run-time amount X, the other
   three elements by the constant 2.  X has type int while the shifted
   values are long long.  X must be non-negative and smaller than the
   width of long long, otherwise the shift is undefined.
   NOTE(review): mixing one variable shift count (in the third position)
   with constants looks like a deliberate SLP case for
   vectorizable_shift; keep the statement pattern intact when editing
   -- confirm against the PR.  */
__attribute__((noinline, noclone)) void
f2b (int x)
{
long long a0, a1, a2, a3;
a0 = a[0];
a1 = a[1];
a2 = a[2];
a3 = a[3];
/* Only the third value uses the variable count.  */
a0 = a0 << 2;
a1 = a1 << 2;
a2 = a2 << x;
a3 = a3 << 2;
a[0] = a0;
a[1] = a1;
a[2] = a2;
a[3] = a3;
}
/* Shift a[0..3] left in place, every element by the same run-time
   amount X.  X has type int while the shifted values are long long.
   X must be non-negative and smaller than the width of long long,
   otherwise the shift is undefined.
   NOTE(review): the straight-line load/shift/store shape looks like a
   deliberate SLP vectorization candidate for vectorizable_shift; keep
   the statement pattern intact when editing -- confirm against the PR.  */
__attribute__((noinline, noclone)) void
f3 (int x)
{
long long a0, a1, a2, a3;
a0 = a[0];
a1 = a[1];
a2 = a[2];
a3 = a[3];
/* One uniform variable shift count for all four values.  */
a0 = a0 << x;
a1 = a1 << x;
a2 = a2 << x;
a3 = a3 << x;
a[0] = a0;
a[1] = a1;
a[2] = a2;
a[3] = a3;
}
/* Abort unless a[0..3] equal the four expected values, in order.  */
static void
expect_a (long long want0, long long want1,
	  long long want2, long long want3)
{
  if (a[0] == want0 && a[1] == want1 && a[2] == want2 && a[3] == want3)
    return;
  abort ();
}

/* Seed a[0..3] with {4, 3, 2, 1}, then call f1, f2, f3, f2a and f2b in
   that order, checking the array after each call.  The array is never
   reset between calls, so each expected value carries the shifts applied
   by every earlier call.  Returns 0 when all checks pass; aborts on the
   first mismatch.  */
int
main ()
{
  a[0] = 4LL;
  a[1] = 3LL;
  a[2] = 2LL;
  a[3] = 1LL;

  /* Per-element constant counts 2, 3, 4, 5.  */
  f1 ();
  expect_a (4LL << 2, 3LL << 3, 2LL << 4, 1LL << 5);

  /* Uniform constant count 2.  */
  f2 ();
  expect_a (4LL << 4, 3LL << 5, 2LL << 6, 1LL << 7);

  /* Uniform variable count 3.  */
  f3 (3);
  expect_a (4LL << 7, 3LL << 8, 2LL << 9, 1LL << 10);

  /* a[0] by 3, the rest by 2.  */
  f2a (3);
  expect_a (4LL << 10, 3LL << 10, 2LL << 11, 1LL << 12);

  /* a[2] by 3, the rest by 2.  */
  f2b (3);
  expect_a (4LL << 12, 3LL << 12, 2LL << 14, 1LL << 14);

  return 0;
}

View File

@ -5792,7 +5792,11 @@ vectorizable_shift (vec_info *vinfo,
if (slp_node
&& (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
|| (!scalar_shift_arg
&& !vect_maybe_update_slp_op_vectype (slp_op1, vectype))))
&& (!incompatible_op1_vectype_p
|| dt[1] == vect_constant_def)
&& !vect_maybe_update_slp_op_vectype
(slp_op1,
incompatible_op1_vectype_p ? vectype : op1_vectype))))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,