tree-optimization/95866 - avoid using scalar ops for vectorized shift

This avoids using the original scalar SSA operand when vectorizing
a shift with a vectorized shift operand where we know all vector
components have the same value and thus we can use a vector by
scalar shift.  Using the scalar SSA operand causes a possibly
long chain of scalar computation to be retained so it's better
to simply extract lane zero from the available vectorized shift
operand.

2020-06-25  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/95866
	* tree-vect-stmts.c (vectorizable_shift): Reject incompatible
	vectorized shift operands.  For scalar shifts use lane zero
	of a vectorized shift operand.

	* gcc.dg/vect/bb-slp-pr95866.c: New testcase.
This commit is contained in:
Richard Biener 2020-06-25 11:21:20 +02:00
parent 88891c5ff0
commit 86ce59b4f0
2 changed files with 44 additions and 3 deletions

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
int x[4];
int j[4];
void foo()
{
x[0] = (x[0] << j[0]) + j[0];
x[1] = (x[1] << j[0]) + j[1];
x[2] = (x[2] << j[0]) + j[2];
x[3] = (x[3] << j[0]) + j[3];
}
/* The scalar shift argument should be extracted from the available vector. */
/* { dg-final { scan-tree-dump "BIT_FIELD_REF" "slp2" } } */
/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */

View File

@ -5413,6 +5413,15 @@ vectorizable_shift (vec_info *vinfo,
= (!op1_vectype
|| !tree_nop_conversion_p (TREE_TYPE (vectype),
TREE_TYPE (op1)));
if (incompatible_op1_vectype_p
&& dt[1] == vect_internal_def)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unusable type for last operand in"
" vector/vector shift/rotate.\n");
return false;
}
}
}
}
@ -5457,7 +5466,7 @@ vectorizable_shift (vec_info *vinfo,
{
if (slp_node
&& (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
|| (!scalar_shift_arg
|| ((!scalar_shift_arg || dt[1] == vect_internal_def)
&& (!incompatible_op1_vectype_p
|| dt[1] == vect_constant_def)
&& !vect_maybe_update_slp_op_vectype
@ -5499,6 +5508,7 @@ vectorizable_shift (vec_info *vinfo,
if (incompatible_op1_vectype_p && !slp_node)
{
gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
op1 = fold_convert (TREE_TYPE (vectype), op1);
if (dt[1] != vect_constant_def)
op1 = vect_init_vector (vinfo, stmt_info, op1,
@ -5508,7 +5518,7 @@ vectorizable_shift (vec_info *vinfo,
/* Handle def. */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
if (scalar_shift_arg)
if (scalar_shift_arg && dt[1] != vect_internal_def)
{
/* Vector shl and shr insn patterns can be defined with scalar
operand 2 (shift operand). In this case, use constant or loop
@ -5533,7 +5543,7 @@ vectorizable_shift (vec_info *vinfo,
vec_oprnds1.quick_push (vec_oprnd1);
}
}
else if (slp_node && incompatible_op1_vectype_p)
else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
{
if (was_scalar_shift_arg)
{
@ -5566,6 +5576,20 @@ vectorizable_shift (vec_info *vinfo,
FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
{
vop1 = vec_oprnds1[i];
/* For internal defs where we need to use a scalar shift arg
extract the first lane. */
if (scalar_shift_arg && dt[1] == vect_internal_def)
{
new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
gassign *new_stmt
= gimple_build_assign (new_temp,
build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
vop1,
TYPE_SIZE (TREE_TYPE (new_temp)),
bitsize_zero_node));
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
vop1 = new_temp;
}
gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);