tree-optimization/95866 - avoid using scalar ops for vectorized shift
This avoids using the original scalar SSA operand when vectorizing a shift whose shift operand is itself vectorized, in the case where we know all vector components have the same value and can therefore use a vector-by-scalar shift. Using the scalar SSA operand causes a possibly long chain of scalar computation to be retained, so it is better to simply extract lane zero from the available vectorized shift operand.

2020-06-25 Richard Biener <rguenther@suse.de>

PR tree-optimization/95866
* tree-vect-stmts.c (vectorizable_shift): Reject incompatible vectorized shift operands. For scalar shifts use lane zero of a vectorized shift operand.

* gcc.dg/vect/bb-slp-pr95866.c: New testcase.
This commit is contained in:
parent
88891c5ff0
commit
86ce59b4f0
17
gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c
Normal file
17
gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
/* { dg-require-effective-target vect_shift } */
|
||||||
|
|
||||||
|
/* Arrays used by foo: x is updated in place; j provides the shift amount
   (always j[0]) and the per-element addends (j[0]..j[3]).  */
int x[4];
|
||||||
|
int j[4];
|
||||||
|
/* Sets each x[i] to (x[i] << j[0]) + j[i].  Every element is shifted by the
   same amount, j[0], while the value added afterwards differs per element,
   so j is read both as a uniform shift operand and as a full set of four
   addends.  */
void foo()
|
||||||
|
{
|
||||||
|
x[0] = (x[0] << j[0]) + j[0];
|
||||||
|
x[1] = (x[1] << j[0]) + j[1];
|
||||||
|
x[2] = (x[2] << j[0]) + j[2];
|
||||||
|
x[3] = (x[3] << j[0]) + j[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The scalar shift argument should be extracted from the available vector. */
|
||||||
|
/* { dg-final { scan-tree-dump "BIT_FIELD_REF" "slp2" } } */
|
||||||
|
/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */
|
@ -5413,6 +5413,15 @@ vectorizable_shift (vec_info *vinfo,
|
|||||||
= (!op1_vectype
|
= (!op1_vectype
|
||||||
|| !tree_nop_conversion_p (TREE_TYPE (vectype),
|
|| !tree_nop_conversion_p (TREE_TYPE (vectype),
|
||||||
TREE_TYPE (op1)));
|
TREE_TYPE (op1)));
|
||||||
|
if (incompatible_op1_vectype_p
|
||||||
|
&& dt[1] == vect_internal_def)
|
||||||
|
{
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||||
|
"unusable type for last operand in"
|
||||||
|
" vector/vector shift/rotate.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -5457,7 +5466,7 @@ vectorizable_shift (vec_info *vinfo,
|
|||||||
{
|
{
|
||||||
if (slp_node
|
if (slp_node
|
||||||
&& (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
|
&& (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
|
||||||
|| (!scalar_shift_arg
|
|| ((!scalar_shift_arg || dt[1] == vect_internal_def)
|
||||||
&& (!incompatible_op1_vectype_p
|
&& (!incompatible_op1_vectype_p
|
||||||
|| dt[1] == vect_constant_def)
|
|| dt[1] == vect_constant_def)
|
||||||
&& !vect_maybe_update_slp_op_vectype
|
&& !vect_maybe_update_slp_op_vectype
|
||||||
@ -5499,6 +5508,7 @@ vectorizable_shift (vec_info *vinfo,
|
|||||||
|
|
||||||
if (incompatible_op1_vectype_p && !slp_node)
|
if (incompatible_op1_vectype_p && !slp_node)
|
||||||
{
|
{
|
||||||
|
gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
|
||||||
op1 = fold_convert (TREE_TYPE (vectype), op1);
|
op1 = fold_convert (TREE_TYPE (vectype), op1);
|
||||||
if (dt[1] != vect_constant_def)
|
if (dt[1] != vect_constant_def)
|
||||||
op1 = vect_init_vector (vinfo, stmt_info, op1,
|
op1 = vect_init_vector (vinfo, stmt_info, op1,
|
||||||
@ -5508,7 +5518,7 @@ vectorizable_shift (vec_info *vinfo,
|
|||||||
/* Handle def. */
|
/* Handle def. */
|
||||||
vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
||||||
|
|
||||||
if (scalar_shift_arg)
|
if (scalar_shift_arg && dt[1] != vect_internal_def)
|
||||||
{
|
{
|
||||||
/* Vector shl and shr insn patterns can be defined with scalar
|
/* Vector shl and shr insn patterns can be defined with scalar
|
||||||
operand 2 (shift operand). In this case, use constant or loop
|
operand 2 (shift operand). In this case, use constant or loop
|
||||||
@ -5533,7 +5543,7 @@ vectorizable_shift (vec_info *vinfo,
|
|||||||
vec_oprnds1.quick_push (vec_oprnd1);
|
vec_oprnds1.quick_push (vec_oprnd1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (slp_node && incompatible_op1_vectype_p)
|
else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
|
||||||
{
|
{
|
||||||
if (was_scalar_shift_arg)
|
if (was_scalar_shift_arg)
|
||||||
{
|
{
|
||||||
@ -5566,6 +5576,20 @@ vectorizable_shift (vec_info *vinfo,
|
|||||||
FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
|
FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
|
||||||
{
|
{
|
||||||
vop1 = vec_oprnds1[i];
|
vop1 = vec_oprnds1[i];
|
||||||
|
/* For internal defs where we need to use a scalar shift arg
|
||||||
|
extract the first lane. */
|
||||||
|
if (scalar_shift_arg && dt[1] == vect_internal_def)
|
||||||
|
{
|
||||||
|
new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
|
||||||
|
gassign *new_stmt
|
||||||
|
= gimple_build_assign (new_temp,
|
||||||
|
build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
|
||||||
|
vop1,
|
||||||
|
TYPE_SIZE (TREE_TYPE (new_temp)),
|
||||||
|
bitsize_zero_node));
|
||||||
|
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
|
||||||
|
vop1 = new_temp;
|
||||||
|
}
|
||||||
gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
|
gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
|
||||||
new_temp = make_ssa_name (vec_dest, new_stmt);
|
new_temp = make_ssa_name (vec_dest, new_stmt);
|
||||||
gimple_assign_set_lhs (new_stmt, new_temp);
|
gimple_assign_set_lhs (new_stmt, new_temp);
|
||||||
|
Loading…
Reference in New Issue
Block a user