re PR tree-optimization/92324 (ICE in expand_direct_optab_fn, at internal-fn.c:2890)

2019-11-15  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92324
	* tree-vect-loop.c (vect_create_epilog_for_reduction): Fix
	signedness of SLP reduction epilogue operations.  Also reduce
	the vector width for SLP reductions before doing elementwise
	operations if possible.

	* gcc.dg/vect/pr92324-4.c: New testcase.

From-SVN: r278289
This commit is contained in:
Richard Biener 2019-11-15 12:48:34 +00:00 committed by Richard Biener
parent 8eea62d8ab
commit d03431d0f5
4 changed files with 81 additions and 30 deletions

View File

@ -1,3 +1,11 @@
2019-11-15 Richard Biener <rguenther@suse.de>
PR tree-optimization/92324
* tree-vect-loop.c (vect_create_epilog_for_reduction): Fix
signedness of SLP reduction epilogue operations. Also reduce
the vector width for SLP reductions before doing elementwise
operations if possible.
2019-11-15 Matthew Malcomson <matthew.malcomson@arm.com>
* passes.c (skip_pass): Set epilogue_completed if skipping the

View File

@ -1,3 +1,8 @@
2019-11-15 Richard Biener <rguenther@suse.de>
PR tree-optimization/92324
* gcc.dg/vect/pr92324-4.c: New testcase.
2019-11-15 Paul Thomas <pault@gcc.gnu.org>
PR fortran/69654

View File

@ -0,0 +1,30 @@
#include "tree-vect.h"
unsigned a[1024];
int gres1, gres2;
int __attribute__((noipa))
foo (int n)
{
int res1 = 0;
int res2 = 0;
for (int i = 0; i < n; ++i)
{
res1 = res1 > a[2*i] ? res1 : a[2*i];
res2 = res2 > a[2*i+1] ? res2 : a[2*i+1];
}
gres1 = res1;
gres2 = res2;
}
int main ()
{
check_vect ();
a[30] = (unsigned)__INT_MAX__ + 1;
a[31] = (unsigned)__INT_MAX__ + 1;
foo (16);
if (gres1 != -__INT_MAX__ - 1
|| gres2 != -__INT_MAX__ - 1)
__builtin_abort ();
return 0;
}

View File

@ -4930,6 +4930,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
bool reduce_with_shift;
tree vec_temp;
gcc_assert (slp_reduc || new_phis.length () == 1);
/* See if the target wants to do the final (shift) reduction
in a vector mode of smaller size and first reduce upper/lower
halves against each other. */
@ -4937,6 +4939,21 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
tree stype = TREE_TYPE (vectype);
unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
unsigned nunits1 = nunits;
if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode
&& new_phis.length () == 1)
{
nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
/* For SLP reductions we have to make sure lanes match up, but
since we're doing individual element final reduction reducing
vector width here is even more important.
??? We can also separate lanes with permutes, for the common
case of power-of-two group-size odd/even extracts would work. */
if (slp_reduc && nunits != nunits1)
{
nunits1 = least_common_multiple (nunits1, group_size);
gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
}
}
if (!slp_reduc
&& (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
@ -4958,7 +4975,6 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
new_temp = new_phi_result;
while (nunits > nunits1)
{
gcc_assert (!slp_reduc);
nunits /= 2;
vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
stype, nunits);
@ -5113,6 +5129,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
int element_bitsize = tree_to_uhwi (bitsize);
tree compute_type = TREE_TYPE (vectype);
gimple_seq stmts = NULL;
FOR_EACH_VEC_ELT (new_phis, i, new_phi)
{
int bit_offset;
@ -5120,12 +5138,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
vec_temp = PHI_RESULT (new_phi);
else
vec_temp = gimple_assign_lhs (new_phi);
tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
bitsize_zero_node);
epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
gimple_assign_set_lhs (epilog_stmt, new_temp);
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type,
vec_temp, bitsize, bitsize_zero_node);
/* In SLP we don't need to apply reduction operation, so we just
collect s' values in SCALAR_RESULTS. */
@ -5137,14 +5151,9 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
bit_offset += element_bitsize)
{
tree bitpos = bitsize_int (bit_offset);
tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
bitsize, bitpos);
epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
gimple_assign_set_lhs (epilog_stmt, new_name);
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
new_name = gimple_build (&stmts, BIT_FIELD_REF,
compute_type, vec_temp,
bitsize, bitpos);
if (slp_reduc)
{
/* In SLP we don't need to apply reduction operation, so
@ -5153,13 +5162,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
scalar_results.safe_push (new_name);
}
else
{
epilog_stmt = gimple_build_assign (new_scalar_dest, code,
new_name, new_temp);
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
gimple_assign_set_lhs (epilog_stmt, new_temp);
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
}
new_temp = gimple_build (&stmts, code, compute_type,
new_name, new_temp);
}
}
@ -5170,24 +5174,28 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
if (slp_reduc)
{
tree res, first_res, new_res;
gimple *new_stmt;
/* Reduce multiple scalar results in case of SLP unrolling. */
for (j = group_size; scalar_results.iterate (j, &res);
j++)
{
first_res = scalar_results[j % group_size];
new_stmt = gimple_build_assign (new_scalar_dest, code,
first_res, res);
new_res = make_ssa_name (new_scalar_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_res);
gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
new_res = gimple_build (&stmts, code, compute_type,
first_res, res);
scalar_results[j % group_size] = new_res;
}
for (k = 0; k < group_size; k++)
scalar_results[k] = gimple_convert (&stmts, scalar_type,
scalar_results[k]);
}
else
/* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
scalar_results.safe_push (new_temp);
{
/* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
new_temp = gimple_convert (&stmts, scalar_type, new_temp);
scalar_results.safe_push (new_temp);
}
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
}
if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)