re PR tree-optimization/92324 (ICE in expand_direct_optab_fn, at internal-fn.c:2890)
2019-11-15 Richard Biener <rguenther@suse.de> PR tree-optimization/92324 * tree-vect-loop.c (vect_create_epilog_for_reduction): Fix signedness of SLP reduction epilogue operations. Also reduce the vector width for SLP reductions before doing elementwise operations if possible. * gcc.dg/vect/pr92324-4.c: New testcase. From-SVN: r278289
This commit is contained in:
parent
8eea62d8ab
commit
d03431d0f5
@ -1,3 +1,11 @@
|
||||
2019-11-15 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/92324
|
||||
* tree-vect-loop.c (vect_create_epilog_for_reduction): Fix
|
||||
signedness of SLP reduction epilogue operations. Also reduce
|
||||
the vector width for SLP reductions before doing elementwise
|
||||
operations if possible.
|
||||
|
||||
2019-11-15 Matthew Malcomson <matthew.malcomson@arm.com>
|
||||
|
||||
* passes.c (skip_pass): Set epilogue_completed if skipping the
|
||||
|
@ -1,3 +1,8 @@
|
||||
2019-11-15 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/92324
|
||||
* gcc.dg/vect/pr92324-4.c: New testcase.
|
||||
|
||||
2019-11-15 Paul Thomas <pault@gcc.gnu.org>
|
||||
|
||||
PR fortran/69654
|
||||
|
30
gcc/testsuite/gcc.dg/vect/pr92324-4.c
Normal file
30
gcc/testsuite/gcc.dg/vect/pr92324-4.c
Normal file
@ -0,0 +1,30 @@
|
||||
#include "tree-vect.h"
|
||||
|
||||
/* Input array for the reduction; file-scope, so zero-initialized.  */
unsigned a[1024];
/* Results of the two interleaved max reductions, published by foo.  */
int gres1, gres2;

/* Compute two interleaved conditional max reductions over the even and
   odd elements of A (2*n elements total) and store the results in
   GRES1/GRES2.  The comparison mixes a signed accumulator with unsigned
   array elements on purpose: RES is promoted to unsigned for the
   comparison, then the unsigned element is converted back to int on
   assignment (PR92324 exercises exactly this signedness handling in the
   vectorizer's SLP reduction epilogue).
   noipa keeps the compiler from IPA-optimizing the call away so the
   vectorized loop is actually exercised.
   Fixed defect: the function is declared int but previously fell off
   the end without a return statement; it now returns 0.  */
int __attribute__((noipa))
foo (int n)
{
  int res1 = 0;
  int res2 = 0;
  for (int i = 0; i < n; ++i)
    {
      res1 = res1 > a[2*i] ? res1 : a[2*i];
      res2 = res2 > a[2*i+1] ? res2 : a[2*i+1];
    }
  gres1 = res1;
  gres2 = res2;
  return 0;
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
check_vect ();
|
||||
a[30] = (unsigned)__INT_MAX__ + 1;
|
||||
a[31] = (unsigned)__INT_MAX__ + 1;
|
||||
foo (16);
|
||||
if (gres1 != -__INT_MAX__ - 1
|
||||
|| gres2 != -__INT_MAX__ - 1)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
@ -4930,6 +4930,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
|
||||
bool reduce_with_shift;
|
||||
tree vec_temp;
|
||||
|
||||
gcc_assert (slp_reduc || new_phis.length () == 1);
|
||||
|
||||
/* See if the target wants to do the final (shift) reduction
|
||||
in a vector mode of smaller size and first reduce upper/lower
|
||||
halves against each other. */
|
||||
@ -4937,6 +4939,21 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
|
||||
tree stype = TREE_TYPE (vectype);
|
||||
unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
|
||||
unsigned nunits1 = nunits;
|
||||
if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode
|
||||
&& new_phis.length () == 1)
|
||||
{
|
||||
nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
|
||||
/* For SLP reductions we have to make sure lanes match up, but
|
||||
since we're doing individual element final reduction reducing
|
||||
vector width here is even more important.
|
||||
??? We can also separate lanes with permutes, for the common
|
||||
case of power-of-two group-size odd/even extracts would work. */
|
||||
if (slp_reduc && nunits != nunits1)
|
||||
{
|
||||
nunits1 = least_common_multiple (nunits1, group_size);
|
||||
gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
|
||||
}
|
||||
}
|
||||
if (!slp_reduc
|
||||
&& (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
|
||||
nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
|
||||
@ -4958,7 +4975,6 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
|
||||
new_temp = new_phi_result;
|
||||
while (nunits > nunits1)
|
||||
{
|
||||
gcc_assert (!slp_reduc);
|
||||
nunits /= 2;
|
||||
vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
|
||||
stype, nunits);
|
||||
@ -5113,6 +5129,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
|
||||
|
||||
int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
|
||||
int element_bitsize = tree_to_uhwi (bitsize);
|
||||
tree compute_type = TREE_TYPE (vectype);
|
||||
gimple_seq stmts = NULL;
|
||||
FOR_EACH_VEC_ELT (new_phis, i, new_phi)
|
||||
{
|
||||
int bit_offset;
|
||||
@ -5120,12 +5138,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
|
||||
vec_temp = PHI_RESULT (new_phi);
|
||||
else
|
||||
vec_temp = gimple_assign_lhs (new_phi);
|
||||
tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
|
||||
bitsize_zero_node);
|
||||
epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
|
||||
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
gimple_assign_set_lhs (epilog_stmt, new_temp);
|
||||
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
||||
new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type,
|
||||
vec_temp, bitsize, bitsize_zero_node);
|
||||
|
||||
/* In SLP we don't need to apply reduction operation, so we just
|
||||
collect s' values in SCALAR_RESULTS. */
|
||||
@ -5137,14 +5151,9 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
|
||||
bit_offset += element_bitsize)
|
||||
{
|
||||
tree bitpos = bitsize_int (bit_offset);
|
||||
tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
|
||||
bitsize, bitpos);
|
||||
|
||||
epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
|
||||
new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
gimple_assign_set_lhs (epilog_stmt, new_name);
|
||||
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
||||
|
||||
new_name = gimple_build (&stmts, BIT_FIELD_REF,
|
||||
compute_type, vec_temp,
|
||||
bitsize, bitpos);
|
||||
if (slp_reduc)
|
||||
{
|
||||
/* In SLP we don't need to apply reduction operation, so
|
||||
@ -5153,13 +5162,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
|
||||
scalar_results.safe_push (new_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
epilog_stmt = gimple_build_assign (new_scalar_dest, code,
|
||||
new_name, new_temp);
|
||||
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
gimple_assign_set_lhs (epilog_stmt, new_temp);
|
||||
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
||||
}
|
||||
new_temp = gimple_build (&stmts, code, compute_type,
|
||||
new_name, new_temp);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5170,24 +5174,28 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
|
||||
if (slp_reduc)
|
||||
{
|
||||
tree res, first_res, new_res;
|
||||
gimple *new_stmt;
|
||||
|
||||
/* Reduce multiple scalar results in case of SLP unrolling. */
|
||||
for (j = group_size; scalar_results.iterate (j, &res);
|
||||
j++)
|
||||
{
|
||||
first_res = scalar_results[j % group_size];
|
||||
new_stmt = gimple_build_assign (new_scalar_dest, code,
|
||||
first_res, res);
|
||||
new_res = make_ssa_name (new_scalar_dest, new_stmt);
|
||||
gimple_assign_set_lhs (new_stmt, new_res);
|
||||
gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
|
||||
new_res = gimple_build (&stmts, code, compute_type,
|
||||
first_res, res);
|
||||
scalar_results[j % group_size] = new_res;
|
||||
}
|
||||
for (k = 0; k < group_size; k++)
|
||||
scalar_results[k] = gimple_convert (&stmts, scalar_type,
|
||||
scalar_results[k]);
|
||||
}
|
||||
else
|
||||
/* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
|
||||
scalar_results.safe_push (new_temp);
|
||||
{
|
||||
/* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
|
||||
new_temp = gimple_convert (&stmts, scalar_type, new_temp);
|
||||
scalar_results.safe_push (new_temp);
|
||||
}
|
||||
|
||||
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
|
||||
}
|
||||
|
||||
if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
|
||||
|
Loading…
Reference in New Issue
Block a user