re PR tree-optimization/66051 (can't vectorize reductions inside an SLP group)
2015-12-03 Richard Biener <rguenther@suse.de> PR tree-optimization/66051 * tree-vect-slp.c (vect_build_slp_tree_1): Remove restriction on load group size. Do not pass in vectorization_factor. (vect_transform_slp_perm_load): Do not require any permute support. (vect_build_slp_tree): Do not pass in vectorization factor. (vect_analyze_slp_instance): Do not compute vectorization factor estimate. Use vector size instead of vectorization factor estimate to split store groups for BB vectorization. * gcc.dg/vect/slp-42.c: New testcase. From-SVN: r231225
This commit is contained in:
parent
b5be36b1db
commit
97a1a64299
@ -1,3 +1,14 @@
|
||||
2015-12-03 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/66051
|
||||
* tree-vect-slp.c (vect_build_slp_tree_1): Remove restriction
|
||||
on load group size. Do not pass in vectorization_factor.
|
||||
(vect_transform_slp_perm_load): Do not require any permute support.
|
||||
(vect_build_slp_tree): Do not pass in vectorization factor.
|
||||
(vect_analyze_slp_instance): Do not compute vectorization
|
||||
factor estimate. Use vector size instead of vectorization factor
|
||||
estimate to split store groups for BB vectorization.
|
||||
|
||||
2015-12-03 Ilya Enkovich <enkovich.gnu@gmail.com>
|
||||
|
||||
* cfgexpand.c (expand_gimple_stmt_1): Return statement with
|
||||
|
@ -1,3 +1,8 @@
|
||||
2015-12-03 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/66051
|
||||
* gcc.dg/vect/slp-42.c: New testcase.
|
||||
|
||||
2015-12-02 Kirill Yukhin <kirill.yukhin@intel.com>
|
||||
|
||||
* gcc.target/i386/avx512vl-vextractf32x4-1.c: Fix scan pattern.
|
||||
|
19
gcc/testsuite/gcc.dg/vect/slp-42.c
Normal file
19
gcc/testsuite/gcc.dg/vect/slp-42.c
Normal file
@ -0,0 +1,19 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
int p[4096], q[4096];
|
||||
|
||||
void foo (int n)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
p[i*4+0] = q[i*8+0] + q[i*8+4];
|
||||
p[i*4+1] = q[i*8+1] + q[i*8+5];
|
||||
p[i*4+2] = q[i*8+2] + q[i*8+6];
|
||||
p[i*4+3] = q[i*8+3] + q[i*8+7];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
|
||||
/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
|
@ -430,8 +430,7 @@ static bool
|
||||
vect_build_slp_tree_1 (vec_info *vinfo,
|
||||
vec<gimple *> stmts, unsigned int group_size,
|
||||
unsigned nops, unsigned int *max_nunits,
|
||||
unsigned int vectorization_factor, bool *matches,
|
||||
bool *two_operators)
|
||||
bool *matches, bool *two_operators)
|
||||
{
|
||||
unsigned int i;
|
||||
gimple *first_stmt = stmts[0], *stmt = stmts[0];
|
||||
@ -523,11 +522,7 @@ vect_build_slp_tree_1 (vec_info *vinfo,
|
||||
|
||||
/* In case of multiple types we need to detect the smallest type. */
|
||||
if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
|
||||
{
|
||||
*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
if (is_a <bb_vec_info> (vinfo))
|
||||
vectorization_factor = *max_nunits;
|
||||
}
|
||||
*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
|
||||
{
|
||||
@ -700,31 +695,6 @@ vect_build_slp_tree_1 (vec_info *vinfo,
|
||||
else
|
||||
{
|
||||
/* Load. */
|
||||
/* Check that the size of interleaved loads group is not
|
||||
greater than the SLP group size. */
|
||||
unsigned ncopies
|
||||
= vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
|
||||
if (is_a <loop_vec_info> (vinfo)
|
||||
&& GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
|
||||
&& ((GROUP_SIZE (vinfo_for_stmt (stmt))
|
||||
- GROUP_GAP (vinfo_for_stmt (stmt)))
|
||||
> ncopies * group_size))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"Build SLP failed: the number "
|
||||
"of interleaved loads is greater than "
|
||||
"the SLP group size ");
|
||||
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
|
||||
stmt, 0);
|
||||
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
|
||||
}
|
||||
/* Fatal mismatch. */
|
||||
matches[0] = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
|
||||
if (prev_first_load)
|
||||
{
|
||||
@ -871,7 +841,6 @@ vect_build_slp_tree (vec_info *vinfo,
|
||||
slp_tree *node, unsigned int group_size,
|
||||
unsigned int *max_nunits,
|
||||
vec<slp_tree> *loads,
|
||||
unsigned int vectorization_factor,
|
||||
bool *matches, unsigned *npermutes, unsigned *tree_size,
|
||||
unsigned max_tree_size)
|
||||
{
|
||||
@ -895,8 +864,7 @@ vect_build_slp_tree (vec_info *vinfo,
|
||||
bool two_operators = false;
|
||||
if (!vect_build_slp_tree_1 (vinfo,
|
||||
SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
|
||||
max_nunits, vectorization_factor, matches,
|
||||
&two_operators))
|
||||
max_nunits, matches, &two_operators))
|
||||
return false;
|
||||
SLP_TREE_TWO_OPERATORS (*node) = two_operators;
|
||||
|
||||
@ -959,8 +927,7 @@ vect_build_slp_tree (vec_info *vinfo,
|
||||
}
|
||||
|
||||
if (vect_build_slp_tree (vinfo, &child,
|
||||
group_size, max_nunits, loads,
|
||||
vectorization_factor, matches,
|
||||
group_size, max_nunits, loads, matches,
|
||||
npermutes, &this_tree_size, max_tree_size))
|
||||
{
|
||||
/* If we have all children of child built up from scalars then just
|
||||
@ -1074,7 +1041,6 @@ vect_build_slp_tree (vec_info *vinfo,
|
||||
bool *tem = XALLOCAVEC (bool, group_size);
|
||||
if (vect_build_slp_tree (vinfo, &child,
|
||||
group_size, max_nunits, loads,
|
||||
vectorization_factor,
|
||||
tem, npermutes, &this_tree_size,
|
||||
max_tree_size))
|
||||
{
|
||||
@ -1656,7 +1622,6 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
||||
unsigned int unrolling_factor = 1, nunits;
|
||||
tree vectype, scalar_type = NULL_TREE;
|
||||
gimple *next;
|
||||
unsigned int vectorization_factor = 0;
|
||||
unsigned int i;
|
||||
unsigned int max_nunits = 0;
|
||||
vec<slp_tree> loads;
|
||||
@ -1697,12 +1662,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
if (is_a <loop_vec_info> (vinfo))
|
||||
vectorization_factor = as_a <loop_vec_info> (vinfo)->vectorization_factor;
|
||||
else
|
||||
vectorization_factor = nunits;
|
||||
|
||||
/* Calculate the unrolling factor. */
|
||||
unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
|
||||
@ -1755,8 +1715,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
||||
unsigned npermutes = 0;
|
||||
if (vect_build_slp_tree (vinfo, &node, group_size,
|
||||
&max_nunits, &loads,
|
||||
vectorization_factor, matches, &npermutes, NULL,
|
||||
max_tree_size))
|
||||
matches, &npermutes, NULL, max_tree_size))
|
||||
{
|
||||
/* Calculate the unrolling factor based on the smallest type. */
|
||||
if (max_nunits > nunits)
|
||||
@ -1852,7 +1811,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
||||
loads.release ();
|
||||
|
||||
/* For basic block SLP, try to break the group up into multiples of the
|
||||
vectorization factor. */
|
||||
vector size. */
|
||||
if (is_a <bb_vec_info> (vinfo)
|
||||
&& GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
|
||||
&& STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
|
||||
@ -1862,11 +1821,11 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
||||
for (i = 0; i < group_size; i++)
|
||||
if (!matches[i]) break;
|
||||
|
||||
if (i >= vectorization_factor && i < group_size)
|
||||
if (i >= nunits && i < group_size)
|
||||
{
|
||||
/* Split into two groups at the first vector boundary before i. */
|
||||
gcc_assert ((vectorization_factor & (vectorization_factor - 1)) == 0);
|
||||
unsigned group1_size = i & ~(vectorization_factor - 1);
|
||||
gcc_assert ((nunits & (nunits - 1)) == 0);
|
||||
unsigned group1_size = i & ~(nunits - 1);
|
||||
|
||||
gimple *rest = vect_split_slp_store_group (stmt, group1_size);
|
||||
bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
|
||||
@ -1874,9 +1833,9 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
||||
skip the rest of that vector. */
|
||||
if (group1_size < i)
|
||||
{
|
||||
i = group1_size + vectorization_factor;
|
||||
i = group1_size + nunits;
|
||||
if (i < group_size)
|
||||
rest = vect_split_slp_store_group (rest, vectorization_factor);
|
||||
rest = vect_split_slp_store_group (rest, nunits);
|
||||
}
|
||||
if (i < group_size)
|
||||
res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
|
||||
@ -3274,18 +3233,6 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||
|
||||
mode = TYPE_MODE (vectype);
|
||||
|
||||
if (!can_vec_perm_p (mode, false, NULL))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"no vect permute for ");
|
||||
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
|
||||
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* The generic VEC_PERM_EXPR code always uses an integral type of the
|
||||
same size as the vector element being permuted. */
|
||||
mask_element_type = lang_hooks.types.type_for_mode
|
||||
|
Loading…
Reference in New Issue
Block a user