From 97a1a642995cbe356786d2fb2b7c203fea7d0282 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 3 Dec 2015 11:26:56 +0000 Subject: [PATCH] re PR tree-optimization/66051 (can't vectorize reductions inside an SLP group) 2015-12-03 Richard Biener PR tree-optimization/66051 * tree-vect-slp.c (vect_build_slp_tree_1): Remove restriction on load group size. Do not pass in vectorization_factor. (vect_transform_slp_perm_load): Do not require any permute support. (vect_build_slp_tree): Do not pass in vectorization factor. (vect_analyze_slp_instance): Do not compute vectorization factor estimate. Use vector size instead of vectorization factor estimate to split store groups for BB vectorization. * gcc.dg/vect/slp-42.c: New testcase. From-SVN: r231225 --- gcc/ChangeLog | 11 +++++ gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/gcc.dg/vect/slp-42.c | 19 ++++++++ gcc/tree-vect-slp.c | 75 +++++------------------------- 4 files changed, 46 insertions(+), 64 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/slp-42.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c576908d996..65b1b2b6284 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2015-12-03 Richard Biener + + PR tree-optimization/66051 + * tree-vect-slp.c (vect_build_slp_tree_1): Remove restriction + on load group size. Do not pass in vectorization_factor. + (vect_transform_slp_perm_load): Do not require any permute support. + (vect_build_slp_tree): Do not pass in vectorization factor. + (vect_analyze_slp_instance): Do not compute vectorization + factor estimate. Use vector size instead of vectorization factor + estimate to split store groups for BB vectorization. 
+ 2015-12-03 Ilya Enkovich * cfgexpand.c (expand_gimple_stmt_1): Return statement with diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f3a526bd254..55529e05720 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-12-03 Richard Biener + + PR tree-optimization/66051 + * gcc.dg/vect/slp-42.c: New testcase. + 2015-12-02 Kirill Yukhin * gcc.target/i386/avx512vl-vextractf32x4-1.c: Fix scan pattern. diff --git a/gcc/testsuite/gcc.dg/vect/slp-42.c b/gcc/testsuite/gcc.dg/vect/slp-42.c new file mode 100644 index 00000000000..ea5fe167cdb --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-42.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +int p[4096], q[4096]; + +void foo (int n) +{ + int i; + for (i = 0; i < n; ++i) + { + p[i*4+0] = q[i*8+0] + q[i*8+4]; + p[i*4+1] = q[i*8+1] + q[i*8+5]; + p[i*4+2] = q[i*8+2] + q[i*8+6]; + p[i*4+3] = q[i*8+3] + q[i*8+7]; + } +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 5693ca5e35e..b8936823067 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -430,8 +430,7 @@ static bool vect_build_slp_tree_1 (vec_info *vinfo, vec stmts, unsigned int group_size, unsigned nops, unsigned int *max_nunits, - unsigned int vectorization_factor, bool *matches, - bool *two_operators) + bool *matches, bool *two_operators) { unsigned int i; gimple *first_stmt = stmts[0], *stmt = stmts[0]; @@ -523,11 +522,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, /* In case of multiple types we need to detect the smallest type. 
*/ if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) - { - *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); - if (is_a <bb_vec_info> (vinfo)) - vectorization_factor = *max_nunits; - } + *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); if (gcall *call_stmt = dyn_cast <gcall *> (stmt)) { @@ -700,31 +695,6 @@ vect_build_slp_tree_1 (vec_info *vinfo, else { /* Load. */ - /* Check that the size of interleaved loads group is not - greater than the SLP group size. */ - unsigned ncopies - = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); - if (is_a <loop_vec_info> (vinfo) - && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt - && ((GROUP_SIZE (vinfo_for_stmt (stmt)) - - GROUP_GAP (vinfo_for_stmt (stmt))) - > ncopies * group_size)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Build SLP failed: the number " - "of interleaved loads is greater than " - "the SLP group size "); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - stmt, 0); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - /* Fatal mismatch. 
*/ - matches[0] = false; - return false; - } - first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)); if (prev_first_load) { @@ -871,7 +841,6 @@ vect_build_slp_tree (vec_info *vinfo, slp_tree *node, unsigned int group_size, unsigned int *max_nunits, vec<slp_tree> *loads, - unsigned int vectorization_factor, bool *matches, unsigned *npermutes, unsigned *tree_size, unsigned max_tree_size) { @@ -895,8 +864,7 @@ vect_build_slp_tree (vec_info *vinfo, bool two_operators = false; if (!vect_build_slp_tree_1 (vinfo, SLP_TREE_SCALAR_STMTS (*node), group_size, nops, - max_nunits, vectorization_factor, matches, - &two_operators)) + max_nunits, matches, &two_operators)) return false; SLP_TREE_TWO_OPERATORS (*node) = two_operators; @@ -959,8 +927,7 @@ vect_build_slp_tree (vec_info *vinfo, } if (vect_build_slp_tree (vinfo, &child, - group_size, max_nunits, loads, - vectorization_factor, matches, + group_size, max_nunits, loads, matches, npermutes, &this_tree_size, max_tree_size)) { /* If we have all children of child built up from scalars then just @@ -1074,7 +1041,6 @@ vect_build_slp_tree (vec_info *vinfo, bool *tem = XALLOCAVEC (bool, group_size); if (vect_build_slp_tree (vinfo, &child, group_size, max_nunits, loads, - vectorization_factor, tem, npermutes, &this_tree_size, max_tree_size)) { @@ -1656,7 +1622,6 @@ vect_analyze_slp_instance (vec_info *vinfo, unsigned int unrolling_factor = 1, nunits; tree vectype, scalar_type = NULL_TREE; gimple *next; - unsigned int vectorization_factor = 0; unsigned int i; unsigned int max_nunits = 0; vec<slp_tree> loads; @@ -1697,12 +1662,7 @@ vect_analyze_slp_instance (vec_info *vinfo, return false; } - nunits = TYPE_VECTOR_SUBPARTS (vectype); - if (is_a <loop_vec_info> (vinfo)) - vectorization_factor = as_a <loop_vec_info> (vinfo)->vectorization_factor; - else - vectorization_factor = nunits; /* Calculate the unrolling factor. 
*/ unrolling_factor = least_common_multiple (nunits, group_size) / group_size; @@ -1755,8 +1715,7 @@ vect_analyze_slp_instance (vec_info *vinfo, unsigned npermutes = 0; if (vect_build_slp_tree (vinfo, &node, group_size, &max_nunits, &loads, - vectorization_factor, matches, &npermutes, NULL, - max_tree_size)) + matches, &npermutes, NULL, max_tree_size)) { /* Calculate the unrolling factor based on the smallest type. */ if (max_nunits > nunits) @@ -1852,7 +1811,7 @@ vect_analyze_slp_instance (vec_info *vinfo, loads.release (); /* For basic block SLP, try to break the group up into multiples of the - vectorization factor. */ + vector size. */ if (is_a <bb_vec_info> (vinfo) && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) @@ -1862,11 +1821,11 @@ vect_analyze_slp_instance (vec_info *vinfo, for (i = 0; i < group_size; i++) if (!matches[i]) break; - if (i >= vectorization_factor && i < group_size) + if (i >= nunits && i < group_size) { /* Split into two groups at the first vector boundary before i. */ - gcc_assert ((vectorization_factor & (vectorization_factor - 1)) == 0); - unsigned group1_size = i & ~(vectorization_factor - 1); + gcc_assert ((nunits & (nunits - 1)) == 0); + unsigned group1_size = i & ~(nunits - 1); gimple *rest = vect_split_slp_store_group (stmt, group1_size); bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size); @@ -1874,9 +1833,9 @@ vect_analyze_slp_instance (vec_info *vinfo, skip the rest of that vector. 
*/ if (group1_size < i) { - i = group1_size + vectorization_factor; + i = group1_size + nunits; if (i < group_size) - rest = vect_split_slp_store_group (rest, vectorization_factor); + rest = vect_split_slp_store_group (rest, nunits); } if (i < group_size) res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size); @@ -3274,18 +3233,6 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, mode = TYPE_MODE (vectype); - if (!can_vec_perm_p (mode, false, NULL)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "no vect permute for "); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - return false; - } - /* The generic VEC_PERM_EXPR code always uses an integral type of the same size as the vector element being permuted. */ mask_element_type = lang_hooks.types.type_for_mode