From 14a6143710feb60e4471ca4288b03398cbba32e8 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 28 May 2015 13:29:41 +0000 Subject: [PATCH] tree-vect-loop.c (vect_fixup_reduc_chain): New function. 2015-05-28 Richard Biener * tree-vect-loop.c (vect_fixup_reduc_chain): New function. (vect_fixup_scalar_cycles_with_patterns): Likewise. (vect_analyze_loop_2): Call vect_fixup_scalar_cycles_with_patterns after pattern recog. (vect_create_epilog_for_reduction): Properly handle reductions with patterns. (vectorizable_reduction): Likewise. * tree-vect-slp.c (vect_analyze_slp_instance): Properly mark reduction chains. (vect_get_constant_vectors): Create the correct number of initial values for reductions. (vect_schedule_slp_instance): Handle reduction chains that are type changing properly. * tree-vect-stmts.c (vect_analyze_stmt): Adjust. * gcc.dg/vect/slp-reduc-sad.c: New testcase. From-SVN: r223818 --- gcc/ChangeLog | 17 ++++ gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c | 64 +++++++++++++++ gcc/tree-vect-loop.c | 95 ++++++++++++++++------- gcc/tree-vect-slp.c | 17 +++- gcc/tree-vect-stmts.c | 8 +- 6 files changed, 173 insertions(+), 32 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9e8110443aa..2a2829dad34 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2015-05-28 Richard Biener + + * tree-vect-loop.c (vect_fixup_reduc_chain): New function. + (vect_fixup_scalar_cycles_with_patterns): Likewise. + (vect_analyze_loop_2): Call vect_fixup_scalar_cycles_with_patterns + after pattern recog. + (vect_create_epilog_for_reduction): Properly handle reductions + with patterns. + (vectorizable_reduction): Likewise. + * tree-vect-slp.c (vect_analyze_slp_instance): Properly mark + reduction chains. + (vect_get_constant_vectors): Create the correct number of + initial values for reductions. + (vect_schedule_slp_instance): Handle reduction chains that are + type changing properly. + * tree-vect-stmts.c (vect_analyze_stmt): Adjust. + 2015-05-28 Richard Biener PR tree-optimization/66142 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ddc74fc274c..1a2b185eff0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2015-05-28 Richard Biener + + * gcc.dg/vect/slp-reduc-sad.c: New testcase. + 2015-05-28 Richard Biener PR tree-optimization/66142 diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c new file mode 100644 index 00000000000..472544f4fb2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c @@ -0,0 +1,64 @@ +/* { dg-require-effective-target vect_usad_char } */ + +#include "tree-vect.h" + +typedef unsigned int uint32_t; +typedef unsigned short uint16_t; +typedef unsigned char uint8_t; + +extern int abs (int); +extern void abort (void); + +int __attribute__((noinline,noclone)) +foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2) +{ + int i_sum = 0; + for( int y = 0; y < 16; y++ ) + { + i_sum += abs ( pix1[0] - pix2[0] ); + i_sum += abs ( pix1[1] - pix2[1] ); + i_sum += abs ( pix1[2] - pix2[2] ); + i_sum += abs ( pix1[3] - pix2[3] ); + i_sum += abs ( pix1[4] - pix2[4] ); + i_sum += abs ( pix1[5] - pix2[5] ); + i_sum += abs ( pix1[6] - pix2[6] ); + i_sum += abs ( pix1[7] - pix2[7] ); + i_sum += abs ( pix1[8] - pix2[8] ); + i_sum += abs ( pix1[9] - pix2[9] ); + i_sum += abs ( pix1[10] - pix2[10] ); + i_sum += abs ( pix1[11] - pix2[11] ); + i_sum += abs ( pix1[12] - pix2[12] ); + i_sum += abs ( pix1[13] - pix2[13] ); + i_sum += abs ( pix1[14] - pix2[14] ); + i_sum += abs ( pix1[15] - pix2[15] ); + pix1 += 16; + pix2 += i_stride_pix2; + } + return i_sum; +} + +int +main () +{ + check_vect (); + + uint8_t X[16*16]; + uint8_t Y[16*16]; + + for (int i = 0; i < 16*16; ++i) + { + X[i] = i; + Y[i] = 16*16 - i; + __asm__ volatile (""); + } + + if (foo (X, Y, 16) != 32512) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump "vect_recog_sad_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 8fe4dc6ad30..71df11c0f6f 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -828,6 +828,45 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); } +/* Transfer group and reduction information from STMT to its pattern stmt. */ + +static void +vect_fixup_reduc_chain (gimple stmt) +{ + gimple firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); + gimple stmtp; + gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp)) + && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))); + GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt)); + do + { + stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); + GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp; + stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); + if (stmt) + GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp)) + = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); + } + while (stmt); + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def; +} + +/* Fixup scalar cycles that now have their stmts detected as patterns. */ + +static void +vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo) +{ + gimple first; + unsigned i; + + FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first) + if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first))) + { + vect_fixup_reduc_chain (first); + LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i] + = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first)); + } +} /* Function vect_get_loop_niters. @@ -1708,6 +1747,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) vect_pattern_recog (loop_vinfo, NULL); + vect_fixup_scalar_cycles_with_patterns (loop_vinfo); + /* Analyze the access patterns of the data-refs in the loop (consecutive, complex, etc.). FORNOW: Only handle consecutive access pattern. */ @@ -4573,8 +4614,12 @@ vect_finalize_reduction: exit phi node. */ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) { - scalar_dest = gimple_assign_lhs ( - SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]); + gimple dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]; + /* Handle reduction patterns. */ + if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt))) + dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)); + + scalar_dest = gimple_assign_lhs (dest_stmt); group_size = 1; } @@ -4875,12 +4920,17 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, auto_vec phis; int vec_num; tree def0, def1, tem, op0, op1 = NULL_TREE; + bool first_p = true; /* In case of reduction chain we switch to the first stmt in the chain, but we don't update STMT_INFO, since only the last stmt is marked as reduction and has reduction properties. */ - if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) - stmt = GROUP_FIRST_ELEMENT (stmt_info); + if (GROUP_FIRST_ELEMENT (stmt_info) + && GROUP_FIRST_ELEMENT (stmt_info) != stmt) + { + stmt = GROUP_FIRST_ELEMENT (stmt_info); + first_p = false; + } if (nested_in_vect_loop_p (loop, stmt)) { @@ -4903,8 +4953,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, return false; /* Make sure it was already recognized as a reduction computation. */ - if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def - && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle) + if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def + && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle) return false; /* 2. Has this been recognized as a reduction pattern? @@ -4914,7 +4964,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, the STMT_VINFO_RELATED_STMT field records the last stmt in the original sequence that constitutes the pattern. */ - orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); if (orig_stmt) { orig_stmt_info = vinfo_for_stmt (orig_stmt); @@ -5040,20 +5090,16 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, return false; } + gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt, + !nested_cycle, &dummy); if (orig_stmt) - gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, - reduc_def_stmt, - !nested_cycle, - &dummy)); + gcc_assert (tmp == orig_stmt + || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt); else - { - gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt, - !nested_cycle, &dummy); - /* We changed STMT to be the first stmt in reduction chain, hence we - check that in this case the first element in the chain is STMT. */ - gcc_assert (stmt == tmp - || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt); - } + /* We changed STMT to be the first stmt in reduction chain, hence we + check that in this case the first element in the chain is STMT. */ + gcc_assert (stmt == tmp + || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt); if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt))) return false; @@ -5267,8 +5313,9 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, if (!vec_stmt) /* transformation not required. */ { - if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies, - reduc_index)) + if (first_p + && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies, + reduc_index)) return false; STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; return true; @@ -5324,11 +5371,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, prev_stmt_info = NULL; prev_phi_info = NULL; if (slp_node) - { - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out) - == TYPE_VECTOR_SUBPARTS (vectype_in)); - } + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); else { vec_num = 1; diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 9137144e67a..f38191d9884 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1793,6 +1793,11 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, scalar_stmts.safe_push (next); next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next)); } + /* Mark the first element of the reduction chain as reduction to properly + transform the node. In the reduction analysis phase only the last + element of the chain is marked as reduction. */ + if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def; } else { @@ -2738,7 +2743,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and {s5, s6, s7, s8}. */ - number_of_copies = least_common_multiple (nunits, group_size) / group_size; + number_of_copies = nunits * number_of_vectors / group_size; number_of_places_left_in_vector = nunits; elts = XALLOCAVEC (tree, nunits); @@ -3383,8 +3388,14 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, for the scalar stmts in each node of the SLP tree. Number of vector elements in one vector iteration is the number of scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector - size. */ - vec_stmts_size = (vectorization_factor * group_size) / nunits; + size. + Unless this is a SLP reduction in which case the number of vector + stmts is equal to the number of vector stmts of the children. */ + if (GROUP_FIRST_ELEMENT (stmt_info) + && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) + vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]); + else + vec_stmts_size = (vectorization_factor * group_size) / nunits; if (!SLP_TREE_VEC_STMTS (node).exists ()) { diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 06a59cb3995..6b018e53aad 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -7310,9 +7310,11 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) case vect_reduction_def: case vect_nested_cycle: - gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer - || relevance == vect_used_in_outer_by_reduction - || relevance == vect_unused_in_scope)); + gcc_assert (!bb_vinfo + && (relevance == vect_used_in_outer + || relevance == vect_used_in_outer_by_reduction + || relevance == vect_used_by_reduction + || relevance == vect_unused_in_scope)); break; case vect_induction_def: