tree-vect-loop.c (vect_fixup_reduc_chain): New function.

2015-05-28  Richard Biener  <rguenther@suse.de>

	* tree-vect-loop.c (vect_fixup_reduc_chain): New function.
	(vect_fixup_scalar_cycles_with_patterns): Likewise.
	(vect_analyze_loop_2): Call vect_fixup_scalar_cycles_with_patterns
	after pattern recog.
	(vect_create_epilog_for_reduction): Properly handle reductions
	with patterns.
	(vectorizable_reduction): Likewise.
	* tree-vect-slp.c (vect_analyze_slp_instance): Properly mark
	reduction chains.
	(vect_get_constant_vectors): Create the correct number of
	initial values for reductions.
	(vect_schedule_slp_instance): Handle reduction chains that are
	type changing properly.
	* tree-vect-stmts.c (vect_analyze_stmt): Adjust.

	* gcc.dg/vect/slp-reduc-sad.c: New testcase.

From-SVN: r223818
Author:    Richard Biener <rguenther@suse.de>
Date:      2015-05-28 13:29:41 +00:00
Commit:    14a6143710 (parent 228722248f)
Committer: Richard Biener
6 changed files with 173 additions and 32 deletions
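
For context on what the commit enables: vect_recog_sad_pattern rewrites each
abs-difference accumulation of a reduction chain into a SAD_EXPR that widens
sixteen uint8_t inputs into a small number of int accumulator lanes, and the
loop epilogue then reduces those lanes to the scalar sum.  The standalone C
sketch below models only those semantics; it is not GCC source, and sad_step,
lanes, and the lane count of 4 are illustrative assumptions.

#include <stdio.h>
#include <stdlib.h>

/* One SAD_EXPR-like step: accumulate |a[i] - b[i]| over 16 uint8_t
   inputs into 4 widened int lanes, as the recognized pattern does.  */
static void
sad_step (const unsigned char *a, const unsigned char *b, int lanes[4])
{
  for (int i = 0; i < 16; i++)
    lanes[i % 4] += abs (a[i] - b[i]);
}

int
main (void)
{
  unsigned char x[16], y[16];
  int lanes[4] = { 0, 0, 0, 0 };
  for (int i = 0; i < 16; i++)
    {
      x[i] = i;
      y[i] = 2 * i;
    }
  sad_step (x, y, lanes);
  /* Epilogue: reduce the accumulator lanes to the scalar result.  */
  printf ("%d\n", lanes[0] + lanes[1] + lanes[2] + lanes[3]); /* prints 120 */
  return 0;
}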

gcc/ChangeLog

@@ -1,3 +1,20 @@
+2015-05-28  Richard Biener  <rguenther@suse.de>
+
+	* tree-vect-loop.c (vect_fixup_reduc_chain): New function.
+	(vect_fixup_scalar_cycles_with_patterns): Likewise.
+	(vect_analyze_loop_2): Call vect_fixup_scalar_cycles_with_patterns
+	after pattern recog.
+	(vect_create_epilog_for_reduction): Properly handle reductions
+	with patterns.
+	(vectorizable_reduction): Likewise.
+	* tree-vect-slp.c (vect_analyze_slp_instance): Properly mark
+	reduction chains.
+	(vect_get_constant_vectors): Create the correct number of
+	initial values for reductions.
+	(vect_schedule_slp_instance): Handle reduction chains that are
+	type changing properly.
+	* tree-vect-stmts.c (vect_analyze_stmt): Adjust.
+
 2015-05-28  Richard Biener  <rguenther@suse.de>
 
 	PR tree-optimization/66142

gcc/testsuite/ChangeLog

@@ -1,3 +1,7 @@
+2015-05-28  Richard Biener  <rguenther@suse.de>
+
+	* gcc.dg/vect/slp-reduc-sad.c: New testcase.
+
 2015-05-28  Richard Biener  <rguenther@suse.de>
 
 	PR tree-optimization/66142

gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c

@@ -0,0 +1,64 @@
+/* { dg-require-effective-target vect_usad_char } */
+
+#include "tree-vect.h"
+
+typedef unsigned int uint32_t;
+typedef unsigned short uint16_t;
+typedef unsigned char uint8_t;
+
+extern int abs (int);
+extern void abort (void);
+
+int __attribute__((noinline,noclone))
+foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2)
+{
+  int i_sum = 0;
+  for( int y = 0; y < 16; y++ )
+    {
+      i_sum += abs ( pix1[0] - pix2[0] );
+      i_sum += abs ( pix1[1] - pix2[1] );
+      i_sum += abs ( pix1[2] - pix2[2] );
+      i_sum += abs ( pix1[3] - pix2[3] );
+      i_sum += abs ( pix1[4] - pix2[4] );
+      i_sum += abs ( pix1[5] - pix2[5] );
+      i_sum += abs ( pix1[6] - pix2[6] );
+      i_sum += abs ( pix1[7] - pix2[7] );
+      i_sum += abs ( pix1[8] - pix2[8] );
+      i_sum += abs ( pix1[9] - pix2[9] );
+      i_sum += abs ( pix1[10] - pix2[10] );
+      i_sum += abs ( pix1[11] - pix2[11] );
+      i_sum += abs ( pix1[12] - pix2[12] );
+      i_sum += abs ( pix1[13] - pix2[13] );
+      i_sum += abs ( pix1[14] - pix2[14] );
+      i_sum += abs ( pix1[15] - pix2[15] );
+      pix1 += 16;
+      pix2 += i_stride_pix2;
+    }
+  return i_sum;
+}
+
+int
+main ()
+{
+  check_vect ();
+
+  uint8_t X[16*16];
+  uint8_t Y[16*16];
+
+  for (int i = 0; i < 16*16; ++i)
+    {
+      X[i] = i;
+      Y[i] = 16*16 - i;
+      __asm__ volatile ("");
+    }
+
+  if (foo (X, Y, 16) != 32512)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_sad_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
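
A note on the expected value in the testcase: X and Y are uint8_t, so
Y[0] = 16*16 - 0 = 256 wraps to 0 and the i = 0 term contributes |0 - 0| = 0.
For i >= 1, |X[i] - Y[i]| = |2i - 256|; summing |2i - 256| over i = 0..255
without the wraparound would give 32768, and dropping the 256 that the wrapped
first term no longer contributes yields the checked value 32512.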

gcc/tree-vect-loop.c

@@ -828,6 +828,45 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 }
 
+/* Transfer group and reduction information from STMT to its pattern stmt.  */
+
+static void
+vect_fixup_reduc_chain (gimple stmt)
+{
+  gimple firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+  gimple stmtp;
+  gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
+	      && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
+  GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
+  do
+    {
+      stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
+      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+      if (stmt)
+	GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
+	  = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+    }
+  while (stmt);
+  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
+}
+
+/* Fixup scalar cycles that now have their stmts detected as patterns.  */
+
+static void
+vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
+{
+  gimple first;
+  unsigned i;
+
+  FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
+    if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
+      {
+	vect_fixup_reduc_chain (first);
+	LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
+	  = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
+      }
+}
+
 /* Function vect_get_loop_niters.
@@ -1708,6 +1747,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 
   vect_pattern_recog (loop_vinfo, NULL);
 
+  vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
+
   /* Analyze the access patterns of the data-refs in the loop (consecutive,
      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
@@ -4573,8 +4614,12 @@ vect_finalize_reduction:
          exit phi node.  */
       if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
         {
-          scalar_dest = gimple_assign_lhs (
-                          SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
+          gimple dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
+          /* Handle reduction patterns.  */
+          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
+            dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
+          scalar_dest = gimple_assign_lhs (dest_stmt);
           group_size = 1;
         }
@@ -4875,12 +4920,17 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   auto_vec<gimple> phis;
   int vec_num;
   tree def0, def1, tem, op0, op1 = NULL_TREE;
+  bool first_p = true;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
      and has reduction properties.  */
-  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
-    stmt = GROUP_FIRST_ELEMENT (stmt_info);
+  if (GROUP_FIRST_ELEMENT (stmt_info)
+      && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
+    {
+      stmt = GROUP_FIRST_ELEMENT (stmt_info);
+      first_p = false;
+    }
 
   if (nested_in_vect_loop_p (loop, stmt))
     {
@@ -4903,8 +4953,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     return false;
 
   /* Make sure it was already recognized as a reduction computation.  */
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
-      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
+  if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
+      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
     return false;
 
   /* 2. Has this been recognized as a reduction pattern?
@@ -4914,7 +4964,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
      the STMT_VINFO_RELATED_STMT field records the last stmt in
      the original sequence that constitutes the pattern.  */
 
-  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+  orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
   if (orig_stmt)
     {
       orig_stmt_info = vinfo_for_stmt (orig_stmt);
@@ -5040,20 +5090,16 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       return false;
     }
 
+  gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
+                                         !nested_cycle, &dummy);
   if (orig_stmt)
-    gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
-                                                       reduc_def_stmt,
-                                                       !nested_cycle,
-                                                       &dummy));
+    gcc_assert (tmp == orig_stmt
+                || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
   else
-    {
-      gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
-                                             !nested_cycle, &dummy);
-      /* We changed STMT to be the first stmt in reduction chain, hence we
-         check that in this case the first element in the chain is STMT.  */
-      gcc_assert (stmt == tmp
-                  || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
-    }
+    /* We changed STMT to be the first stmt in reduction chain, hence we
+       check that in this case the first element in the chain is STMT.  */
+    gcc_assert (stmt == tmp
+                || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
 
   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
     return false;
@@ -5267,8 +5313,9 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
 
   if (!vec_stmt) /* transformation not required.  */
     {
-      if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
-                                      reduc_index))
+      if (first_p
+          && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
+                                         reduc_index))
         return false;
       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
       return true;
@@ -5324,11 +5371,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   prev_stmt_info = NULL;
   prev_phi_info = NULL;
   if (slp_node)
-    {
-      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
-                  == TYPE_VECTOR_SUBPARTS (vectype_in));
-    }
+    vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   else
     {
       vec_num = 1;
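
The heart of the tree-vect-loop.c change is the relinking done by
vect_fixup_reduc_chain: the scalar chain S1 -> S2 -> ... -> Sn has a parallel
sequence of pattern stmts P1 ... Pn (reachable via STMT_VINFO_RELATED_STMT),
and the group links are copied over so that the chain the vectorizer follows
becomes P1 -> P2 -> ... -> Pn with P1 as GROUP_FIRST_ELEMENT.  A minimal
runnable model of that walk, using a made-up struct stmt in place of gimple
statements and their stmt_info fields:

#include <assert.h>
#include <stdio.h>

struct stmt
{
  const char *name;
  struct stmt *next;     /* stands in for GROUP_NEXT_ELEMENT */
  struct stmt *first;    /* stands in for GROUP_FIRST_ELEMENT */
  struct stmt *related;  /* stands in for STMT_VINFO_RELATED_STMT */
};

/* Walk the scalar chain and hand the parallel pattern stmts the group
   links, mirroring the loop in vect_fixup_reduc_chain.  */
static void
fixup_reduc_chain (struct stmt *s)
{
  struct stmt *firstp = s->related, *p;
  do
    {
      p = s->related;
      p->first = firstp;
      s = s->next;
      if (s)
        p->next = s->related;
    }
  while (s);
}

int
main (void)
{
  struct stmt p1 = { "P1" }, p2 = { "P2" };
  struct stmt s2 = { "S2", 0, 0, &p2 };
  struct stmt s1 = { "S1", &s2, 0, &p1 };
  s1.first = s2.first = &s1;
  fixup_reduc_chain (&s1);
  assert (p1.next == &p2 && p1.first == &p1 && p2.first == &p1);
  printf ("chain head: %s\n", p1.first->name); /* prints P1 */
  return 0;
}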

gcc/tree-vect-slp.c

@@ -1793,6 +1793,11 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
           scalar_stmts.safe_push (next);
           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
         }
+      /* Mark the first element of the reduction chain as reduction to properly
+         transform the node.  In the reduction analysis phase only the last
+         element of the chain is marked as reduction.  */
+      if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
+        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
     }
   else
     {
@@ -2738,7 +2743,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
      {s5, s6, s7, s8}.  */
 
-  number_of_copies = least_common_multiple (nunits, group_size) / group_size;
+  number_of_copies = nunits * number_of_vectors / group_size;
 
   number_of_places_left_in_vector = nunits;
   elts = XALLOCAVEC (tree, nunits);
@@ -3383,8 +3388,14 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
      for the scalar stmts in each node of the SLP tree.  Number of vector
      elements in one vector iteration is the number of scalar elements in
      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
-     size.  */
-  vec_stmts_size = (vectorization_factor * group_size) / nunits;
+     size.
+     Unless this is a SLP reduction in which case the number of vector
+     stmts is equal to the number of vector stmts of the children.  */
+  if (GROUP_FIRST_ELEMENT (stmt_info)
+      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
+    vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
+  else
+    vec_stmts_size = (vectorization_factor * group_size) / nunits;
 
   if (!SLP_TREE_VEC_STMTS (node).exists ())
     {
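
Illustrative arithmetic for the two changed size computations; the parameter
values below are assumptions chosen for the example, not taken from a
vectorizer dump.  The old number_of_copies formula in vect_get_constant_vectors
never consulted number_of_vectors, so when more vectors of initial values were
requested than one least-common-multiple's worth, too few were created; the
new formula fills exactly number_of_vectors * nunits elements:

#include <stdio.h>

static int gcd (int a, int b) { return b ? gcd (b, a % b) : a; }
static int lcm (int a, int b) { return a / gcd (a, b) * b; }

int
main (void)
{
  int nunits = 4;             /* lanes per vector, e.g. V4SI */
  int group_size = 8;         /* scalar stmts in the SLP group */
  int number_of_vectors = 4;  /* vectors of initial values requested */

  /* Old: fills lcm (nunits, group_size) elements, independent of the
     number of vectors actually requested.  */
  printf ("old number_of_copies = %d\n",
          lcm (nunits, group_size) / group_size);   /* prints 1 */
  /* New: fills exactly number_of_vectors * nunits elements.  */
  printf ("new number_of_copies = %d\n",
          nunits * number_of_vectors / group_size); /* prints 2 */
  return 0;
}

The vect_schedule_slp_instance hunk is the same concern from the other side:
for a type-changing reduction chain the node's own
(vectorization_factor * group_size) / nunits count can disagree with what its
children produce, so the child's SLP_TREE_NUMBER_OF_VEC_STMTS is used directly.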

gcc/tree-vect-stmts.c

@@ -7310,9 +7310,11 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
       case vect_reduction_def:
       case vect_nested_cycle:
-        gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
-                    || relevance == vect_used_in_outer_by_reduction
-                    || relevance == vect_unused_in_scope));
+        gcc_assert (!bb_vinfo
+                    && (relevance == vect_used_in_outer
+                        || relevance == vect_used_in_outer_by_reduction
+                        || relevance == vect_used_by_reduction
+                        || relevance == vect_unused_in_scope));
         break;
 
       case vect_induction_def:
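
The relaxed assertion ties in with the reduction-chain changes above: once a
chain has been rewired to its pattern stmts, stmts feeding the reduction can
evidently reach vect_analyze_stmt marked vect_used_by_reduction, so that
relevance is added to the combinations the assert accepts for
vect_reduction_def and vect_nested_cycle defs.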