re PR tree-optimization/37027 (SLP loop vectorization missing support for reductions)
PR tree-optimization/37027 * tree-vectorizer.h (struct _loop_vec_info): Add new field reductions and macro to access it. (vectorizable_reduction): Add argument. (vect_get_slp_defs): Likewise. * tree-vect-loop.c (vect_analyze_scalar_cycles_1): Collect reduction statements for possible use in SLP. (new_loop_vec_info): Initialize LOOP_VINFO_REDUCTIONS. (destroy_loop_vec_info): Free LOOP_VINFO_REDUCTIONS. (vect_create_epilog_for_reduction): Handle SLP. Modify documentation, add new argument. (vectorizable_reduction): Likewise. * tree-vect-stmts.c (vect_get_vec_defs): Update call to vect_get_slp_defs. (vectorizable_type_demotion, vectorizable_type_promotion, vectorizable_store): Likewise. (vect_analyze_stmt): Update call to vectorizable_reduction. (vect_transform_stmt): Likewise. * tree-vect-slp.c (vect_get_and_check_slp_defs): Handle reduction. (vect_build_slp_tree): Fix indentation. Check that there are no loads from different interleaving chains in same node. (vect_slp_rearrange_stmts): New function. (vect_supported_load_permutation_p): Allow load permutations for reductions. Call vect_slp_rearrange_stmts() to rearrange statements inside SLP nodes if necessary. (vect_analyze_slp_instance): Handle reductions. (vect_analyze_slp): Try to build SLP instances originating from groups of reductions. (vect_detect_hybrid_slp_stmts): Skip reduction statements. (vect_get_constant_vectors): Create initial vectors for reductions according to reduction code. Add new argument. (vect_get_slp_defs): Add new argument, pass it to vect_get_constant_vectors. (vect_schedule_slp_instance): Remove SLP tree root statements. From-SVN: r158506
This commit is contained in:
parent
5a2fa9e8bf
commit
b5aeb3bb3e
|
@ -1,3 +1,40 @@
|
|||
2010-04-19 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
PR tree-optimization/37027
|
||||
* tree-vectorizer.h (struct _loop_vec_info): Add new field reductions
|
||||
and macro to access it.
|
||||
(vectorizable_reduction): Add argument.
|
||||
(vect_get_slp_defs): Likewise.
|
||||
* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Collect reduction
|
||||
statements for possible use in SLP.
|
||||
(new_loop_vec_info): Initialize LOOP_VINFO_REDUCTIONS.
|
||||
(destroy_loop_vec_info): Free LOOP_VINFO_REDUCTIONS.
|
||||
(vect_create_epilog_for_reduction): Handle SLP. Modify documentation,
|
||||
add new argument.
|
||||
(vectorizable_reduction): Likewise.
|
||||
* tree-vect-stmts.c (vect_get_vec_defs): Update call to
|
||||
vect_get_slp_defs.
|
||||
(vectorizable_type_demotion, vectorizable_type_promotion,
|
||||
vectorizable_store): Likewise.
|
||||
(vect_analyze_stmt): Update call to vectorizable_reduction.
|
||||
(vect_transform_stmt): Likewise.
|
||||
* tree-vect-slp.c (vect_get_and_check_slp_defs): Handle reduction.
|
||||
(vect_build_slp_tree): Fix indentation. Check that there are no loads
|
||||
from different interleaving chains in same node.
|
||||
(vect_slp_rearrange_stmts): New function.
|
||||
(vect_supported_load_permutation_p): Allow load permutations for
|
||||
reductions. Call vect_slp_rearrange_stmts() to rearrange statements
|
||||
inside SLP nodes if necessary.
|
||||
(vect_analyze_slp_instance): Handle reductions.
|
||||
(vect_analyze_slp): Try to build SLP instances originating from groups
|
||||
of reductions.
|
||||
(vect_detect_hybrid_slp_stmts): Skip reduction statements.
|
||||
(vect_get_constant_vectors): Create initial vectors for reductions
|
||||
according to reduction code. Add new argument.
|
||||
(vect_get_slp_defs): Add new argument, pass it to
|
||||
vect_get_constant_vectors.
|
||||
(vect_schedule_slp_instance): Remove SLP tree root statements.
|
||||
|
||||
2010-04-19 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* tree.h (ENUM_IS_SCOPED): Define.
|
||||
|
|
|
@ -1,3 +1,14 @@
|
|||
2010-04-19 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
PR tree-optimization/37027
|
||||
* lib/target-supports.exp
|
||||
(check_effective_target_vect_widen_sum_hi_to_si_pattern): New.
|
||||
* gcc.dg/vect/pr37027.c: New test.
|
||||
* gcc.dg/vect/slp-reduc-1.c, gcc.dg/vect/slp-reduc-2.c,
|
||||
gcc.dg/vect/slp-reduc-3.c, gcc.dg/vect/slp-reduc-4.c,
|
||||
gcc.dg/vect/slp-reduc-5.c, gcc.dg/vect/slp-reduc-6.c,
|
||||
gcc.dg/vect/vect-complex-6.c: Likewise.
|
||||
|
||||
2010-04-19 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* g++.dg/debug/dwarf2/enum1.C: New test.
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
struct mystr
|
||||
{
|
||||
int f1;
|
||||
int f2;
|
||||
};
|
||||
|
||||
struct mystr a[16];
|
||||
struct mystr b[16];
|
||||
int res1, res2;
|
||||
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
int i;
|
||||
int sum1;
|
||||
int sum2;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
sum1 += a[i].f1 + b[i].f1;
|
||||
sum2 += a[i].f2 + b[i].f2;
|
||||
}
|
||||
|
||||
res1 = sum1;
|
||||
res2 = sum2;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_add } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_add } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
|
||||
unsigned int ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned int uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
|
||||
/* Vectorization of reduction using loop-aware SLP. */
|
||||
|
||||
/* Run N iterations in which four running sums of ub[k] - uc[k] are
   accumulated, one sum per offset k = 4*i + {0,1,2,3}, starting from
   5, 10, 20 and 30 respectively.  Abort unless the four totals equal
   RES0..RES3; otherwise return 0.  The function is marked noinline.  */
__attribute__ ((noinline))
|
||||
int main1 (int n, int res0, int res1, int res2, int res3)
|
||||
{
|
||||
int i;
|
||||
unsigned int udiff0 = 5, udiff1 = 10, udiff2 = 20, udiff3 = 30;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
/* The four updates are written in descending offset order (3, 2, 1, 0),
   i.e. not in the order the elements are laid out in memory.  */
udiff3 += (ub[4*i + 3] - uc[4*i + 3]);
|
||||
udiff2 += (ub[4*i + 2] - uc[4*i + 2]);
|
||||
udiff1 += (ub[4*i + 1] - uc[4*i + 1]);
|
||||
udiff0 += (ub[4*i] - uc[4*i]);
|
||||
}
|
||||
|
||||
/* Check results: */
|
||||
if (udiff0 != res0
|
||||
|| udiff1 != res1
|
||||
|| udiff2 != res2
|
||||
|| udiff3 != res3)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test driver: call check_vect () (presumably provided by tree-vect.h --
   not shown here), then run main1 twice, once with N/4 iterations and once
   with N/4 - 1 iterations, each time passing the expected totals.  */
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (N/4, 53, 66, 84, 102);
|
||||
main1 (N/4 - 1, 29, 40, 56, 72);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_add } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_add } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
|
||||
unsigned int ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned int uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
|
||||
/* Vectorization of reduction using loop-aware SLP (with unrolling). */
|
||||
|
||||
/* Run N iterations in which two running sums of ub[k] - uc[k] are
   accumulated, one for even offsets k = 2*i (starting from 5) and one for
   odd offsets k = 2*i + 1 (starting from 10).  Abort unless the totals
   equal RES0 and RES1; otherwise return 0.  RES2 and RES3 are accepted but
   never read in this function.  */
__attribute__ ((noinline))
|
||||
int main1 (int n, int res0, int res1, int res2, int res3)
|
||||
{
|
||||
int i;
|
||||
unsigned int udiff0 = 5, udiff1 = 10;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
/* The odd-offset sum is updated before the even-offset one.  */
udiff1 += (ub[2*i + 1] - uc[2*i + 1]);
|
||||
udiff0 += (ub[2*i] - uc[2*i]);
|
||||
}
|
||||
|
||||
/* Check results: */
|
||||
if (udiff0 != res0
|
||||
|| udiff1 != res1)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test driver: call check_vect () (presumably provided by tree-vect.h --
   not shown here), then run main1 once with N/2 iterations.  Only the first
   two expected values (117 and 138) are compared by main1; the trailing
   84 and 102 fill its unused RES2/RES3 parameters.  */
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (N/2, 117, 138, 84, 102);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_add } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_add } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT1 21834
|
||||
#define DOT2 21876
|
||||
|
||||
unsigned short X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
unsigned short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
|
||||
/* short->short->int dot product.
|
||||
Not detected as a dot-product pattern.
|
||||
Requires support for non-widening multiplication and widening-summation.
|
||||
Vectorized with loop-aware SLP. */
|
||||
__attribute__ ((noinline)) unsigned int
|
||||
foo1(int len, int *result1, int *result2)
|
||||
{
|
||||
int i;
|
||||
unsigned int res1 = 10, res2 = 20;
|
||||
unsigned short prod;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
prod = X[2*i] * Y[2*i];
|
||||
res1 += prod;
|
||||
prod = X[2*i+1] * Y[2*i+1];
|
||||
res2 += prod;
|
||||
}
|
||||
|
||||
*result1 = res1;
|
||||
*result2 = res2;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test driver: call check_vect () (presumably provided by tree-vect.h --
   not shown here), fill X[i] with i and Y[i] with 64-i for every i below N,
   run foo1 for N/2 iterations, and abort unless the two results equal DOT1
   and DOT2.  Returns 0 on success.  */
int main (void)
|
||||
{
|
||||
unsigned int dot1, dot2;
|
||||
unsigned short i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
/* Initialize the two input arrays.  */
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
foo1 (N/2, &dot1, &dot2);
|
||||
|
||||
if (dot1 != DOT1 || dot2 != DOT2)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The initialization loop in main also gets vectorized. */
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_short_mult && vect_widen_sum_hi_to_si } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_widen_sum_hi_to_si_pattern } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
|
@ -0,0 +1,60 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 128
|
||||
|
||||
unsigned int uc[N];
|
||||
|
||||
/* Vectorization of reduction using loop-aware SLP. */
|
||||
|
||||
/* Run N iterations in which eight running maxima are maintained, one per
   offset k = 8*i + {0..7} into the global array UC, starting from the
   initial values 5, 10, 20, 30, 2, 13, 7 and 313.  Abort unless the eight
   maxima equal RES0..RES7; otherwise return 0.  */
__attribute__ ((noinline))
|
||||
int main1 (int n, int res0, int res1, int res2, int res3, int res4, int res5, int res6, int res7)
|
||||
{
|
||||
int i;
|
||||
unsigned int max0 = 5, max1 = 10, max2 = 20, max3 = 30, max4 = 2, max5 = 13, max6 = 7, max7 = 313;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
/* The updates are written in the offset order 2, 3, 1, 7, 6, 0, 4, 5,
   i.e. not in the order the elements are laid out in memory.  */
max2 = max2 < uc[8*i+2] ? uc[8*i+2] : max2;
|
||||
max3 = max3 < uc[8*i+3] ? uc[8*i+3] : max3;
|
||||
max1 = max1 < uc[8*i+1] ? uc[8*i+1] : max1;
|
||||
max7 = max7 < uc[8*i+7] ? uc[8*i+7] : max7;
|
||||
max6 = max6 < uc[8*i+6] ? uc[8*i+6] : max6;
|
||||
max0 = max0 < uc[8*i] ? uc[8*i] : max0;
|
||||
max4 = max4 < uc[8*i+4] ? uc[8*i+4] : max4;
|
||||
max5 = max5 < uc[8*i+5] ? uc[8*i+5] : max5;
|
||||
}
|
||||
|
||||
/* Check results: */
|
||||
if (max0 != res0
|
||||
|| max1 != res1
|
||||
|| max2 != res2
|
||||
|| max3 != res3
|
||||
|| max4 != res4
|
||||
|| max5 != res5
|
||||
|| max6 != res6
|
||||
|| max7 != res7)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test driver: call check_vect () (presumably provided by tree-vect.h --
   not shown here), fill uc[i] with i+3 for every i below N, then run main1
   for N/8 iterations with the eight expected maxima.  */
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
uc[i] = i+3;
|
||||
|
||||
main1 (N/8, 123, 124, 125, 126, 127, 128, 129, 313);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_max } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 128
|
||||
|
||||
int c[N];
|
||||
|
||||
/* Vectorization of reduction using loop-aware SLP. */
|
||||
|
||||
/* Run N iterations in which two running maxima over the signed global
   array C are maintained: MAX0 over the even offsets 2*i (starting from
   -100) and MAX1 over the odd offsets 2*i+1 (starting from -313).  Abort
   unless they equal RES0 and RES1; otherwise return 0.  */
__attribute__ ((noinline))
|
||||
int main1 (int n, int res0, int res1)
|
||||
{
|
||||
int i;
|
||||
int max0 = -100, max1 = -313;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
/* The odd-offset maximum is updated before the even-offset one.  */
max1 = max1 < c[2*i+1] ? c[2*i+1] : max1;
|
||||
max0 = max0 < c[2*i] ? c[2*i] : max0;
|
||||
}
|
||||
|
||||
/* Check results: */
|
||||
if (max0 != res0
|
||||
|| max1 != res1)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test driver: call check_vect () (presumably provided by tree-vect.h --
   not shown here), fill c[i] with -(i+3) for every i below N, overwrite the
   first two elements with -100, then run main1 for N/2 iterations expecting
   maxima of -5 and -6.  */
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = (i+3) * -1;
|
||||
|
||||
/* Replace the two largest initial values (-3 and -4) so that the expected
   maxima come from c[2] and c[3] instead.  */
c[0] = c[1] = -100;
|
||||
main1 (N/2, -5, -6);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_int_max } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_max } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 128
|
||||
|
||||
int a[N], b[N];
|
||||
|
||||
/* Vectorization of reduction. Loop-aware SLP is not possible, because of
|
||||
different arrays. */
|
||||
|
||||
/* Run N iterations in which SUM1 accumulates a[2*i] and SUM0 accumulates
   b[2*i], both starting from 0.  The two sums read from two different
   global arrays.  Abort unless SUM0 equals RES0 and SUM1 equals RES1;
   otherwise return 0.  */
__attribute__ ((noinline))
|
||||
int main1 (int n, int res0, int res1)
|
||||
{
|
||||
int i;
|
||||
int sum0 = 0, sum1 = 0;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
sum1 += a[2*i];
|
||||
sum0 += b[2*i];
|
||||
}
|
||||
|
||||
/* Check results: */
|
||||
if (sum0 != res0
|
||||
|| sum1 != res1)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test driver: call check_vect () (presumably provided by tree-vect.h --
   not shown here), set a[i] and b[i] to i for every i below N, then run
   main1 for N/2 iterations expecting both sums to be 4032.  */
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = b[i] = i;
|
||||
|
||||
main1 (N/2, 4032, 4032);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_int_add } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 1 "vect" { target { ! vect_no_int_add } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
@ -2105,6 +2105,25 @@ proc check_effective_target_vect_perm { } {
|
|||
return $et_vect_perm_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# widening summation of *short* args into *int* result, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
    # The cache variable has to be declared global under exactly the name
    # that is tested and set below.  Declaring a differently named global
    # leaves the "_saved" variable proc-local, so it is gone on return and
    # the "using cached result" branch can never be taken.
    global et_vect_widen_sum_hi_to_si_pattern_saved

    if [info exists et_vect_widen_sum_hi_to_si_pattern_saved] {
        verbose "check_effective_target_vect_widen_sum_hi_to_si_pattern: using cached result" 2
    } else {
        # Default to "not supported"; only powerpc targets are enabled here.
        set et_vect_widen_sum_hi_to_si_pattern_saved 0
        if { [istarget powerpc*-*-*] } {
            set et_vect_widen_sum_hi_to_si_pattern_saved 1
        }
    }
    verbose "check_effective_target_vect_widen_sum_hi_to_si_pattern: returning $et_vect_widen_sum_hi_to_si_pattern_saved" 2
    return $et_vect_widen_sum_hi_to_si_pattern_saved
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# widening summation of *short* args into *int* result, 0 otherwise.
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -670,6 +670,8 @@ vect_pattern_recog_1 (
|
|||
tree pattern_vectype;
|
||||
tree type_in, type_out;
|
||||
enum tree_code code;
|
||||
int i;
|
||||
gimple next;
|
||||
|
||||
pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out);
|
||||
if (!pattern_stmt)
|
||||
|
@ -735,7 +737,13 @@ vect_pattern_recog_1 (
|
|||
STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
|
||||
STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;
|
||||
|
||||
return;
|
||||
/* Patterns cannot be vectorized using SLP, because they change the order of
|
||||
computation. */
|
||||
for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
|
||||
next);
|
||||
i++)
|
||||
if (next == stmt)
|
||||
VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -273,6 +273,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
break;
|
||||
|
||||
case vect_internal_def:
|
||||
case vect_reduction_def:
|
||||
if (i == 0)
|
||||
VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
|
||||
else
|
||||
|
@ -332,7 +333,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
HOST_WIDE_INT dummy;
|
||||
bool permutation = false;
|
||||
unsigned int load_place;
|
||||
gimple first_load;
|
||||
gimple first_load, prev_first_load = NULL;
|
||||
|
||||
/* For every stmt in NODE find its def stmt/s. */
|
||||
for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
|
||||
|
@ -485,42 +486,62 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
&pattern0, &pattern1))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Load. */
|
||||
/* FORNOW: Check that there is no gap between the loads. */
|
||||
if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
|
||||
&& DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
|
||||
|| (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
|
||||
&& DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
{
|
||||
fprintf (vect_dump, "Build SLP failed: strided "
|
||||
"loads have gaps ");
|
||||
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Load. */
|
||||
/* FORNOW: Check that there is no gap between the loads. */
|
||||
if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
|
||||
&& DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
|
||||
|| (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
|
||||
&& DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
{
|
||||
fprintf (vect_dump, "Build SLP failed: strided "
|
||||
"loads have gaps ");
|
||||
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check that the size of interleaved loads group is not
|
||||
greater than the SLP group size. */
|
||||
if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
|
||||
> ncopies * group_size)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
{
|
||||
fprintf (vect_dump, "Build SLP failed: the number of "
|
||||
"interleaved loads is greater than"
|
||||
" the SLP group size ");
|
||||
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
||||
}
|
||||
/* Check that the size of interleaved loads group is not
|
||||
greater than the SLP group size. */
|
||||
if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
{
|
||||
fprintf (vect_dump, "Build SLP failed: the number of "
|
||||
"interleaved loads is greater than"
|
||||
" the SLP group size ");
|
||||
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
|
||||
first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
|
||||
if (prev_first_load)
|
||||
{
|
||||
/* Check that there are no loads from different interleaving
|
||||
chains in the same node. The only exception is complex
|
||||
numbers. */
|
||||
if (prev_first_load != first_load
|
||||
&& rhs_code != REALPART_EXPR
|
||||
&& rhs_code != IMAGPART_EXPR)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
{
|
||||
fprintf (vect_dump, "Build SLP failed: different "
|
||||
"interleaving chains in one node ");
|
||||
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
prev_first_load = first_load;
|
||||
|
||||
if (first_load == stmt)
|
||||
{
|
||||
|
@ -787,6 +808,39 @@ vect_supported_slp_permutation_p (slp_instance instance)
|
|||
}
|
||||
|
||||
|
||||
/* Rearrange the statements of NODE according to PERMUTATION. */
|
||||
|
||||
static void
|
||||
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
|
||||
VEC (int, heap) *permutation)
|
||||
{
|
||||
gimple stmt;
|
||||
VEC (gimple, heap) *tmp_stmts;
|
||||
unsigned int index, i;
|
||||
|
||||
if (!node)
|
||||
return;
|
||||
|
||||
vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
|
||||
vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
|
||||
|
||||
gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
|
||||
tmp_stmts = VEC_alloc (gimple, heap, group_size);
|
||||
|
||||
for (i = 0; i < group_size; i++)
|
||||
VEC_safe_push (gimple, heap, tmp_stmts, NULL);
|
||||
|
||||
for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
|
||||
{
|
||||
index = VEC_index (int, permutation, i);
|
||||
VEC_replace (gimple, tmp_stmts, index, stmt);
|
||||
}
|
||||
|
||||
VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
|
||||
SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
|
||||
}
|
||||
|
||||
|
||||
/* Check if the required load permutation is supported.
|
||||
LOAD_PERMUTATION contains a list of indices of the loads.
|
||||
In SLP this permutation is relative to the order of strided stores that are
|
||||
|
@ -796,9 +850,11 @@ static bool
|
|||
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
|
||||
VEC (int, heap) *load_permutation)
|
||||
{
|
||||
int i = 0, j, prev = -1, next, k;
|
||||
bool supported;
|
||||
int i = 0, j, prev = -1, next, k, number_of_groups;
|
||||
bool supported, bad_permutation = false;
|
||||
sbitmap load_index;
|
||||
slp_tree node;
|
||||
gimple stmt;
|
||||
|
||||
/* FORNOW: permutations are only supported in SLP. */
|
||||
if (!slp_instn)
|
||||
|
@ -811,9 +867,72 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
|
|||
fprintf (vect_dump, "%d ", next);
|
||||
}
|
||||
|
||||
/* In case of reduction every load permutation is allowed, since the order
|
||||
of the reduction statements is not important (as opposed to the case of
|
||||
strided stores). The only condition we need to check is that all the
|
||||
load nodes are of the same size and have the same permutation (and then
|
||||
rearrange all the nodes of the SLP instance according to this
|
||||
permutation). */
|
||||
|
||||
/* Check that all the load nodes are of the same size. */
|
||||
for (i = 0;
|
||||
VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
|
||||
i++)
|
||||
if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
|
||||
!= (unsigned) group_size)
|
||||
return false;
|
||||
|
||||
node = SLP_INSTANCE_TREE (slp_instn);
|
||||
stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
|
||||
/* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
|
||||
instance, not all the loads belong to the same node or interleaving
|
||||
group. Hence, we need to divide them into groups according to
|
||||
GROUP_SIZE. */
|
||||
number_of_groups = VEC_length (int, load_permutation) / group_size;
|
||||
|
||||
/* Reduction (there are no data-refs in the root). */
|
||||
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
|
||||
{
|
||||
int first_group_load_index;
|
||||
|
||||
/* Compare all the permutation sequences to the first one. */
|
||||
for (i = 1; i < number_of_groups; i++)
|
||||
{
|
||||
k = 0;
|
||||
for (j = i * group_size; j < i * group_size + group_size; j++)
|
||||
{
|
||||
next = VEC_index (int, load_permutation, j);
|
||||
first_group_load_index = VEC_index (int, load_permutation, k);
|
||||
|
||||
if (next != first_group_load_index)
|
||||
{
|
||||
bad_permutation = true;
|
||||
break;
|
||||
}
|
||||
|
||||
k++;
|
||||
}
|
||||
|
||||
if (bad_permutation)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!bad_permutation)
|
||||
{
|
||||
/* This permutation is valid for reduction. Since the order of the
|
||||
statements in the nodes is not important unless they are memory
|
||||
accesses, we can rearrange the statements in all the nodes
|
||||
according to the order of the loads. */
|
||||
vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
|
||||
load_permutation);
|
||||
VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* FORNOW: the only supported permutation is 0..01..1.. of length equal to
|
||||
GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
|
||||
well. */
|
||||
well (unless it's reduction). */
|
||||
if (VEC_length (int, load_permutation)
|
||||
!= (unsigned int) (group_size * group_size))
|
||||
return false;
|
||||
|
@ -896,17 +1015,28 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
slp_tree node = XNEW (struct _slp_tree);
|
||||
unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
|
||||
unsigned int unrolling_factor = 1, nunits;
|
||||
tree vectype, scalar_type;
|
||||
tree vectype, scalar_type = NULL_TREE;
|
||||
gimple next;
|
||||
unsigned int vectorization_factor = 0;
|
||||
int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
|
||||
int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
|
||||
unsigned int max_nunits = 0;
|
||||
VEC (int, heap) *load_permutation;
|
||||
VEC (slp_tree, heap) *loads;
|
||||
struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
|
||||
|
||||
if (dr)
|
||||
{
|
||||
scalar_type = TREE_TYPE (DR_REF (dr));
|
||||
vectype = get_vectype_for_scalar_type (scalar_type);
|
||||
group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
|
||||
}
|
||||
else
|
||||
{
|
||||
gcc_assert (loop_vinfo);
|
||||
vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
|
||||
group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
|
||||
}
|
||||
|
||||
scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
|
||||
vinfo_for_stmt (stmt))));
|
||||
vectype = get_vectype_for_scalar_type (scalar_type);
|
||||
if (!vectype)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
|
@ -914,6 +1044,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
|
||||
print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -938,11 +1069,29 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
/* Create a node (a root of the SLP tree) for the packed strided stores. */
|
||||
SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
|
||||
next = stmt;
|
||||
/* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
|
||||
while (next)
|
||||
if (dr)
|
||||
{
|
||||
VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
|
||||
next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
|
||||
/* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
|
||||
while (next)
|
||||
{
|
||||
VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
|
||||
next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Collect reduction statements. */
|
||||
for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
|
||||
next);
|
||||
i++)
|
||||
{
|
||||
VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
{
|
||||
fprintf (vect_dump, "pushing reduction into node: ");
|
||||
print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SLP_TREE_VEC_STMTS (node) = NULL;
|
||||
|
@ -1035,7 +1184,7 @@ bool
|
|||
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
|
||||
{
|
||||
unsigned int i;
|
||||
VEC (gimple, heap) *strided_stores;
|
||||
VEC (gimple, heap) *strided_stores, *reductions = NULL;
|
||||
gimple store;
|
||||
bool ok = false;
|
||||
|
||||
|
@ -1043,10 +1192,14 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
|
|||
fprintf (vect_dump, "=== vect_analyze_slp ===");
|
||||
|
||||
if (loop_vinfo)
|
||||
strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
|
||||
{
|
||||
strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
|
||||
reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
|
||||
}
|
||||
else
|
||||
strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
|
||||
|
||||
/* Find SLP sequences starting from groups of strided stores. */
|
||||
for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
|
||||
if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
|
||||
ok = true;
|
||||
|
@ -1059,6 +1212,12 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Find SLP sequences starting from groups of reductions. */
|
||||
if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo))
|
||||
&& vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
|
||||
VEC_index (gimple, reductions, 0)))
|
||||
ok = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1120,7 +1279,10 @@ vect_detect_hybrid_slp_stmts (slp_tree node)
|
|||
if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
|
||||
&& !STMT_SLP_TYPE (stmt_vinfo)
|
||||
&& (STMT_VINFO_RELEVANT (stmt_vinfo)
|
||||
|| VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
|
||||
|| VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
|
||||
&& !(gimple_code (use_stmt) == GIMPLE_PHI
|
||||
&& STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
|
||||
== vect_reduction_def))
|
||||
vect_mark_slp_stmts (node, hybrid, i);
|
||||
|
||||
vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
|
||||
|
@ -1429,11 +1591,14 @@ vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
|
|||
/* For constant and loop invariant defs of SLP_NODE this function returns
|
||||
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
|
||||
OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
|
||||
stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
|
||||
stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
|
||||
REDUC_INDEX is the index of the reduction operand in the statements, unless
|
||||
it is -1. */
|
||||
|
||||
static void
|
||||
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
|
||||
unsigned int op_num, unsigned int number_of_vectors)
|
||||
unsigned int op_num, unsigned int number_of_vectors,
|
||||
int reduc_index)
|
||||
{
|
||||
VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
|
||||
gimple stmt = VEC_index (gimple, stmts, 0);
|
||||
|
@ -1449,6 +1614,50 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
|
|||
int number_of_copies = 1;
|
||||
VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
|
||||
bool constant_p, is_store;
|
||||
tree neutral_op = NULL;
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
|
||||
{
|
||||
enum tree_code code = gimple_assign_rhs_code (stmt);
|
||||
if (reduc_index == -1)
|
||||
{
|
||||
VEC_free (tree, heap, *vec_oprnds);
|
||||
return;
|
||||
}
|
||||
|
||||
op_num = reduc_index - 1;
|
||||
op = gimple_op (stmt, op_num + 1);
|
||||
/* For additional copies (see the explanation of NUMBER_OF_COPIES below)
|
||||
we need either neutral operands or the original operands. See
|
||||
get_initial_def_for_reduction() for details. */
|
||||
switch (code)
|
||||
{
|
||||
case WIDEN_SUM_EXPR:
|
||||
case DOT_PROD_EXPR:
|
||||
case PLUS_EXPR:
|
||||
case MINUS_EXPR:
|
||||
case BIT_IOR_EXPR:
|
||||
case BIT_XOR_EXPR:
|
||||
if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
|
||||
neutral_op = build_real (TREE_TYPE (op), dconst0);
|
||||
else
|
||||
neutral_op = build_int_cst (TREE_TYPE (op), 0);
|
||||
|
||||
break;
|
||||
|
||||
case MULT_EXPR:
|
||||
case BIT_AND_EXPR:
|
||||
if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
|
||||
neutral_op = build_real (TREE_TYPE (op), dconst1);
|
||||
else
|
||||
neutral_op = build_int_cst (TREE_TYPE (op), 1);
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
neutral_op = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (STMT_VINFO_DATA_REF (stmt_vinfo))
|
||||
{
|
||||
|
@ -1499,6 +1708,19 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
|
|||
else
|
||||
op = gimple_op (stmt, op_num + 1);
|
||||
|
||||
if (reduc_index != -1)
|
||||
{
|
||||
struct loop *loop = (gimple_bb (stmt))->loop_father;
|
||||
gimple def_stmt = SSA_NAME_DEF_STMT (op);
|
||||
|
||||
gcc_assert (loop);
|
||||
/* Get the def before the loop. */
|
||||
op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
|
||||
loop_preheader_edge (loop));
|
||||
if (j != (number_of_copies - 1) && neutral_op)
|
||||
op = neutral_op;
|
||||
}
|
||||
|
||||
/* Create 'vect_ = {op0,op1,...,opn}'. */
|
||||
t = tree_cons (NULL_TREE, op, t);
|
||||
|
||||
|
@ -1536,8 +1758,25 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
|
|||
to replicate the vectors. */
|
||||
while (number_of_vectors > VEC_length (tree, *vec_oprnds))
|
||||
{
|
||||
for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
|
||||
VEC_quick_push (tree, *vec_oprnds, vop);
|
||||
tree neutral_vec = NULL;
|
||||
|
||||
if (neutral_op)
|
||||
{
|
||||
if (!neutral_vec)
|
||||
{
|
||||
t = NULL;
|
||||
for (i = 0; i < (unsigned) nunits; i++)
|
||||
t = tree_cons (NULL_TREE, neutral_op, t);
|
||||
neutral_vec = build_vector (vector_type, t);
|
||||
}
|
||||
|
||||
VEC_quick_push (tree, *vec_oprnds, neutral_vec);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
|
||||
VEC_quick_push (tree, *vec_oprnds, vop);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1576,7 +1815,7 @@ vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
|
|||
|
||||
void
|
||||
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
|
||||
VEC (tree,heap) **vec_oprnds1)
|
||||
VEC (tree,heap) **vec_oprnds1, int reduc_index)
|
||||
{
|
||||
gimple first_stmt;
|
||||
enum tree_code code;
|
||||
|
@ -1607,19 +1846,26 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
|
|||
*vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
|
||||
|
||||
/* SLP_NODE corresponds either to a group of stores or to a group of
|
||||
unary/binary operations. We don't call this function for loads. */
|
||||
if (SLP_TREE_LEFT (slp_node))
|
||||
unary/binary operations. We don't call this function for loads.
|
||||
For reduction defs we call vect_get_constant_vectors(), since we are
|
||||
looking for initial loop invariant values. */
|
||||
if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
|
||||
/* The defs are already vectorized. */
|
||||
vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
|
||||
else
|
||||
/* Build vectors from scalar defs. */
|
||||
vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
|
||||
vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
|
||||
reduc_index);
|
||||
|
||||
if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
|
||||
/* Since we don't call this function with loads, this is a group of
|
||||
stores. */
|
||||
return;
|
||||
|
||||
/* For reductions, we only need initial values. */
|
||||
if (reduc_index != -1)
|
||||
return;
|
||||
|
||||
code = gimple_assign_rhs_code (first_stmt);
|
||||
if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
|
||||
return;
|
||||
|
@ -1638,7 +1884,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
|
|||
vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
|
||||
else
|
||||
/* Build vectors from scalar defs. */
|
||||
vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
|
||||
vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2027,22 +2273,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
|
|||
si = gsi_for_stmt (stmt);
|
||||
|
||||
is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
|
||||
if (is_store)
|
||||
{
|
||||
if (DR_GROUP_FIRST_DR (stmt_info))
|
||||
/* If IS_STORE is TRUE, the vectorization of the
|
||||
interleaving chain was completed - free all the stores in
|
||||
the chain. */
|
||||
vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
|
||||
else
|
||||
/* FORNOW: SLP originates only from strided stores. */
|
||||
gcc_unreachable ();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* FORNOW: SLP originates only from strided stores. */
|
||||
return false;
|
||||
return is_store;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2075,6 +2306,26 @@ vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
|
|||
fprintf (vect_dump, "vectorizing stmts using SLP.");
|
||||
}
|
||||
|
||||
for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
|
||||
{
|
||||
slp_tree root = SLP_INSTANCE_TREE (instance);
|
||||
gimple store;
|
||||
unsigned int j;
|
||||
gimple_stmt_iterator gsi;
|
||||
|
||||
for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
|
||||
&& j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
|
||||
{
|
||||
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
|
||||
break;
|
||||
|
||||
/* Free the attached stmt_vec_info and remove the stmt. */
|
||||
gsi = gsi_for_stmt (store);
|
||||
gsi_remove (&gsi, true);
|
||||
free_stmt_vec_info (store);
|
||||
}
|
||||
}
|
||||
|
||||
return is_store;
|
||||
}
|
||||
|
||||
|
|
|
@ -1134,7 +1134,7 @@ vect_get_vec_defs (tree op0, tree op1, gimple stmt,
|
|||
slp_tree slp_node)
|
||||
{
|
||||
if (slp_node)
|
||||
vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
|
||||
vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1, -1);
|
||||
else
|
||||
{
|
||||
tree vec_oprnd;
|
||||
|
@ -2519,7 +2519,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
/* Handle uses. */
|
||||
if (slp_node)
|
||||
vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
|
||||
vect_get_slp_defs (slp_node, &vec_oprnds0, NULL, -1);
|
||||
else
|
||||
{
|
||||
VEC_free (tree, heap, vec_oprnds0);
|
||||
|
@ -2819,7 +2819,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
if (j == 0)
|
||||
{
|
||||
if (slp_node)
|
||||
vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
|
||||
vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1, -1);
|
||||
else
|
||||
{
|
||||
vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
|
||||
|
@ -3105,7 +3105,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
|||
if (slp)
|
||||
{
|
||||
/* Get vectorized arguments for SLP_NODE. */
|
||||
vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
|
||||
vect_get_slp_defs (slp_node, &vec_oprnds, NULL, -1);
|
||||
|
||||
vec_oprnd = VEC_index (tree, vec_oprnds, 0);
|
||||
}
|
||||
|
@ -4049,7 +4049,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
|
|||
|| vectorizable_load (stmt, NULL, NULL, NULL, NULL)
|
||||
|| vectorizable_call (stmt, NULL, NULL)
|
||||
|| vectorizable_store (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_reduction (stmt, NULL, NULL)
|
||||
|| vectorizable_reduction (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0));
|
||||
else
|
||||
{
|
||||
|
@ -4201,8 +4201,7 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
break;
|
||||
|
||||
case reduc_vec_info_type:
|
||||
gcc_assert (!slp_node);
|
||||
done = vectorizable_reduction (stmt, gsi, &vec_stmt);
|
||||
done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
|
|
|
@ -242,6 +242,9 @@ typedef struct _loop_vec_info {
|
|||
/* The unrolling factor needed to SLP the loop. In case of that pure SLP is
|
||||
applied to the loop, i.e., no unrolling is needed, this is 1. */
|
||||
unsigned slp_unrolling_factor;
|
||||
|
||||
/* Reduction cycles detected in the loop. Used in loop-aware SLP. */
|
||||
VEC (gimple, heap) *reductions;
|
||||
} *loop_vec_info;
|
||||
|
||||
/* Access Functions. */
|
||||
|
@ -266,6 +269,7 @@ typedef struct _loop_vec_info {
|
|||
#define LOOP_VINFO_STRIDED_STORES(L) (L)->strided_stores
|
||||
#define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
|
||||
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
|
||||
#define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
|
||||
|
||||
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
|
||||
VEC_length (gimple, (L)->may_misalign_stmts) > 0
|
||||
|
@ -844,7 +848,8 @@ extern void vect_transform_loop (loop_vec_info);
|
|||
extern loop_vec_info vect_analyze_loop_form (struct loop *);
|
||||
extern bool vectorizable_live_operation (gimple, gimple_stmt_iterator *,
|
||||
gimple *);
|
||||
extern bool vectorizable_reduction (gimple, gimple_stmt_iterator *, gimple *);
|
||||
extern bool vectorizable_reduction (gimple, gimple_stmt_iterator *, gimple *,
|
||||
slp_tree);
|
||||
extern bool vectorizable_induction (gimple, gimple_stmt_iterator *, gimple *);
|
||||
extern int vect_estimate_min_profitable_iters (loop_vec_info);
|
||||
extern tree get_initial_def_for_reduction (gimple, tree, tree *);
|
||||
|
@ -862,7 +867,7 @@ extern bool vect_analyze_slp (loop_vec_info, bb_vec_info);
|
|||
extern void vect_make_slp_decision (loop_vec_info);
|
||||
extern void vect_detect_hybrid_slp (loop_vec_info);
|
||||
extern void vect_get_slp_defs (slp_tree, VEC (tree,heap) **,
|
||||
VEC (tree,heap) **);
|
||||
VEC (tree,heap) **, int);
|
||||
extern LOC find_bb_location (basic_block);
|
||||
extern bb_vec_info vect_slp_analyze_bb (basic_block);
|
||||
extern void vect_slp_transform_bb (basic_block);
|
||||
|
|
Loading…
Reference in New Issue