tree-vectorizer.h (vectorizable_condition): Add parameters.
* tree-vectorizer.h (vectorizable_condition): Add parameters. * tree-vect-loop.c (vect_is_simple_reduction): Support COND_EXPR. (get_initial_def_for_reduction): Likewise. (vectorizable_reduction): Skip the check of first operand in case of COND_EXPR. Add check that it is outer loop vectorization if nested cycle was detected. Call vectorizable_condition() for COND_EXPR. If reduction epilogue cannot be created do not fail for nested cycles (if it is not double reduction). Assert that there is only one type in the loop in case of COND_EXPR. Call vectorizable_condition() to vectorize COND_EXPR. * tree-vect-stmts.c (vectorizable_condition): Update comment. Add parameters. Allow nested cycles if called from vectorizable_reduction(). Use reduction vector variable if provided. (vect_analyze_stmt): Call vectorizable_reduction() before vectorizable_condition(). (vect_transform_stmt): Update call to vectorizable_condition(). From-SVN: r149806
This commit is contained in:
parent
9e7c935a29
commit
4bbe826276
@ -1,3 +1,22 @@
|
||||
2009-07-20 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* tree-vectorizer.h (vectorizable_condition): Add parameters.
|
||||
* tree-vect-loop.c (vect_is_simple_reduction): Support COND_EXPR.
|
||||
(get_initial_def_for_reduction): Likewise.
|
||||
(vectorizable_reduction): Skip the check of first operand in case
|
||||
of COND_EXPR. Add check that it is outer loop vectorization if
|
||||
nested cycle was detected. Call vectorizable_condition() for
|
||||
COND_EXPR. If reduction epilogue cannot be created do not fail for
|
||||
nested cycles (if it is not double reduction). Assert that there
|
||||
is only one type in the loop in case of COND_EXPR. Call
|
||||
vectorizable_condition() to vectorize COND_EXPR.
|
||||
* tree-vect-stmts.c (vectorizable_condition): Update comment.
|
||||
Add parameters. Allow nested cycles if called from
|
||||
vectorizable_reduction(). Use reduction vector variable if provided.
|
||||
(vect_analyze_stmt): Call vectorizable_reduction() before
|
||||
vectorizable_condition().
|
||||
(vect_transform_stmt): Update call to vectorizable_condition().
|
||||
|
||||
2009-07-20 Christian Bruel <christian.bruel@st.com>
|
||||
|
||||
* config/sh/sh.opt (-mfmovd): Resurrect and document.
|
||||
|
@ -1,3 +1,9 @@
|
||||
2009-07-20 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/vect-cond-1.c, gcc.dg/vect/vect-cond-2.c,
|
||||
gcc.dg/vect/vect-cond-3.c, gcc.dg/vect/vect-cond-4.c,
|
||||
gcc.dg/vect/vect-cond-5.c, gcc.dg/vect/vect-cond-6.c: New tests.
|
||||
|
||||
2009-07-20 Christian Bruel <christian.bruel@st.com>
|
||||
|
||||
* gcc.target/sh/mfmovd.c: New test.
|
||||
|
58
gcc/testsuite/gcc.dg/vect/vect-cond-1.c
Normal file
58
gcc/testsuite/gcc.dg/vect/vect-cond-1.c
Normal file
@ -0,0 +1,58 @@
|
||||
/* { dg-require-effective-target vect_condition } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define M 32
|
||||
#define N 16
|
||||
|
||||
int x_in[M];
|
||||
int x_out[M];
|
||||
int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2};
|
||||
int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024};
|
||||
int check_result[M] = {1024,1024,1024,256,256,256,256,256,256,256,256,128,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48};
|
||||
|
||||
/* Kernel under test.  For each j in [0, M) it reads x = x_in[j] and walks
   the N entries of c[]: curr_a starts at a[0], is kept while x > c[i], and
   is replaced by a[i+1] otherwise.  The value left in curr_a after the
   inner loop is stored to x_out[j].  Takes no arguments and returns
   nothing; it works entirely on the file-scope arrays.  */
__attribute__ ((noinline)) void
|
||||
foo ()
|
||||
{
|
||||
int j, i, x;
|
||||
int curr_a, next_a;
|
||||
|
||||
|
||||
for (j = 0; j < M; j++)
|
||||
{
|
||||
x = x_in[j];
|
||||
curr_a = a[0];
|
||||
|
||||
|
||||
/* curr_a is carried from one inner iteration to the next; the
   conditional expression below either keeps it or overwrites it.  */
for (i = 0; i < N; i++)
|
||||
{
|
||||
next_a = a[i+1];
|
||||
curr_a = x > c[i] ? curr_a : next_a;
|
||||
}
|
||||
|
||||
|
||||
x_out[j] = curr_a;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test driver.  Calls check_vect () (declared outside this view, presumably
   in tree-vect.h), fills x_in[j] with j, runs foo (), and aborts if any
   x_out[j] differs from check_result[j].  Returns 0 on success.  */
int main (void)
|
||||
{
|
||||
/* Note: i is declared but not used in this function.  */
int i,j;
|
||||
|
||||
|
||||
check_vect ();
|
||||
|
||||
|
||||
for (j = 0; j < M; j++)
|
||||
x_in[j] = j;
|
||||
|
||||
|
||||
foo ();
|
||||
|
||||
|
||||
/* Compare every output element with the precomputed table.  */
for (j = 0; j < M; j++)
|
||||
if (x_out[j] != check_result[j])
|
||||
abort ();
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
49
gcc/testsuite/gcc.dg/vect/vect-cond-2.c
Normal file
49
gcc/testsuite/gcc.dg/vect/vect-cond-2.c
Normal file
@ -0,0 +1,49 @@
|
||||
/* { dg-require-effective-target vect_condition } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
|
||||
int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2};
|
||||
int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024};
|
||||
|
||||
/* Single-loop kernel.  Starting from curr_a = a[0], walks the N entries of
   c[]: curr_a is kept while *x > c[i] and replaced by a[i+1] otherwise.
   X is both input and output: it is read in every iteration and the final
   curr_a is written back through it after the loop.  */
__attribute__ ((noinline)) void
|
||||
foo (int *x)
|
||||
{
|
||||
int i;
|
||||
int curr_a, flag, next_a;
|
||||
|
||||
|
||||
curr_a = a[0];
|
||||
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
/* The comparison result is held in a separate variable and then used
   as the selector of the conditional expression.  */
flag = *x > c[i];
|
||||
next_a = a[i+1];
|
||||
curr_a = flag ? curr_a : next_a;
|
||||
}
|
||||
|
||||
|
||||
*x = curr_a;
|
||||
}
|
||||
|
||||
/* Test driver.  Calls check_vect () (declared outside this view, presumably
   in tree-vect.h), runs foo on a local x initialised to 7, and aborts unless
   foo left 256 in x.  Returns 0 on success.  */
int main (void)
|
||||
{
|
||||
int x = 7;
|
||||
|
||||
|
||||
check_vect ();
|
||||
|
||||
|
||||
foo (&x);
|
||||
|
||||
|
||||
/* With x == 7 the last index at which 7 > c[i] fails is i == 13
   (c[13] == 10), which selects a[14] == 256.  */
if (x != 256)
|
||||
abort ();
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The order of computation should not be changed for cond_expr, therefore,
|
||||
it cannot be vectorized in reduction. */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
66
gcc/testsuite/gcc.dg/vect/vect-cond-3.c
Normal file
66
gcc/testsuite/gcc.dg/vect/vect-cond-3.c
Normal file
@ -0,0 +1,66 @@
|
||||
/* { dg-require-effective-target vect_condition } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define M 32
|
||||
#define N 16
|
||||
|
||||
int x_in[M];
|
||||
int x_out_a[M], x_out_b[M];
|
||||
int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2};
|
||||
int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024};
|
||||
int b[N+1] = {17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1};
|
||||
int check_result_a[M] = {1024,1024,1024,256,256,256,256,256,256,256,256,128,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48};
|
||||
int check_result_b[M] = {17,17,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
|
||||
|
||||
/* Kernel under test with two carried values.  For each j in [0, M) it reads
   x = x_in[j], starts curr_a at a[0] and curr_b at b[0], and walks the N
   entries of c[] with flag = (x > c[i]):
     curr_a is kept when flag is set and replaced by a[i+1] otherwise;
     curr_b is replaced by b[i+1] when flag is set and kept otherwise.
   The final values are stored to x_out_a[j] and x_out_b[j].  */
__attribute__ ((noinline)) void
|
||||
foo ()
|
||||
{
|
||||
int j, i, x;
|
||||
int curr_a, flag, next_a, curr_b, next_b;
|
||||
|
||||
|
||||
for (j = 0; j < M; j++)
|
||||
{
|
||||
x = x_in[j];
|
||||
curr_a = a[0];
|
||||
curr_b = b[0];
|
||||
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
flag = x > c[i];
|
||||
next_a = a[i+1];
|
||||
next_b = b[i+1];
|
||||
/* The carried value sits in the "then" arm for curr_a and in the
   "else" arm for curr_b.  */
curr_a = flag ? curr_a : next_a;
|
||||
curr_b = flag ? next_b : curr_b;
|
||||
}
|
||||
|
||||
|
||||
x_out_a[j] = curr_a;
|
||||
x_out_b[j] = curr_b;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test driver.  Calls check_vect () (declared outside this view, presumably
   in tree-vect.h), fills x_in[j] with j, runs foo (), and aborts if either
   output array differs from its precomputed table at any index.  Returns 0
   on success.  */
int main (void)
|
||||
{
|
||||
/* Note: i is declared but not used in this function.  */
int i,j;
|
||||
|
||||
|
||||
check_vect ();
|
||||
|
||||
|
||||
for (j = 0; j < M; j++)
|
||||
x_in[j] = j;
|
||||
|
||||
|
||||
foo ();
|
||||
|
||||
|
||||
/* Both results must match for every j.  */
for (j = 0; j < M; j++)
|
||||
if (x_out_a[j] != check_result_a[j]
|
||||
|| x_out_b[j] != check_result_b[j])
|
||||
abort ();
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
63
gcc/testsuite/gcc.dg/vect/vect-cond-4.c
Normal file
63
gcc/testsuite/gcc.dg/vect/vect-cond-4.c
Normal file
@ -0,0 +1,63 @@
|
||||
/* { dg-require-effective-target vect_condition } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define M 32
|
||||
#define N 16
|
||||
|
||||
int x_in[M];
|
||||
int x_out_a[M], x_out_b[M];
|
||||
int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2};
|
||||
int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024};
|
||||
int b[N+1] = {17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1};
|
||||
int check_result_a[M] = {1024,1024,1024,256,256,256,256,256,256,256,256,128,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48};
|
||||
int check_result_b[M] = {17,17,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
foo (int z)
|
||||
{
|
||||
int j, i, x;
|
||||
int curr_a, flag, next_a, curr_b, next_b;
|
||||
|
||||
for (j = 0; j < M; j++)
|
||||
{
|
||||
x = x_in[j];
|
||||
curr_a = a[0];
|
||||
curr_b = b[0];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
curr_a = x > c[i] ? curr_a : z;
|
||||
curr_b = x > c[i] ? next_b : 5;
|
||||
}
|
||||
|
||||
x_out_a[j] = curr_a;
|
||||
x_out_b[j] = curr_b;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test driver.  Calls check_vect () (declared outside this view, presumably
   in tree-vect.h), fills x_in[j] with j, runs foo (125), and aborts unless
   every x_out_a[j] is 125 and every x_out_b[j] is 5.  Returns 0 on
   success.  */
int main (void)
|
||||
{
|
||||
/* Note: i is declared but not used in this function.  */
int i,j;
|
||||
|
||||
|
||||
check_vect ();
|
||||
|
||||
|
||||
for (j = 0; j < M; j++)
|
||||
x_in[j] = j;
|
||||
|
||||
|
||||
foo (125);
|
||||
|
||||
|
||||
/* c[8] == 50 is larger than every x_in value (0..M-1), so the "else" arm
   of each conditional in foo is taken at least once for every j; the
   expected outputs are therefore the two replacement values.  */
for (j = 0; j < M; j++)
|
||||
if (x_out_a[j] != 125
|
||||
|| x_out_b[j] != 5)
|
||||
abort ();
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
62
gcc/testsuite/gcc.dg/vect/vect-cond-5.c
Normal file
62
gcc/testsuite/gcc.dg/vect/vect-cond-5.c
Normal file
@ -0,0 +1,62 @@
|
||||
/* { dg-require-effective-target vect_condition } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define K 32
|
||||
|
||||
int cond_array[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
int a[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
int out[K];
|
||||
int check_result[K] = {2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
||||
|
||||
/* Kernel under test: a conditional update carried across two nested loops.
   For each k in [0, K) res is reset to 0, then for every (j, i) pair
   (j outer, i inner) res is replaced by a[i][j] when C > cond_array[i+k][j]
   and kept otherwise.  The value left in res is stored to out[k].

   C is the threshold compared against cond_array.  The first index i+k stays
   below 2*K, which is the declared first dimension of cond_array.  */
__attribute__ ((noinline)) void
|
||||
foo (int c)
|
||||
{
|
||||
int res, i, j, k, next;
|
||||
|
||||
|
||||
for (k = 0; k < K; k++)
|
||||
{
|
||||
/* res is reset once per k, so it is carried through both the j and
   the i loop below.  */
res = 0;
|
||||
for (j = 0; j < K; j++)
|
||||
for (i = 0; i < K; i++)
|
||||
{
|
||||
next = a[i][j];
|
||||
res = c > cond_array[i+k][j] ? next : res;
|
||||
}
|
||||
|
||||
|
||||
out[k] = res;
|
||||
}
|
||||
}
|
||||
|
||||
/* Test driver.  Calls check_vect () (declared outside this view, presumably
   in tree-vect.h), initialises cond_array[i][j] = i+j for all 2*K rows and
   a[i][j] = i+2 for all K rows, runs foo (5), and aborts if any out[k]
   differs from check_result[k].  Returns 0 on success.  */
int main ()
|
||||
{
|
||||
int i, j, k;
|
||||
|
||||
|
||||
check_vect ();
|
||||
|
||||
|
||||
for (j = 0; j < K; j++)
|
||||
{
|
||||
for (i = 0; i < 2*K; i++)
|
||||
cond_array[i][j] = i+j;
|
||||
|
||||
|
||||
for (i = 0; i < K; i++)
|
||||
a[i][j] = i+2;
|
||||
}
|
||||
|
||||
|
||||
foo(5);
|
||||
|
||||
|
||||
/* With c == 5 the condition 5 > i+k+j can only hold for k < 5, which is
   why check_result is non-zero only in its first five entries.  */
for (k = 0; k < K; k++)
|
||||
if (out[k] != check_result[k])
|
||||
abort ();
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Double reduction with cond_expr is not supported: even though the order
|
||||
of computation is the same, the vector results would have to be reduced to a
|
||||
scalar result, which can't be done for cond_expr. */
|
||||
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
60
gcc/testsuite/gcc.dg/vect/vect-cond-6.c
Normal file
60
gcc/testsuite/gcc.dg/vect/vect-cond-6.c
Normal file
@ -0,0 +1,60 @@
|
||||
/* { dg-require-effective-target vect_condition } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define K 32
|
||||
|
||||
int cond_array[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
int a[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
int out[K];
|
||||
|
||||
/* Kernel under test: a conditional update carried across the innermost loop
   only.  For each (k, j) pair res is reset to 0, then for every i in [0, K)
   res is replaced by a[i][j] when C > cond_array[i+k][j] and kept otherwise.
   The value left in res is stored to out[j].

   C is the threshold compared against cond_array.  Because the store goes to
   out[j] rather than out[k], every k iteration overwrites the same K
   elements, and out holds the k == K-1 results when the function returns.  */
__attribute__ ((noinline)) void
|
||||
foo (int c)
|
||||
{
|
||||
int res, i, j, k, next;
|
||||
|
||||
|
||||
for (k = 0; k < K; k++)
|
||||
{
|
||||
for (j = 0; j < K; j++)
|
||||
{
|
||||
/* res is reset for every (k, j), so it is carried through the i
   loop only.  */
res = 0;
|
||||
for (i = 0; i < K; i++)
|
||||
{
|
||||
next = a[i][j];
|
||||
res = c > cond_array[i+k][j] ? next : res;
|
||||
}
|
||||
|
||||
|
||||
out[j] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Test driver.  Calls check_vect () (declared outside this view, presumably
   in tree-vect.h), initialises cond_array[i][j] = i+j for all 2*K rows and
   a[i][j] = i+2 for all K rows, runs foo (125), and aborts unless every
   out[k] equals 33.  Returns 0 on success.  */
int main ()
|
||||
{
|
||||
int i, j, k;
|
||||
|
||||
|
||||
check_vect ();
|
||||
|
||||
|
||||
for (j = 0; j < K; j++)
|
||||
{
|
||||
for (i = 0; i < 2*K; i++)
|
||||
cond_array[i][j] = i+j;
|
||||
|
||||
|
||||
for (i = 0; i < K; i++)
|
||||
a[i][j] = i+2;
|
||||
}
|
||||
|
||||
|
||||
foo(125);
|
||||
|
||||
|
||||
/* The largest value foo compares against is (K-1)+(K-1)+(K-1) == 93, so
   125 > cond_array[i+k][j] always holds and res ends as a[K-1][j], which
   is (K-1)+2 == 33.  */
for (k = 0; k < K; k++)
|
||||
if (out[k] != 33)
|
||||
abort ();
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -1568,9 +1568,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
|
||||
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
|
||||
edge latch_e = loop_latch_edge (loop);
|
||||
tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
|
||||
gimple def_stmt, def1, def2;
|
||||
gimple def_stmt, def1 = NULL, def2 = NULL;
|
||||
enum tree_code code;
|
||||
tree op1, op2;
|
||||
tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
|
||||
tree type;
|
||||
int nloop_uses;
|
||||
tree name;
|
||||
@ -1695,25 +1695,52 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
|
||||
if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
report_vect_op (def_stmt, "reduction: not binary operation: ");
|
||||
return NULL;
|
||||
}
|
||||
if (code != COND_EXPR)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
report_vect_op (def_stmt, "reduction: not binary operation: ");
|
||||
|
||||
op1 = gimple_assign_rhs1 (def_stmt);
|
||||
op2 = gimple_assign_rhs2 (def_stmt);
|
||||
if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
op3 = TREE_OPERAND (TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0), 0);
|
||||
op4 = TREE_OPERAND (TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0), 1);
|
||||
op1 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 1);
|
||||
op2 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 2);
|
||||
|
||||
if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
|
||||
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
op1 = gimple_assign_rhs1 (def_stmt);
|
||||
op2 = gimple_assign_rhs2 (def_stmt);
|
||||
|
||||
if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
|
||||
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
type = TREE_TYPE (gimple_assign_lhs (def_stmt));
|
||||
if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
|
||||
|| TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
|
||||
if ((TREE_CODE (op1) == SSA_NAME
|
||||
&& TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1)))
|
||||
|| (TREE_CODE (op2) == SSA_NAME
|
||||
&& TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
|
||||
|| (op3 && TREE_CODE (op3) == SSA_NAME
|
||||
&& TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op3)))
|
||||
|| (op4 && TREE_CODE (op4) == SSA_NAME
|
||||
&& TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op4))))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
{
|
||||
@ -1723,7 +1750,15 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
|
||||
print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
|
||||
fprintf (vect_dump, ",");
|
||||
print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
|
||||
if (op3 && op4)
|
||||
{
|
||||
fprintf (vect_dump, ",");
|
||||
print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM);
|
||||
fprintf (vect_dump, ",");
|
||||
print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM);
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1765,9 +1800,14 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
|
||||
1) integer arithmetic and no trapv
|
||||
2) floating point arithmetic, and special flags permit this optimization
|
||||
3) nested cycle (i.e., outer loop vectorization). */
|
||||
def1 = SSA_NAME_DEF_STMT (op1);
|
||||
def2 = SSA_NAME_DEF_STMT (op2);
|
||||
if (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2))
|
||||
if (TREE_CODE (op1) == SSA_NAME)
|
||||
def1 = SSA_NAME_DEF_STMT (op1);
|
||||
|
||||
if (TREE_CODE (op2) == SSA_NAME)
|
||||
def2 = SSA_NAME_DEF_STMT (op2);
|
||||
|
||||
if (code != COND_EXPR
|
||||
&& (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2)))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
report_vect_op (def_stmt, "reduction: no defs for operands: ");
|
||||
@ -1778,28 +1818,31 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
|
||||
the other def is either defined in the loop ("vect_internal_def"),
|
||||
or it's an induction (defined by a loop-header phi-node). */
|
||||
|
||||
if (def2 == phi
|
||||
&& flow_bb_inside_loop_p (loop, gimple_bb (def1))
|
||||
&& (is_gimple_assign (def1)
|
||||
|| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
|
||||
|| (gimple_code (def1) == GIMPLE_PHI
|
||||
&& STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
|
||||
== vect_internal_def
|
||||
&& !is_loop_header_bb_p (gimple_bb (def1)))))
|
||||
if (def2 && def2 == phi
|
||||
&& (code == COND_EXPR
|
||||
|| (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
|
||||
&& (is_gimple_assign (def1)
|
||||
|| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
|
||||
== vect_induction_def
|
||||
|| (gimple_code (def1) == GIMPLE_PHI
|
||||
&& STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
|
||||
== vect_internal_def
|
||||
&& !is_loop_header_bb_p (gimple_bb (def1)))))))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
report_vect_op (def_stmt, "detected reduction: ");
|
||||
return def_stmt;
|
||||
}
|
||||
else if (def1 == phi
|
||||
&& flow_bb_inside_loop_p (loop, gimple_bb (def2))
|
||||
&& (is_gimple_assign (def2)
|
||||
|| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
|
||||
== vect_induction_def
|
||||
|| (gimple_code (def2) == GIMPLE_PHI
|
||||
&& STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
|
||||
== vect_internal_def
|
||||
&& !is_loop_header_bb_p (gimple_bb (def2)))))
|
||||
else if (def1 && def1 == phi
|
||||
&& (code == COND_EXPR
|
||||
|| (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
|
||||
&& (is_gimple_assign (def2)
|
||||
|| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
|
||||
== vect_induction_def
|
||||
|| (gimple_code (def2) == GIMPLE_PHI
|
||||
&& STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
|
||||
== vect_internal_def
|
||||
&& !is_loop_header_bb_p (gimple_bb (def2)))))))
|
||||
{
|
||||
if (check_reduction)
|
||||
{
|
||||
@ -2584,16 +2627,16 @@ get_initial_def_for_induction (gimple iv_phi)
|
||||
vector of partial results.
|
||||
|
||||
Option1 (adjust in epilog): Initialize the vector as follows:
|
||||
add/bit or/xor: [0,0,...,0,0]
|
||||
mult/bit and: [1,1,...,1,1]
|
||||
min/max: [init_val,init_val,..,init_val,init_val]
|
||||
add/bit or/xor: [0,0,...,0,0]
|
||||
mult/bit and: [1,1,...,1,1]
|
||||
min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
|
||||
and when necessary (e.g. add/mult case) let the caller know
|
||||
that it needs to adjust the result by init_val.
|
||||
|
||||
Option2: Initialize the vector as follows:
|
||||
add/bit or/xor: [init_val,0,0,...,0]
|
||||
mult/bit and: [init_val,1,1,...,1]
|
||||
min/max: [init_val,init_val,...,init_val]
|
||||
add/bit or/xor: [init_val,0,0,...,0]
|
||||
mult/bit and: [init_val,1,1,...,1]
|
||||
min/max/cond_expr: [init_val,init_val,...,init_val]
|
||||
and no adjustments are needed.
|
||||
|
||||
For example, for the following code:
|
||||
@ -2726,6 +2769,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
|
||||
|
||||
case MIN_EXPR:
|
||||
case MAX_EXPR:
|
||||
case COND_EXPR:
|
||||
if (adjustment_def)
|
||||
{
|
||||
*adjustment_def = NULL_TREE;
|
||||
@ -3413,7 +3457,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
stmt_vec_info prev_stmt_info, prev_phi_info;
|
||||
gimple first_phi = NULL;
|
||||
bool single_defuse_cycle = false;
|
||||
tree reduc_def;
|
||||
tree reduc_def = NULL_TREE;
|
||||
gimple new_stmt = NULL;
|
||||
int j;
|
||||
tree ops[3];
|
||||
@ -3522,6 +3566,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
reduction variable. */
|
||||
for (i = 0; i < op_type-1; i++)
|
||||
{
|
||||
/* The condition of COND_EXPR is checked in vectorizable_condition(). */
|
||||
if (i == 0 && code == COND_EXPR)
|
||||
continue;
|
||||
|
||||
is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, NULL, &def_stmt,
|
||||
&def, &dt);
|
||||
gcc_assert (is_simple_use);
|
||||
@ -3529,7 +3577,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
&& dt != vect_external_def
|
||||
&& dt != vect_constant_def
|
||||
&& dt != vect_induction_def
|
||||
&& dt != vect_nested_cycle)
|
||||
&& !(dt == vect_nested_cycle && nested_cycle))
|
||||
return false;
|
||||
|
||||
if (dt == vect_nested_cycle)
|
||||
@ -3564,37 +3612,56 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
|
||||
return false;
|
||||
|
||||
/* 4. Supportable by target? */
|
||||
|
||||
/* 4.1. check support for the operation in the loop */
|
||||
optab = optab_for_tree_code (code, vectype, optab_default);
|
||||
if (!optab)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "no optab.");
|
||||
return false;
|
||||
}
|
||||
vec_mode = TYPE_MODE (vectype);
|
||||
if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "op not supported by target.");
|
||||
if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
|
||||
|| LOOP_VINFO_VECT_FACTOR (loop_vinfo)
|
||||
< vect_min_worthwhile_factor (code))
|
||||
return false;
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "proceeding using word mode.");
|
||||
}
|
||||
|
||||
/* Worthwhile without SIMD support? */
|
||||
if (!VECTOR_MODE_P (TYPE_MODE (vectype))
|
||||
&& LOOP_VINFO_VECT_FACTOR (loop_vinfo)
|
||||
< vect_min_worthwhile_factor (code))
|
||||
if (code == COND_EXPR)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "not worthwhile without SIMD support.");
|
||||
return false;
|
||||
if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "unsupported condition in reduction");
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* 4. Supportable by target? */
|
||||
|
||||
/* 4.1. check support for the operation in the loop */
|
||||
optab = optab_for_tree_code (code, vectype, optab_default);
|
||||
if (!optab)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "no optab.");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "op not supported by target.");
|
||||
|
||||
if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
|
||||
|| LOOP_VINFO_VECT_FACTOR (loop_vinfo)
|
||||
< vect_min_worthwhile_factor (code))
|
||||
return false;
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "proceeding using word mode.");
|
||||
}
|
||||
|
||||
/* Worthwhile without SIMD support? */
|
||||
if (!VECTOR_MODE_P (TYPE_MODE (vectype))
|
||||
&& LOOP_VINFO_VECT_FACTOR (loop_vinfo)
|
||||
< vect_min_worthwhile_factor (code))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "not worthwhile without SIMD support.");
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* 4.2. Check support for the epilog operation.
|
||||
@ -3656,26 +3723,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
orig_code = code;
|
||||
}
|
||||
|
||||
if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
|
||||
return false;
|
||||
|
||||
reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype,
|
||||
optab_default);
|
||||
if (!reduc_optab)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "no optab for reduction.");
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
}
|
||||
|
||||
if (reduc_optab
|
||||
&& optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "reduc op not supported by target.");
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
}
|
||||
|
||||
if (nested_cycle)
|
||||
{
|
||||
def_bb = gimple_bb (reduc_def_stmt);
|
||||
@ -3692,6 +3739,40 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
double_reduc = true;
|
||||
}
|
||||
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
|
||||
{
|
||||
reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype,
|
||||
optab_default);
|
||||
if (!reduc_optab)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "no optab for reduction.");
|
||||
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
}
|
||||
|
||||
if (reduc_optab
|
||||
&& optab_handler (reduc_optab, vec_mode)->insn_code
|
||||
== CODE_FOR_nothing)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "reduc op not supported by target.");
|
||||
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!nested_cycle || double_reduc)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "no reduc code for scalar code.");
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (double_reduc && ncopies > 1)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
@ -3713,6 +3794,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "transform reduction.");
|
||||
|
||||
/* FORNOW: Multiple types are not supported for condition. */
|
||||
if (code == COND_EXPR)
|
||||
gcc_assert (ncopies == 1);
|
||||
|
||||
/* Create the destination vector */
|
||||
vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
||||
|
||||
@ -3761,8 +3846,19 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
new_phi = create_phi_node (vec_dest, loop->header);
|
||||
set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo,
|
||||
NULL));
|
||||
/* Get the vector def for the reduction variable from the phi
|
||||
node. */
|
||||
reduc_def = PHI_RESULT (new_phi);
|
||||
}
|
||||
|
||||
if (code == COND_EXPR)
|
||||
{
|
||||
first_phi = new_phi;
|
||||
vectorizable_condition (stmt, gsi, vec_stmt, reduc_def, reduc_index);
|
||||
/* Multiple types are not supported for condition. */
|
||||
break;
|
||||
}
|
||||
|
||||
/* Handle uses. */
|
||||
if (j == 0)
|
||||
{
|
||||
@ -3780,7 +3876,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
|
||||
/* Get the vector def for the reduction variable from the phi
|
||||
node. */
|
||||
reduc_def = PHI_RESULT (new_phi);
|
||||
first_phi = new_phi;
|
||||
}
|
||||
else
|
||||
@ -3798,8 +3893,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
|
||||
}
|
||||
|
||||
|
||||
/* Arguments are ready. create the new vector stmt. */
|
||||
/* Arguments are ready. Create the new vector stmt. */
|
||||
if (op_type == binary_op)
|
||||
{
|
||||
if (reduc_index == 0)
|
||||
@ -3827,18 +3921,19 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
new_temp = make_ssa_name (vec_dest, new_stmt);
|
||||
gimple_assign_set_lhs (new_stmt, new_temp);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
|
||||
|
||||
if (j == 0)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
prev_phi_info = vinfo_for_stmt (new_phi);
|
||||
}
|
||||
|
||||
/* Finalize the reduction-phi (set its arguments) and create the
|
||||
epilog reduction code. */
|
||||
if (!single_defuse_cycle)
|
||||
if (!single_defuse_cycle || code == COND_EXPR)
|
||||
new_temp = gimple_assign_lhs (*vec_stmt);
|
||||
|
||||
vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
|
||||
|
@ -3775,13 +3775,17 @@ vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
|
||||
Check if STMT is conditional modify expression that can be vectorized.
|
||||
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
|
||||
stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
|
||||
at BSI.
|
||||
at GSI.
|
||||
|
||||
When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
|
||||
to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
|
||||
else clause if it is 2).
|
||||
|
||||
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
|
||||
|
||||
static bool
|
||||
bool
|
||||
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
gimple *vec_stmt)
|
||||
gimple *vec_stmt, tree reduc_def, int reduc_index)
|
||||
{
|
||||
tree scalar_dest = NULL_TREE;
|
||||
tree vec_dest = NULL_TREE;
|
||||
@ -3810,7 +3814,9 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
if (!STMT_VINFO_RELEVANT_P (stmt_info))
|
||||
return false;
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
|
||||
&& !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
|
||||
&& reduc_def))
|
||||
return false;
|
||||
|
||||
/* FORNOW: SLP not supported. */
|
||||
@ -3818,7 +3824,7 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
return false;
|
||||
|
||||
/* FORNOW: not yet supported. */
|
||||
if (STMT_VINFO_LIVE_P (stmt_info))
|
||||
if (STMT_VINFO_LIVE_P (stmt_info))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "value used after loop.");
|
||||
@ -3892,8 +3898,14 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
|
||||
vec_cond_rhs =
|
||||
vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
|
||||
vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
|
||||
vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
|
||||
if (reduc_index == 1)
|
||||
vec_then_clause = reduc_def;
|
||||
else
|
||||
vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
|
||||
if (reduc_index == 2)
|
||||
vec_else_clause = reduc_def;
|
||||
else
|
||||
vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
|
||||
|
||||
/* Arguments are ready. Create the new vector stmt. */
|
||||
vec_compare = build2 (TREE_CODE (cond_expr), vectype,
|
||||
@ -4023,8 +4035,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
|
||||
|| vectorizable_load (stmt, NULL, NULL, NULL, NULL)
|
||||
|| vectorizable_call (stmt, NULL, NULL)
|
||||
|| vectorizable_store (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_condition (stmt, NULL, NULL)
|
||||
|| vectorizable_reduction (stmt, NULL, NULL));
|
||||
|| vectorizable_reduction (stmt, NULL, NULL)
|
||||
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0));
|
||||
else
|
||||
{
|
||||
if (bb_vinfo)
|
||||
@ -4165,7 +4177,7 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
|
||||
case condition_vec_info_type:
|
||||
gcc_assert (!slp_node);
|
||||
done = vectorizable_condition (stmt, gsi, &vec_stmt);
|
||||
done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
|
@ -786,7 +786,9 @@ extern bool vect_transform_stmt (gimple, gimple_stmt_iterator *,
|
||||
bool *, slp_tree, slp_instance);
|
||||
extern void vect_remove_stores (gimple);
|
||||
extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
|
||||
|
||||
extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
|
||||
tree, int);
|
||||
|
||||
/* In tree-vect-data-refs.c. */
|
||||
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
|
||||
extern enum dr_alignment_support vect_supportable_dr_alignment
|
||||
|
Loading…
Reference in New Issue
Block a user