diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e3818f3ba86..7e349d5f202 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2009-07-20 Ira Rosen + + * tree-vectorizer.h (vectorizable_condition): Add parameters. + * tree-vect-loop.c (vect_is_simple_reduction): Support COND_EXPR. + (get_initial_def_for_reduction): Likewise. + (vectorizable_reduction): Skip the check of first operand in case + of COND_EXPR. Add check that it is outer loop vectorization if + nested cycle was detected. Call vectorizable_condition() for + COND_EXPR. If reduction epilogue cannot be created do not fail for + nested cycles (if it is not double reduction). Assert that there + is only one type in the loop in case of COND_EXPR. Call + vectorizable_condition() to vectorize COND_EXPR. + * tree-vect-stmts.c (vectorizable_condition): Update comment. + Add parameters. Allow nested cycles if called from + vectorizable_reduction(). Use reduction vector variable if provided. + (vect_analyze_stmt): Call vectorizable_reduction() before + vectorizable_condition(). + (vect_transform_stmt): Update call to vectorizable_condition(). + 2009-07-20 Christian Bruel * config/sh/sh.opt (-mfmovd): Resurrect and document. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4bc5b0a27fc..daac3e82e4e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2009-07-20 Ira Rosen + + * gcc.dg/vect/vect-cond-1.c, gcc.dg/vect/vect-cond-2.c, + gcc.dg/vect/vect-cond-3.c, gcc.dg/vect/vect-cond-4.c, + gcc.dg/vect/vect-cond-5.c, gcc.dg/vect/vect-cond-6.c: New tests. + 2009-07-20 Christian Bruel * gcc.target/sh/mfmovd.c: New test. 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-1.c b/gcc/testsuite/gcc.dg/vect/vect-cond-1.c new file mode 100644 index 00000000000..4ee67132a82 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-1.c @@ -0,0 +1,58 @@ +/* { dg-require-effective-target vect_condition } */ + +#include +#include +#include "tree-vect.h" + +#define M 32 +#define N 16 + +int x_in[M]; +int x_out[M]; +int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2}; +int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024}; +int check_result[M] = {1024,1024,1024,256,256,256,256,256,256,256,256,128,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48}; + +__attribute__ ((noinline)) void +foo () +{ + int j, i, x; + int curr_a, next_a; + + for (j = 0; j < M; j++) + { + x = x_in[j]; + curr_a = a[0]; + + for (i = 0; i < N; i++) + { + next_a = a[i+1]; + curr_a = x > c[i] ? curr_a : next_a; + } + + x_out[j] = curr_a; + } +} + +int main (void) +{ + int i,j; + + check_vect (); + + for (j = 0; j < M; j++) + x_in[j] = j; + + foo (); + + for (j = 0; j < M; j++) + if (x_out[j] != check_result[j]) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-2.c b/gcc/testsuite/gcc.dg/vect/vect-cond-2.c new file mode 100644 index 00000000000..c4dc5abaabc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-2.c @@ -0,0 +1,49 @@ +/* { dg-require-effective-target vect_condition } */ + +#include +#include +#include "tree-vect.h" + +#define N 16 + +int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2}; +int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024}; + +__attribute__ ((noinline)) void +foo (int *x) +{ + int i; + int curr_a, flag, next_a; + + curr_a = a[0]; + + for (i = 0; i < N; i++) + { + flag = *x > c[i]; + next_a = a[i+1]; + curr_a = flag ? 
curr_a : next_a; + } + + *x = curr_a; +} + +int main (void) +{ + int x = 7; + + check_vect (); + + foo (&x); + + if (x != 256) + abort (); + + return 0; +} + +/* The order of computation should not be changed for cond_expr, therefore, + it cannot be vectorized in reduction. */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-3.c b/gcc/testsuite/gcc.dg/vect/vect-cond-3.c new file mode 100644 index 00000000000..56cfbb2e068 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-3.c @@ -0,0 +1,66 @@ +/* { dg-require-effective-target vect_condition } */ + +#include +#include +#include "tree-vect.h" + +#define M 32 +#define N 16 + +int x_in[M]; +int x_out_a[M], x_out_b[M]; +int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2}; +int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024}; +int b[N+1] = {17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1}; +int check_result_a[M] = {1024,1024,1024,256,256,256,256,256,256,256,256,128,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48}; +int check_result_b[M] = {17,17,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; + +__attribute__ ((noinline)) void +foo () +{ + int j, i, x; + int curr_a, flag, next_a, curr_b, next_b; + + for (j = 0; j < M; j++) + { + x = x_in[j]; + curr_a = a[0]; + curr_b = b[0]; + + for (i = 0; i < N; i++) + { + flag = x > c[i]; + next_a = a[i+1]; + next_b = b[i+1]; + curr_a = flag ? curr_a : next_a; + curr_b = flag ? 
next_b : curr_b; + } + + x_out_a[j] = curr_a; + x_out_b[j] = curr_b; + } +} + +int main (void) +{ + int i,j; + + check_vect (); + + for (j = 0; j < M; j++) + x_in[j] = j; + + foo (); + + for (j = 0; j < M; j++) + if (x_out_a[j] != check_result_a[j] + || x_out_b[j] != check_result_b[j]) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-4.c b/gcc/testsuite/gcc.dg/vect/vect-cond-4.c new file mode 100644 index 00000000000..c3a1585839c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-4.c @@ -0,0 +1,63 @@ +/* { dg-require-effective-target vect_condition } */ + +#include +#include +#include "tree-vect.h" + +#define M 32 +#define N 16 + +int x_in[M]; +int x_out_a[M], x_out_b[M]; +int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2}; +int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024}; +int b[N+1] = {17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1}; +int check_result_a[M] = {1024,1024,1024,256,256,256,256,256,256,256,256,128,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48}; +int check_result_b[M] = {17,17,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; + +__attribute__ ((noinline)) void +foo (int z) +{ + int j, i, x; + int curr_a, flag, next_a, curr_b, next_b; + + for (j = 0; j < M; j++) + { + x = x_in[j]; + curr_a = a[0]; + curr_b = b[0]; + + for (i = 0; i < N; i++) + { + curr_a = x > c[i] ? curr_a : z; + curr_b = x > c[i] ? 
next_b : 5; + } + + x_out_a[j] = curr_a; + x_out_b[j] = curr_b; + } +} + +int main (void) +{ + int i,j; + + check_vect (); + + for (j = 0; j < M; j++) + x_in[j] = j; + + foo (125); + + for (j = 0; j < M; j++) + if (x_out_a[j] != 125 + || x_out_b[j] != 5) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-5.c b/gcc/testsuite/gcc.dg/vect/vect-cond-5.c new file mode 100644 index 00000000000..0996a92d18d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-5.c @@ -0,0 +1,62 @@ +/* { dg-require-effective-target vect_condition } */ + +#include +#include +#include "tree-vect.h" + +#define K 32 + +int cond_array[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int a[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int out[K]; +int check_result[K] = {2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + +__attribute__ ((noinline)) void +foo (int c) +{ + int res, i, j, k, next; + + for (k = 0; k < K; k++) + { + res = 0; + for (j = 0; j < K; j++) + for (i = 0; i < K; i++) + { + next = a[i][j]; + res = c > cond_array[i+k][j] ? next : res; + } + + out[k] = res; + } +} + +int main () +{ + int i, j, k; + + check_vect (); + + for (j = 0; j < K; j++) + { + for (i = 0; i < 2*K; i++) + cond_array[i][j] = i+j; + + for (i = 0; i < K; i++) + a[i][j] = i+2; + } + + foo(5); + + for (k = 0; k < K; k++) + if (out[k] != check_result[k]) + abort (); + + return 0; +} + +/* Double reduction with cond_expr is not supported, since even though the order + of computation is the same, vector results should be reduced to scalar + result, which can't be done for cond_expr. 
*/ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-6.c b/gcc/testsuite/gcc.dg/vect/vect-cond-6.c new file mode 100644 index 00000000000..e5e93919b3a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-6.c @@ -0,0 +1,60 @@ +/* { dg-require-effective-target vect_condition } */ + +#include +#include +#include "tree-vect.h" + +#define K 32 + +int cond_array[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int a[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int out[K]; + +__attribute__ ((noinline)) void +foo (int c) +{ + int res, i, j, k, next; + + for (k = 0; k < K; k++) + { + for (j = 0; j < K; j++) + { + res = 0; + for (i = 0; i < K; i++) + { + next = a[i][j]; + res = c > cond_array[i+k][j] ? next : res; + } + + out[j] = res; + } + } +} + +int main () +{ + int i, j, k; + + check_vect (); + + for (j = 0; j < K; j++) + { + for (i = 0; i < 2*K; i++) + cond_array[i][j] = i+j; + + for (i = 0; i < K; i++) + a[i][j] = i+2; + } + + foo(125); + + for (k = 0; k < K; k++) + if (out[k] != 33) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 1db80e43efd..b7b9d7893e5 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1568,9 +1568,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); edge latch_e = loop_latch_edge (loop); tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); - gimple def_stmt, def1, def2; + gimple def_stmt, def1 = NULL, def2 = NULL; enum tree_code code; - tree op1, op2; + tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; tree type; int nloop_uses; tree name; @@ -1695,25 +1695,52 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple 
phi, return NULL; } - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) + if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) { - if (vect_print_dump_info (REPORT_DETAILS)) - report_vect_op (def_stmt, "reduction: not binary operation: "); - return NULL; - } + if (code != COND_EXPR) + { + if (vect_print_dump_info (REPORT_DETAILS)) + report_vect_op (def_stmt, "reduction: not binary operation: "); - op1 = gimple_assign_rhs1 (def_stmt); - op2 = gimple_assign_rhs2 (def_stmt); - if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME) - { - if (vect_print_dump_info (REPORT_DETAILS)) - report_vect_op (def_stmt, "reduction: uses not ssa_names: "); - return NULL; + return NULL; + } + + op3 = TREE_OPERAND (TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0), 0); + op4 = TREE_OPERAND (TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0), 1); + op1 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 1); + op2 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 2); + + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) + { + if (vect_print_dump_info (REPORT_DETAILS)) + report_vect_op (def_stmt, "reduction: uses not ssa_names: "); + + return NULL; + } } + else + { + op1 = gimple_assign_rhs1 (def_stmt); + op2 = gimple_assign_rhs2 (def_stmt); + + if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME) + { + if (vect_print_dump_info (REPORT_DETAILS)) + report_vect_op (def_stmt, "reduction: uses not ssa_names: "); + + return NULL; + } + } type = TREE_TYPE (gimple_assign_lhs (def_stmt)); - if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1)) - || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2))) + if ((TREE_CODE (op1) == SSA_NAME + && TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))) + || (TREE_CODE (op2) == SSA_NAME + && TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2))) + || (op3 && TREE_CODE (op3) == SSA_NAME + && TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op3))) + || (op4 && TREE_CODE (op4) 
== SSA_NAME + && TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op4)))) { if (vect_print_dump_info (REPORT_DETAILS)) { @@ -1723,7 +1750,15 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM); fprintf (vect_dump, ","); print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM); + if (op3 && op4) + { + fprintf (vect_dump, ","); + print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM); + fprintf (vect_dump, ","); + print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM); + } } + return NULL; } @@ -1765,9 +1800,14 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, 1) integer arithmetic and no trapv 2) floating point arithmetic, and special flags permit this optimization 3) nested cycle (i.e., outer loop vectorization). */ - def1 = SSA_NAME_DEF_STMT (op1); - def2 = SSA_NAME_DEF_STMT (op2); - if (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2)) + if (TREE_CODE (op1) == SSA_NAME) + def1 = SSA_NAME_DEF_STMT (op1); + + if (TREE_CODE (op2) == SSA_NAME) + def2 = SSA_NAME_DEF_STMT (op2); + + if (code != COND_EXPR + && (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2))) { if (vect_print_dump_info (REPORT_DETAILS)) report_vect_op (def_stmt, "reduction: no defs for operands: "); @@ -1778,28 +1818,31 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, the other def is either defined in the loop ("vect_internal_def"), or it's an induction (defined by a loop-header phi-node). 
*/ - if (def2 == phi - && flow_bb_inside_loop_p (loop, gimple_bb (def1)) - && (is_gimple_assign (def1) - || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def - || (gimple_code (def1) == GIMPLE_PHI - && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) - == vect_internal_def - && !is_loop_header_bb_p (gimple_bb (def1))))) + if (def2 && def2 == phi + && (code == COND_EXPR + || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1)) + && (is_gimple_assign (def1) + || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) + == vect_induction_def + || (gimple_code (def1) == GIMPLE_PHI + && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) + == vect_internal_def + && !is_loop_header_bb_p (gimple_bb (def1))))))) { if (vect_print_dump_info (REPORT_DETAILS)) report_vect_op (def_stmt, "detected reduction: "); return def_stmt; } - else if (def1 == phi - && flow_bb_inside_loop_p (loop, gimple_bb (def2)) - && (is_gimple_assign (def2) - || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) - == vect_induction_def - || (gimple_code (def2) == GIMPLE_PHI - && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) - == vect_internal_def - && !is_loop_header_bb_p (gimple_bb (def2))))) + else if (def1 && def1 == phi + && (code == COND_EXPR + || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2)) + && (is_gimple_assign (def2) + || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) + == vect_induction_def + || (gimple_code (def2) == GIMPLE_PHI + && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) + == vect_internal_def + && !is_loop_header_bb_p (gimple_bb (def2))))))) { if (check_reduction) { @@ -2584,16 +2627,16 @@ get_initial_def_for_induction (gimple iv_phi) vector of partial results. Option1 (adjust in epilog): Initialize the vector as follows: - add/bit or/xor: [0,0,...,0,0] - mult/bit and: [1,1,...,1,1] - min/max: [init_val,init_val,..,init_val,init_val] + add/bit or/xor: [0,0,...,0,0] + mult/bit and: [1,1,...,1,1] + min/max/cond_expr: [init_val,init_val,..,init_val,init_val] and when necessary (e.g. 
add/mult case) let the caller know that it needs to adjust the result by init_val. Option2: Initialize the vector as follows: - add/bit or/xor: [init_val,0,0,...,0] - mult/bit and: [init_val,1,1,...,1] - min/max: [init_val,init_val,...,init_val] + add/bit or/xor: [init_val,0,0,...,0] + mult/bit and: [init_val,1,1,...,1] + min/max/cond_expr: [init_val,init_val,...,init_val] and no adjustments are needed. For example, for the following code: @@ -2726,6 +2769,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, case MIN_EXPR: case MAX_EXPR: + case COND_EXPR: if (adjustment_def) { *adjustment_def = NULL_TREE; @@ -3413,7 +3457,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, stmt_vec_info prev_stmt_info, prev_phi_info; gimple first_phi = NULL; bool single_defuse_cycle = false; - tree reduc_def; + tree reduc_def = NULL_TREE; gimple new_stmt = NULL; int j; tree ops[3]; @@ -3522,6 +3566,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, reduction variable. */ for (i = 0; i < op_type-1; i++) { + /* The condition of COND_EXPR is checked in vectorizable_condition(). */ + if (i == 0 && code == COND_EXPR) + continue; + is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, NULL, &def_stmt, &def, &dt); gcc_assert (is_simple_use); @@ -3529,7 +3577,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, && dt != vect_external_def && dt != vect_constant_def && dt != vect_induction_def - && dt != vect_nested_cycle) + && !(dt == vect_nested_cycle && nested_cycle)) return false; if (dt == vect_nested_cycle) @@ -3564,37 +3612,56 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt))) return false; - /* 4. Supportable by target? */ - - /* 4.1. 
check support for the operation in the loop */ - optab = optab_for_tree_code (code, vectype, optab_default); - if (!optab) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "no optab."); - return false; - } vec_mode = TYPE_MODE (vectype); - if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "op not supported by target."); - if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD - || LOOP_VINFO_VECT_FACTOR (loop_vinfo) - < vect_min_worthwhile_factor (code)) - return false; - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "proceeding using word mode."); - } - /* Worthwhile without SIMD support? */ - if (!VECTOR_MODE_P (TYPE_MODE (vectype)) - && LOOP_VINFO_VECT_FACTOR (loop_vinfo) - < vect_min_worthwhile_factor (code)) + if (code == COND_EXPR) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "not worthwhile without SIMD support."); - return false; + if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unsupported condition in reduction"); + + return false; + } + } + else + { + /* 4. Supportable by target? */ + + /* 4.1. check support for the operation in the loop */ + optab = optab_for_tree_code (code, vectype, optab_default); + if (!optab) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "no optab."); + + return false; + } + + if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "op not supported by target."); + + if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD + || LOOP_VINFO_VECT_FACTOR (loop_vinfo) + < vect_min_worthwhile_factor (code)) + return false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "proceeding using word mode."); + } + + /* Worthwhile without SIMD support? 
*/ + if (!VECTOR_MODE_P (TYPE_MODE (vectype)) + && LOOP_VINFO_VECT_FACTOR (loop_vinfo) + < vect_min_worthwhile_factor (code)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "not worthwhile without SIMD support."); + + return false; + } } /* 4.2. Check support for the epilog operation. @@ -3656,26 +3723,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, orig_code = code; } - if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) - return false; - - reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, - optab_default); - if (!reduc_optab) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "no optab for reduction."); - epilog_reduc_code = ERROR_MARK; - } - - if (reduc_optab - && optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "reduc op not supported by target."); - epilog_reduc_code = ERROR_MARK; - } - if (nested_cycle) { def_bb = gimple_bb (reduc_def_stmt); @@ -3692,6 +3739,40 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, double_reduc = true; } + epilog_reduc_code = ERROR_MARK; + if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) + { + reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, + optab_default); + if (!reduc_optab) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "no optab for reduction."); + + epilog_reduc_code = ERROR_MARK; + } + + if (reduc_optab + && optab_handler (reduc_optab, vec_mode)->insn_code + == CODE_FOR_nothing) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "reduc op not supported by target."); + + epilog_reduc_code = ERROR_MARK; + } + } + else + { + if (!nested_cycle || double_reduc) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "no reduc code for scalar code."); + + return false; + } + } + if (double_reduc && ncopies > 1) { if (vect_print_dump_info 
(REPORT_DETAILS)) @@ -3713,6 +3794,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "transform reduction."); + /* FORNOW: Multiple types are not supported for condition. */ + if (code == COND_EXPR) + gcc_assert (ncopies == 1); + /* Create the destination vector */ vec_dest = vect_create_destination_var (scalar_dest, vectype); @@ -3761,8 +3846,19 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, new_phi = create_phi_node (vec_dest, loop->header); set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo, NULL)); + /* Get the vector def for the reduction variable from the phi + node. */ + reduc_def = PHI_RESULT (new_phi); } + if (code == COND_EXPR) + { + first_phi = new_phi; + vectorizable_condition (stmt, gsi, vec_stmt, reduc_def, reduc_index); + /* Multiple types are not supported for condition. */ + break; + } + /* Handle uses. */ if (j == 0) { @@ -3780,7 +3876,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, /* Get the vector def for the reduction variable from the phi node. */ - reduc_def = PHI_RESULT (new_phi); first_phi = new_phi; } else @@ -3798,8 +3893,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi; } - - /* Arguments are ready. create the new vector stmt. */ + /* Arguments are ready. Create the new vector stmt. 
*/ if (op_type == binary_op) { if (reduc_index == 0) @@ -3827,18 +3921,19 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, new_temp = make_ssa_name (vec_dest, new_stmt); gimple_assign_set_lhs (new_stmt, new_temp); vect_finish_stmt_generation (stmt, new_stmt, gsi); - + if (j == 0) STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; else STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); prev_phi_info = vinfo_for_stmt (new_phi); } /* Finalize the reduction-phi (set its arguments) and create the epilog reduction code. */ - if (!single_defuse_cycle) + if (!single_defuse_cycle || code == COND_EXPR) new_temp = gimple_assign_lhs (*vec_stmt); vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies, diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 891ee1860f0..8e1c973bba7 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -3775,13 +3775,17 @@ vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo) Check if STMT is conditional modify expression that can be vectorized. If VEC_STMT is also passed, vectorize the STMT: create a vectorized stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it - at BSI. + at GSI. + + When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable + to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in + else clause if it is 2). Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ -static bool +bool vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, - gimple *vec_stmt) + gimple *vec_stmt, tree reduc_def, int reduc_index) { tree scalar_dest = NULL_TREE; tree vec_dest = NULL_TREE; @@ -3810,7 +3814,9 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, if (!STMT_VINFO_RELEVANT_P (stmt_info)) return false; - if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def + && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle + && reduc_def)) return false; /* FORNOW: SLP not supported. */ @@ -3818,7 +3824,7 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, return false; /* FORNOW: not yet supported. */ - if (STMT_VINFO_LIVE_P (stmt_info)) + if (STMT_VINFO_LIVE_P (stmt_info)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "value used after loop."); @@ -3892,8 +3898,14 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL); vec_cond_rhs = vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL); - vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL); - vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL); + if (reduc_index == 1) + vec_then_clause = reduc_def; + else + vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL); + if (reduc_index == 2) + vec_else_clause = reduc_def; + else + vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL); /* Arguments are ready. Create the new vector stmt. 
*/ vec_compare = build2 (TREE_CODE (cond_expr), vectype, @@ -4023,8 +4035,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) || vectorizable_load (stmt, NULL, NULL, NULL, NULL) || vectorizable_call (stmt, NULL, NULL) || vectorizable_store (stmt, NULL, NULL, NULL) - || vectorizable_condition (stmt, NULL, NULL) - || vectorizable_reduction (stmt, NULL, NULL)); + || vectorizable_reduction (stmt, NULL, NULL) + || vectorizable_condition (stmt, NULL, NULL, NULL, 0)); else { if (bb_vinfo) @@ -4165,7 +4177,7 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi, case condition_vec_info_type: gcc_assert (!slp_node); - done = vectorizable_condition (stmt, gsi, &vec_stmt); + done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0); gcc_assert (done); break; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index c7dab10c13f..31e9c185005 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -786,7 +786,9 @@ extern bool vect_transform_stmt (gimple, gimple_stmt_iterator *, bool *, slp_tree, slp_instance); extern void vect_remove_stores (gimple); extern bool vect_analyze_stmt (gimple, bool *, slp_tree); - +extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, + tree, int); + /* In tree-vect-data-refs.c. */ extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); extern enum dr_alignment_support vect_supportable_dr_alignment