From a023975e421f23fc2799a2c7fcc6e19ed68227f7 Mon Sep 17 00:00:00 2001 From: Olga Golovanevsky Date: Thu, 14 Oct 2004 17:21:35 +0000 Subject: [PATCH] 2004-10-14 Olga Golovanevsky * tree-vectorizer.c (vect_generate_tmps_on_preheader): (vect_update_ivs_after_vectorizer): (vect_transform_for_unknown_loop_bound): (tree_duplicate_loop_to_edge): (allocate_new_names): (rename_use_op): (rename_def_op): (rename_variables_in_bb): (free_new_names): (rename_variables_in_loop): (copy_phi_nodes): (update_phis_for_duplicate_loop): (update_phi_nodes_for_guard): (make_loop_iterate_ntimes): (tree_duplicate_loop_to_edge_cfg): (add_loop_guard): (vect_analyze_loop_with_symbolic_num_of_iters): (verify_loop_for_duplication): (vect_gen_niters_for_prolog_loop): (vect_update_niters_after_peeling): (vect_update_inits_of_dr): (vect_update_inits_of_drs): (vect_build_loop_niters): (vect_do_peeling_for_alignment): New functions. (vect_transform_loop): Add unknown and known but indivisible loop bound support; add peeling for unalignment support. (vect_analyze_loop_form): Support symbolic number of iterations. (vect_transform_loop_bound): New input parameter. (vect_get_loop_niters): Change input parameter type. (new_loop_vec_info): LOOP_VINFO_NITERS is tree now. (vectorizable_store): Allow unaligned access. (vectorize_loops): Add rewrite_into_loop_closed_ssa. (vect_analyze_data_refs_alignment): Allowed one unaligned store. * tree-vectorizer.h (LOOP_VINFO_NITERS_KNOWN_P): Redefined to use tree. (LOOP_VINFO_INT_NITERS): New macro. (MAX_NUMBER_OF_UNALIGNED_DATA_REFS): New define. (do_peeling_for_alignment): (unaligned_drs): New members of _loop_vec_info. (LOOP_DO_PEELING_FOR_ALIGNMENT): New macro. From-SVN: r89040 --- gcc/ChangeLog | 44 + gcc/testsuite/ChangeLog | 21 + gcc/testsuite/gcc.dg/vect/vect-26.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-28.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-30.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-31.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-33.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-44.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-46.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-50.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-52.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-54.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-58.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-60.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-64.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-66.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-68.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-69.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-8.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-80.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-none.c | 4 +- gcc/tree-vectorizer.c | 1647 +++++++++++++++++++++++-- gcc/tree-vectorizer.h | 20 +- 23 files changed, 1676 insertions(+), 96 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bd72a60d290..13d6db424d7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,47 @@ +2004-10-14 Olga Golovanevsky + + * tree-vectorizer.c (vect_generate_tmps_on_preheader): + (vect_update_ivs_after_vectorizer): + (vect_transform_for_unknown_loop_bound): + (tree_duplicate_loop_to_edge): + (allocate_new_names): + (rename_use_op): + (rename_def_op): + (rename_variables_in_bb): + (free_new_names): + (rename_variables_in_loop): + (copy_phi_nodes): + (update_phis_for_duplicate_loop): + (update_phi_nodes_for_guard): + (make_loop_iterate_ntimes): + (tree_duplicate_loop_to_edge_cfg): + (add_loop_guard): + (vect_analyze_loop_with_symbolic_num_of_iters): + (verify_loop_for_duplication): + (vect_gen_niters_for_prolog_loop): + (vect_update_niters_after_peeling): + (vect_update_inits_of_dr): + (vect_update_inits_of_drs): + (vect_build_loop_niters): + (vect_do_peeling_for_alignment): New functions. + (vect_transform_loop): Add unknown and known but indivisible loop + bound support; add peeling for unalignment support. + (vect_analyze_loop_form): Support symbolic number of iterations. + (vect_transform_loop_bound): New input parameter. + (vect_get_loop_niters): Change input parameter type. + (new_loop_vec_info): LOOP_VINFO_NITERS is tree now. + (vectorizable_store): Allow unaligned access. + (vectorize_loops): Add rewrite_into_loop_closed_ssa. + (vect_analyze_data_refs_alignment): Allowed one unaligned + store. + * tree-vectorizer.h (LOOP_VINFO_NITERS_KNOWN_P): Redefined + to use tree. + (LOOP_VINFO_INT_NITERS): New macro. + (MAX_NUMBER_OF_UNALIGNED_DATA_REFS): New define. + (do_peeling_for_alignment): + (unaligned_drs): New members of _loop_vec_info. + (LOOP_DO_PEELING_FOR_ALIGNMENT): New macro. + 2004-10-14 Ranjit Mathew * tree.h (TREE_STRING_POINTER): Wrap in "const char *". diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 141024acf07..2919fb34b07 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,24 @@ +2004-10-14 Olga GOlovanevsky + + * testsuite/gcc.dg/vect/vect-28.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-30.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-31.c : Vectorize 4 loops instead of 2. + * testsuite/gcc.dg/vect/vect-33.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-44.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-46.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-50.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-52.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-54.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-58.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-60.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-64.c : Vectorize 3 loops instead of 1. + * testsuite/gcc.dg/vect/vect-66.c : Vectorize 3 loops instead of 2. + * testsuite/gcc.dg/vect/vect-68.c : Vectorize 4 loops instead of 2. + * testsuite/gcc.dg/vect/vect-69.c : Vectorize 4 loops instead of 2. + * testsuite/gcc.dg/vect/vect-8.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-80.c : Remove xfail. + * testsuite/gcc.dg/vect/vect-none.c : Vectorize 1 loops instead of 0. + 2004-10-14 Dorit Naishlos * gcc.dg/vect/vect-82.c: New testcase. diff --git a/gcc/testsuite/gcc.dg/vect/vect-26.c b/gcc/testsuite/gcc.dg/vect/vect-26.c index fd21beddb78..cebfc5a24f5 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-26.c +++ b/gcc/testsuite/gcc.dg/vect/vect-26.c @@ -37,5 +37,5 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-28.c b/gcc/testsuite/gcc.dg/vect/vect-28.c index cb8d7cb0c76..26ebfc1a6e5 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-28.c +++ b/gcc/testsuite/gcc.dg/vect/vect-28.c @@ -40,5 +40,5 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-30.c b/gcc/testsuite/gcc.dg/vect/vect-30.c index ca42f7b3fb0..b2c18ef81a3 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-30.c +++ b/gcc/testsuite/gcc.dg/vect/vect-30.c @@ -64,4 +64,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-31.c b/gcc/testsuite/gcc.dg/vect/vect-31.c index 4742ca7510f..b15f1569ef4 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-31.c +++ b/gcc/testsuite/gcc.dg/vect/vect-31.c @@ -88,4 +88,4 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-33.c b/gcc/testsuite/gcc.dg/vect/vect-33.c index 137e6982124..8742ef5d243 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-33.c +++ b/gcc/testsuite/gcc.dg/vect/vect-33.c @@ -38,4 +38,4 @@ int main (void) } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-44.c b/gcc/testsuite/gcc.dg/vect/vect-44.c index 61e26030868..3549faf11eb 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-44.c +++ b/gcc/testsuite/gcc.dg/vect/vect-44.c @@ -59,4 +59,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-46.c b/gcc/testsuite/gcc.dg/vect/vect-46.c index ac9aa276479..d28c0151ce9 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-46.c +++ b/gcc/testsuite/gcc.dg/vect/vect-46.c @@ -55,4 +55,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-50.c b/gcc/testsuite/gcc.dg/vect/vect-50.c index 28caf0aa3ea..1104b3fa0c1 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-50.c +++ b/gcc/testsuite/gcc.dg/vect/vect-50.c @@ -54,4 +54,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-52.c b/gcc/testsuite/gcc.dg/vect/vect-52.c index bcb4275edc1..f6204296935 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-52.c +++ b/gcc/testsuite/gcc.dg/vect/vect-52.c @@ -56,4 +56,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-54.c b/gcc/testsuite/gcc.dg/vect/vect-54.c index 801a4c72634..cd322d1063d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-54.c +++ b/gcc/testsuite/gcc.dg/vect/vect-54.c @@ -55,4 +55,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-58.c b/gcc/testsuite/gcc.dg/vect/vect-58.c index f31d237a196..06a9fd29dcb 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-58.c +++ b/gcc/testsuite/gcc.dg/vect/vect-58.c @@ -56,4 +56,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-60.c b/gcc/testsuite/gcc.dg/vect/vect-60.c index 0d7eca1e59a..71d60839505 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-60.c +++ b/gcc/testsuite/gcc.dg/vect/vect-60.c @@ -57,4 +57,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-64.c b/gcc/testsuite/gcc.dg/vect/vect-64.c index eaed89229f8..67a659cfec5 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-64.c +++ b/gcc/testsuite/gcc.dg/vect/vect-64.c @@ -83,4 +83,4 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-66.c b/gcc/testsuite/gcc.dg/vect/vect-66.c index 9773953043a..593a12bbd59 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-66.c +++ b/gcc/testsuite/gcc.dg/vect/vect-66.c @@ -79,4 +79,4 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-68.c b/gcc/testsuite/gcc.dg/vect/vect-68.c index 3812cead7e9..0237e2b78c2 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-68.c +++ b/gcc/testsuite/gcc.dg/vect/vect-68.c @@ -87,4 +87,4 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-69.c b/gcc/testsuite/gcc.dg/vect/vect-69.c index 92b4ef298d5..753371fd563 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-69.c +++ b/gcc/testsuite/gcc.dg/vect/vect-69.c @@ -114,4 +114,4 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-8.c b/gcc/testsuite/gcc.dg/vect/vect-8.c index 7559e602588..d3513f76ba8 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-8.c +++ b/gcc/testsuite/gcc.dg/vect/vect-8.c @@ -39,4 +39,4 @@ int main (void) return main1 (N); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-80.c b/gcc/testsuite/gcc.dg/vect/vect-80.c index 78269d0da7d..39981be4d09 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-80.c +++ b/gcc/testsuite/gcc.dg/vect/vect-80.c @@ -47,4 +47,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-none.c b/gcc/testsuite/gcc.dg/vect/vect-none.c index 8680d21b629..a9d44759d1d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-none.c +++ b/gcc/testsuite/gcc.dg/vect/vect-none.c @@ -189,5 +189,5 @@ foo (int n) } /* { dg-final { scan-tree-dump-times "vectorized " 3 "vect"} } */ -/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 3 "vect"} } */ -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect"} } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 382139f4e35..d12d140307a 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -139,6 +139,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "cfglayout.h" #include "expr.h" #include "optabs.h" +#include "toplev.h" #include "tree-chrec.h" #include "tree-data-ref.h" #include "tree-scalar-evolution.h" @@ -158,7 +159,7 @@ static bool vect_analyze_operations (loop_vec_info); /* Main code transformation functions. */ static void vect_transform_loop (loop_vec_info, struct loops *); -static void vect_transform_loop_bound (loop_vec_info); +static void vect_transform_loop_bound (loop_vec_info, tree niters); static bool vect_transform_stmt (tree, block_stmt_iterator *); static bool vectorizable_load (tree, block_stmt_iterator *, tree *); static bool vectorizable_store (tree, block_stmt_iterator *, tree *); @@ -173,7 +174,7 @@ static bool exist_non_indexing_operands_for_use_p (tree, tree); static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool); static void vect_mark_relevant (varray_type, tree); static bool vect_stmt_relevant_p (tree, loop_vec_info); -static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *); +static tree vect_get_loop_niters (struct loop *, tree *); static bool vect_compute_data_ref_alignment (struct data_reference *, loop_vec_info); static bool vect_analyze_data_ref_access (struct data_reference *); @@ -181,6 +182,8 @@ static bool vect_get_first_index (tree, tree *); static bool vect_can_force_dr_alignment_p (tree, unsigned int); static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, bool); +static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters, + struct loop *loop); static tree vect_get_base_and_bit_offset (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*); static struct data_reference * vect_analyze_pointer_ref_access @@ -203,9 +206,52 @@ static tree get_vectype_for_scalar_type (tree); static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); static tree vect_get_vec_def_for_operand (tree, tree); static tree vect_init_vector (tree, tree); +static tree vect_build_symbol_bound (tree, int, struct loop *); static void vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); +static void vect_generate_tmps_on_preheader (loop_vec_info, + tree *, tree *, + tree *); +static tree vect_build_loop_niters (loop_vec_info); +static void vect_update_ivs_after_vectorizer (struct loop *, tree); + +/* Loop transformations prior to vectorizeration. */ + +/* Loop transformations entry point function. + It can be used outside of the vectorizer + in case the loop to be manipulated answers conditions specified + in function documentation. */ +struct loop *tree_duplicate_loop_to_edge (struct loop *, + struct loops *, edge, + tree, tree, bool); + +static void allocate_new_names (bitmap); +static void rename_use_op (use_operand_p); +static void rename_def_op (def_operand_p, tree); +static void rename_variables_in_bb (basic_block); +static void free_new_names (bitmap); +static void rename_variables_in_loop (struct loop *); +static void copy_phi_nodes (struct loop *, struct loop *, bool); +static void update_phis_for_duplicate_loop (struct loop *, + struct loop *, + bool after); +static void update_phi_nodes_for_guard (edge, struct loop *); +static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree); +static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *, + struct loops *, + edge); +static edge add_loop_guard (basic_block, tree, basic_block); +static bool verify_loop_for_duplication (struct loop *, bool, edge); + +/* Utilities dealing with loop peeling (not peeling itself). */ +static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); +static void vect_update_niters_after_peeling (loop_vec_info, tree); +static void vect_update_inits_of_dr (struct data_reference *, struct loop *, + tree niters); +static void vect_update_inits_of_drs (loop_vec_info, tree); +static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *); + /* Utilities for creation and deletion of vec_info structs. */ loop_vec_info new_loop_vec_info (struct loop *loop); void destroy_loop_vec_info (loop_vec_info); @@ -214,6 +260,793 @@ stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop); static bool vect_debug_stats (struct loop *loop); static bool vect_debug_details (struct loop *loop); + +/* Utilities to support loop peeling for vectorization purposes. */ + + +/* For each definition in DEFINITIONS this function allocates + new ssa name. */ + +static void +allocate_new_names (bitmap definitions) +{ + unsigned ver; + bitmap_iterator bi; + + EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi) + { + tree def = ssa_name (ver); + tree *new_name_ptr = xmalloc (sizeof (tree)); + + bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def); + + *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def)); + SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal; + + SSA_NAME_AUX (def) = new_name_ptr; + } +} + + +/* Renames the use *OP_P. */ + +static void +rename_use_op (use_operand_p op_p) +{ + tree *new_name_ptr; + + if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME) + return; + + new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p)); + + /* Something defined outside of the loop. */ + if (!new_name_ptr) + return; + + /* An ordinary ssa name defined in the loop. */ + + SET_USE (op_p, *new_name_ptr); +} + + +/* Renames the def *OP_P in statement STMT. */ + +static void +rename_def_op (def_operand_p op_p, tree stmt) +{ + tree *new_name_ptr; + + if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME) + return; + + new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p)); + + /* Something defined outside of the loop. */ + if (!new_name_ptr) + return; + + /* An ordinary ssa name defined in the loop. */ + + SET_DEF (op_p, *new_name_ptr); + SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt; +} + + +/* Renames the variables in basic block BB. */ + +static void +rename_variables_in_bb (basic_block bb) +{ + tree phi; + block_stmt_iterator bsi; + tree stmt; + stmt_ann_t ann; + use_optype uses; + vuse_optype vuses; + def_optype defs; + v_may_def_optype v_may_defs; + v_must_def_optype v_must_defs; + unsigned i; + edge e; + edge_iterator ei; + + for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi)) + rename_def_op (PHI_RESULT_PTR (phi), phi); + + for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) + { + stmt = bsi_stmt (bsi); + get_stmt_operands (stmt); + ann = stmt_ann (stmt); + + uses = USE_OPS (ann); + for (i = 0; i < NUM_USES (uses); i++) + rename_use_op (USE_OP_PTR (uses, i)); + + defs = DEF_OPS (ann); + for (i = 0; i < NUM_DEFS (defs); i++) + rename_def_op (DEF_OP_PTR (defs, i), stmt); + + vuses = VUSE_OPS (ann); + for (i = 0; i < NUM_VUSES (vuses); i++) + rename_use_op (VUSE_OP_PTR (vuses, i)); + + v_may_defs = V_MAY_DEF_OPS (ann); + for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++) + { + rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i)); + rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt); + } + + v_must_defs = V_MUST_DEF_OPS (ann); + for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++) + rename_def_op (V_MUST_DEF_OP_PTR (v_must_defs, i), stmt); + } + + FOR_EACH_EDGE (e, ei, bb->succs) + for (phi = phi_nodes (e->dest); phi; phi = TREE_CHAIN (phi)) + rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e)); +} + + +/* Releases the structures holding the new ssa names. */ + +static void +free_new_names (bitmap definitions) +{ + unsigned ver; + bitmap_iterator bi; + + EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi) + { + tree def = ssa_name (ver); + + if (SSA_NAME_AUX (def)) + { + free (SSA_NAME_AUX (def)); + SSA_NAME_AUX (def) = NULL; + } + } +} + + +/* Renames variables in new generated LOOP. */ + +static void +rename_variables_in_loop (struct loop *loop) +{ + unsigned i; + basic_block *bbs; + + bbs = get_loop_body (loop); + + for (i = 0; i < loop->num_nodes; i++) + rename_variables_in_bb (bbs[i]); + + free (bbs); +} + + +/* This function copies phis from LOOP header to + NEW_LOOP header. AFTER is as + in update_phis_for_duplicate_loop function. */ + +static void +copy_phi_nodes (struct loop *loop, struct loop *new_loop, + bool after) +{ + tree phi, new_phi, def; + edge new_e; + edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop)); + + /* Second add arguments to newly created phi nodes. */ + for (phi = phi_nodes (loop->header), + new_phi = phi_nodes (new_loop->header); + phi; + phi = TREE_CHAIN (phi), + new_phi = TREE_CHAIN (new_phi)) + { + new_e = loop_preheader_edge (new_loop); + def = PHI_ARG_DEF_FROM_EDGE (phi, e); + add_phi_arg (&new_phi, def, new_e); + } +} + + +/* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP + executes after LOOP, and false if it executes before it. */ + +static void +update_phis_for_duplicate_loop (struct loop *loop, + struct loop *new_loop, bool after) +{ + edge old_latch; + tree *new_name_ptr, new_ssa_name; + tree phi_new, phi_old, def; + edge orig_entry_e = loop_preheader_edge (loop); + + /* Copy phis from loop->header to new_loop->header. */ + copy_phi_nodes (loop, new_loop, after); + + old_latch = loop_latch_edge (loop); + + /* Update PHI args for the new loop latch edge, and + the old loop preheader edge, we know that the PHI nodes + are ordered appropriately in copy_phi_nodes. */ + for (phi_new = phi_nodes (new_loop->header), + phi_old = phi_nodes (loop->header); + phi_new && phi_old; + phi_new = TREE_CHAIN (phi_new), phi_old = TREE_CHAIN (phi_old)) + { + def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch); + + if (TREE_CODE (def) != SSA_NAME) + continue; + + new_name_ptr = SSA_NAME_AUX (def); + + /* Something defined outside of the loop. */ + if (!new_name_ptr) + continue; + + /* An ordinary ssa name defined in the loop. */ + new_ssa_name = *new_name_ptr; + + add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop)); + + /* Update PHI args for the original loop pre-header edge. */ + if (! after) + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e), + new_ssa_name); + } +} + + +/* Update PHI nodes for a guard of the LOOP. + + LOOP is supposed to have a preheader bb at which a guard condition is + located. The true edge of this condition skips the LOOP and ends + at the destination of the (unique) LOOP exit. The loop exit bb is supposed + to be an empty bb (created by this transformation) with one successor. + + This function creates phi nodes at the LOOP exit bb. These phis need to be + created as a result of adding true edge coming from guard. + + FORNOW: Only phis which have corresponding phi nodes at the header of the + LOOP are created. Here we use the assumption that after the LOOP there + are no uses of defs generated in LOOP. + + After the phis creation, the function updates the values of phi nodes at + the LOOP exit successor bb: + + Original loop: + + bb0: loop preheader + goto bb1 + bb1: loop header + if (exit_cond) goto bb3 else goto bb2 + bb2: loop latch + goto bb1 + bb3: + + + After guard creation (the loop before this function): + + bb0: loop preheader + if (guard_condition) goto bb4 else goto bb1 + bb1: loop header + if (exit_cond) goto bb4 else goto bb2 + bb2: loop latch + goto bb1 + bb4: loop exit + (new empty bb) + goto bb3 + bb3: + + This function updates the phi nodes in bb4 and in bb3, to account for the + new edge from bb0 to bb4. */ + +static void +update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop) +{ + tree phi, phi1; + + for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi)) + { + tree new_phi; + tree phi_arg; + + /* Generate new phi node. */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), + loop->exit_edges[0]->dest); + + /* Add argument coming from guard true edge. */ + phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]); + add_phi_arg (&new_phi, phi_arg, guard_true_edge); + + /* Add argument coming from loop exit edge. */ + phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)); + add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]); + + /* Update all phi nodes at the loop exit successor. */ + for (phi1 = phi_nodes (EDGE_SUCC (loop->exit_edges[0]->dest, 0)->dest); + phi1; + phi1 = TREE_CHAIN (phi1)) + { + tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1, + EDGE_SUCC (loop->exit_edges[0]->dest, 0)); + if (old_arg == phi_arg) + { + edge e = EDGE_SUCC (loop->exit_edges[0]->dest, 0); + + SET_PHI_ARG_DEF (phi1, + phi_arg_from_edge (phi1, e), + PHI_RESULT (new_phi)); + } + } + } +} + + +/* Make the LOOP iterate NITERS times. This is done by adding a new IV + that starts at zero, increases by one and its limit is NITERS. */ + +static void +make_loop_iterate_ntimes (struct loop *loop, tree niters, + tree begin_label, tree exit_label) +{ + tree indx_before_incr, indx_after_incr, cond_stmt, cond; + tree orig_cond; + edge exit_edge = loop->exit_edges[0]; + block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src); + + /* Flow loop scan does not update loop->single_exit field. */ + loop->single_exit = loop->exit_edges[0]; + orig_cond = get_loop_exit_condition (loop); + gcc_assert (orig_cond); + create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop, + &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr); + + /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get + back to the exit condition statement. */ + bsi_next (&loop_exit_bsi); + gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond); + + + if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */ + cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters); + else /* 'then' edge loops back. */ + cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters); + + begin_label = build1 (GOTO_EXPR, void_type_node, begin_label); + exit_label = build1 (GOTO_EXPR, void_type_node, exit_label); + cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond, + begin_label, exit_label); + bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT); + + /* Remove old loop exit test: */ + bsi_remove (&loop_exit_bsi); + + if (vect_debug_stats (loop) || vect_debug_details (loop)) + print_generic_expr (dump_file, cond_stmt, TDF_SLIM); +} + + +/* Given LOOP this function generates a new copy of it and puts it + on E which is either the entry or exit of LOOP. */ + +static struct loop * +tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, + edge e) +{ + struct loop *new_loop; + basic_block *new_bbs, *bbs; + bool at_exit; + bool was_imm_dom; + basic_block exit_dest; + tree phi, phi_arg; + + at_exit = (e == loop->exit_edges[0]); + if (!at_exit && e != loop_preheader_edge (loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Edge is not an entry nor an exit edge.\n"); + return NULL; + } + + bbs = get_loop_body (loop); + + /* Check whether duplication is possible. */ + if (!can_copy_bbs_p (bbs, loop->num_nodes)) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "Cannot copy basic blocks.\n"); + free (bbs); + return NULL; + } + + /* Generate new loop structure. */ + new_loop = duplicate_loop (loops, loop, loop->outer); + if (!new_loop) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "The duplicate_loop returns NULL.\n"); + free (bbs); + return NULL; + } + + exit_dest = loop->exit_edges[0]->dest; + was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, + exit_dest) == loop->header ? + true : false); + + new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes); + + copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL); + + /* Duplicating phi args at exit bbs as coming + also from exit of duplicated loop. */ + for (phi = phi_nodes (exit_dest); phi; phi = TREE_CHAIN (phi)) + { + phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]); + if (phi_arg) + { + edge new_loop_exit_edge; + + if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch) + new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1); + else + new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0); + + add_phi_arg (&phi, phi_arg, new_loop_exit_edge); + } + } + + if (at_exit) /* Add the loop copy at exit. */ + { + redirect_edge_and_branch_force (e, new_loop->header); + set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src); + if (was_imm_dom) + set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header); + } + else /* Add the copy at entry. */ + { + edge new_exit_e; + edge entry_e = loop_preheader_edge (loop); + basic_block preheader = entry_e->src; + + if (!flow_bb_inside_loop_p (new_loop, + EDGE_SUCC (new_loop->header, 0)->dest)) + new_exit_e = EDGE_SUCC (new_loop->header, 0); + else + new_exit_e = EDGE_SUCC (new_loop->header, 1); + + redirect_edge_and_branch_force (new_exit_e, loop->header); + set_immediate_dominator (CDI_DOMINATORS, loop->header, + new_exit_e->src); + + /* We have to add phi args to the loop->header here as coming + from new_exit_e edge. */ + for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi)) + { + phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e); + if (phi_arg) + add_phi_arg (&phi, phi_arg, new_exit_e); + } + + redirect_edge_and_branch_force (entry_e, new_loop->header); + set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader); + } + + flow_loop_scan (new_loop, LOOP_ALL); + flow_loop_scan (loop, LOOP_ALL); + free (new_bbs); + free (bbs); + + return new_loop; +} + + +/* Given the condition statement COND, put it as the last statement + of GUARD_BB; EXIT_BB is the basic block to skip the loop; + Assumes that this is the single exit of the guarded loop. + Returns the skip edge. */ + +static edge +add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb) +{ + block_stmt_iterator bsi; + edge new_e, enter_e; + tree cond_stmt, then_label, else_label; + + enter_e = EDGE_SUCC (guard_bb, 0); + enter_e->flags &= ~EDGE_FALLTHRU; + enter_e->flags |= EDGE_FALSE_VALUE; + bsi = bsi_last (guard_bb); + + then_label = build1 (GOTO_EXPR, void_type_node, + tree_block_label (exit_bb)); + else_label = build1 (GOTO_EXPR, void_type_node, + tree_block_label (enter_e->dest)); + cond_stmt = build (COND_EXPR, void_type_node, cond, + then_label, else_label); + bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); + /* Add new edge to connect entry block to the second loop. */ + new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); + set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb); + return new_e; +} + + +/* This function verifies that certain restrictions apply to LOOP. */ + +static bool +verify_loop_for_duplication (struct loop *loop, + bool update_first_loop_count, edge e) +{ + edge exit_e = loop->exit_edges [0]; + edge entry_e = loop_preheader_edge (loop); + + /* We duplicate only innermost loops. */ + if (loop->inner) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "Loop duplication failed. Loop is not innermost.\n"); + return false; + } + + /* Only loops with 1 exit. */ + if (loop->num_exits != 1) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "More than one exit from loop.\n"); + return false; + } + + /* Only loops with 1 entry. */ + if (loop->num_entries != 1) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "More than one exit from loop.\n"); + return false; + } + + /* All loops has outers, the only case loop->outer is NULL is for + the function itself. */ + if (!loop->outer) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "Loop is outer-most loop.\n"); + return false; + } + + /* Verify that new IV can be created and loop condition + can be changed to make first loop iterate first_niters times. */ + if (!update_first_loop_count) + { + tree orig_cond = get_loop_exit_condition (loop); + block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src); + + if (!orig_cond) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "Loop has no exit condition.\n"); + return false; + } + if (orig_cond != bsi_stmt (loop_exit_bsi)) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "Loop exit condition is not loop header last stmt.\n"); + return false; + } + } + + /* Make sure E is either an entry or an exit edge. */ + if (e != exit_e && e != entry_e) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "E is not loop entry or exit edge.\n"); + return false; + } + + return true; +} + + +/* Given LOOP this function duplicates it to the edge E. + + This transformation takes place before the loop is vectorized. + For now, there are two main cases when it's used + by the vectorizer: to support loops with unknown loop bounds + (or loop bounds indivisible by vectorization factor) and to force the + alignment of data references in the loop. In the first case, LOOP is + duplicated to the exit edge, producing epilog loop. In the second case, LOOP + is duplicated to the preheader edge thus generating prolog loop. In both + cases, the original loop will be vectorized after the transformation. + + The edge E is supposed to be either preheader edge of the LOOP or + its exit edge. If preheader edge is specified, the LOOP copy + will precede the original one. Otherwise the copy will be located + at the exit of the LOOP. + + FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate + the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first + loop will be iterated FIRST_NITERS times by introducing additional + induction variable and replacing loop exit condition. If + UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and + the caller to tree_duplicate_loop_to_edge is responsible for updating + the first loop count. + + NITERS (also SSA_NAME) parameter defines the number of iteration the + original loop iterated. The function generates two if-then guards: + one prior to the first loop and the other prior to the second loop. + The first guard will be: + + if (FIRST_NITERS == 0) then skip the first loop + + The second guard will be: + + if (FIRST_NITERS == NITERS) then skip the second loop + + Thus the equivalence to the original code is guaranteed by correct values + of NITERS and FIRST_NITERS and generation of if-then loop guards. + + For now this function supports only loop forms that are candidate for + vectorization. Such types are the following: + + (1) only innermost loops + (2) loops built from 2 basic blocks + (3) loops with one entry and one exit + (4) loops without function calls + (5) loops without defs that are used after the loop + + (1), (3) are checked in this function; (2) - in function + vect_analyze_loop_form; (4) - in function vect_analyze_data_refs; + (5) is checked as part of the function vect_mark_stmts_to_be_vectorized, + when excluding induction/reduction support. + + The function returns NULL in case one of these checks or + transformations failed. */ + +struct loop* +tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops, + edge e, tree first_niters, + tree niters, bool update_first_loop_count) +{ + struct loop *new_loop = NULL, *first_loop, *second_loop; + edge skip_e; + tree pre_condition; + bitmap definitions; + basic_block first_exit_bb, second_exit_bb; + basic_block pre_header_bb; + edge exit_e = loop->exit_edges [0]; + + gcc_assert (!any_marked_for_rewrite_p ()); + + if (!verify_loop_for_duplication (loop, update_first_loop_count, e)) + return NULL; + + /* We have to initialize cfg_hooks. Then, when calling + cfg_hooks->split_edge, the function tree_split_edge + is actually called and, when calling cfg_hooks->duplicate_block, + the function tree_duplicate_bb is called. */ + tree_register_cfg_hooks (); + + /* 1. Generate a copy of LOOP and put it on E (entry or exit). */ + if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e))) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "The tree_duplicate_loop_to_edge_cfg failed.\n"); + return NULL; + } + + definitions = marked_ssa_names (); + allocate_new_names (definitions); + update_phis_for_duplicate_loop (loop, new_loop, e == exit_e); + /* Here, using assumption (5), we do not propagate new names futher + than on phis of the exit from the second loop. */ + rename_variables_in_loop (new_loop); + free_new_names (definitions); + + if (e == exit_e) + { + first_loop = loop; + second_loop = new_loop; + } + else + { + first_loop = new_loop; + second_loop = loop; + } + + /* 2. Generate bb between the loops. */ + first_exit_bb = split_edge (first_loop->exit_edges[0]); + add_bb_to_loop (first_exit_bb, first_loop->outer); + + /* We need to update here first loop exit edge + and second loop preheader edge. */ + flow_loop_scan (first_loop, LOOP_ALL); + flow_loop_scan (second_loop, LOOP_ALL); + + /* 3. Make first loop iterate FIRST_NITERS times, if needed. */ + if (!update_first_loop_count) + { + tree first_loop_latch_lbl = tree_block_label (first_loop->latch); + tree first_loop_exit_lbl = tree_block_label (first_exit_bb); + + make_loop_iterate_ntimes (first_loop, first_niters, + first_loop_latch_lbl, + first_loop_exit_lbl); + } + + /* 4. Add the guard before first loop: + + if FIRST_NITERS == 0 + skip first loop + else + enter first loop */ + + /* 4a. Generate bb before first loop. */ + pre_header_bb = split_edge (loop_preheader_edge (first_loop)); + add_bb_to_loop (pre_header_bb, first_loop->outer); + + /* First loop preheader edge is changed. */ + flow_loop_scan (first_loop, LOOP_ALL); + + /* 4b. Generate guard condition. */ + pre_condition = build (LE_EXPR, boolean_type_node, + first_niters, integer_zero_node); + + /* 4c. Add condition at the end of preheader bb. */ + skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb); + + /* 4d. Updtae phis at first loop exit and propagate changes + to the phis of second loop. */ + update_phi_nodes_for_guard (skip_e, first_loop); + + /* 5. Add the guard before second loop: + + if FIRST_NITERS == NITERS SKIP + skip second loop + else + enter second loop */ + + /* 5a. Generate empty bb at the exit from the second loop. */ + second_exit_bb = split_edge (second_loop->exit_edges[0]); + add_bb_to_loop (second_exit_bb, second_loop->outer); + + /* Second loop preheader edge is changed. */ + flow_loop_scan (second_loop, LOOP_ALL); + + /* 5b. Generate guard condition. */ + pre_condition = build (EQ_EXPR, boolean_type_node, + first_niters, niters); + + /* 5c. Add condition at the end of preheader bb. */ + skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb); + update_phi_nodes_for_guard (skip_e, second_loop); + + BITMAP_XFREE (definitions); + unmark_all_for_rewrite (); + + return new_loop; +} + + + +/* Here the proper Vectorizer starts. */ /* Function new_stmt_vec_info. @@ -274,13 +1107,17 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_LOOP (res) = loop; LOOP_VINFO_BBS (res) = bbs; LOOP_VINFO_EXIT_COND (res) = NULL; - LOOP_VINFO_NITERS (res) = -1; + LOOP_VINFO_NITERS (res) = NULL; LOOP_VINFO_VECTORIZABLE_P (res) = 0; + LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false; LOOP_VINFO_VECT_FACTOR (res) = 0; VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20, "loop_write_datarefs"); VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20, "loop_read_datarefs"); + + for (i=0; inum, access_fn, &init_oval, &step, true); + ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, + true); if (!ok) init_oval = integer_zero_node; @@ -1519,8 +2356,6 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (!STMT_VINFO_DATA_REF (stmt_info)) return false; - if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info))) - return false; if (!vec_stmt) /* transformation not required. */ { @@ -1613,7 +2448,8 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) software_pipeline_loads_p = true; else if (!targetm.vectorize.misaligned_mem_ok (mode)) { - /* Possibly unaligned access, and can't software pipeline the loads */ + /* Possibly unaligned access, and can't software pipeline the loads. + */ if (vect_debug_details (loop)) fprintf (dump_file, "Arbitrary load not supported."); return false; @@ -1731,7 +2567,8 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } else { - /* Use current address instead of init_addr for reduced reg pressure. */ + /* Use current address instead of init_addr for reduced reg pressure. + */ magic = dataref_ptr; } @@ -1805,12 +2642,151 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) } +/* This function builds ni_name = number of iterations loop executes + on the loop preheader. */ + +static tree +vect_build_loop_niters (loop_vec_info loop_vinfo) +{ + tree ni_name, stmt, var; + edge pe; + basic_block new_bb; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo)); + + var = create_tmp_var (TREE_TYPE (ni), "niters"); + add_referenced_tmp_var (var); + if (TREE_CODE (ni) == INTEGER_CST) + { + /* This case is generated when treating a known loop bound + indivisible by VF. Here we cannot use force_gimple_operand. */ + stmt = build (MODIFY_EXPR, void_type_node, var, ni); + ni_name = make_ssa_name (var, stmt); + TREE_OPERAND (stmt, 0) = ni_name; + } + else + ni_name = force_gimple_operand (ni, &stmt, false, var); + + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, stmt); + if (new_bb) + add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father); + + return ni_name; +} + + +/* This function generates the following statements: + + ni_name = number of iterations loop executes + ratio = ni_name / vf + ratio_mult_vf_name = ratio * vf + + and places them at the loop preheader edge. */ + +static void +vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p, + tree *ratio_mult_vf_name_p, tree *ratio_p) +{ + + edge pe; + basic_block new_bb; + tree stmt, ni_name; + tree ratio; + tree ratio_mult_vf_name, ratio_mult_vf; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree ni = LOOP_VINFO_NITERS(loop_vinfo); + + int vf, i; + + /* Generate temporary variable that contains + number of iterations loop executes. */ + + ni_name = vect_build_loop_niters (loop_vinfo); + + /* ratio = ni / vf. + vf is power of 2; then if ratio = = n >> log2 (vf). */ + vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + ratio = vect_build_symbol_bound (ni_name, vf, loop); + + /* Update initial conditions of loop copy. */ + + /* ratio_mult_vf = ratio * vf; + then if ratio_mult_vf = ratio << log2 (vf). */ + + i = exact_log2 (vf); + ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); + add_referenced_tmp_var (ratio_mult_vf); + + ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE); + + stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name, + build2 (LSHIFT_EXPR, TREE_TYPE (ratio), + ratio, build_int_cst (unsigned_type_node, + i))); + + SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt; + + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, stmt); + if (new_bb) + add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father); + + *ni_name_p = ni_name; + *ratio_mult_vf_name_p = ratio_mult_vf_name; + *ratio_p = ratio; + + return; +} + + +/* This function generates stmt + + tmp = n / vf; + + and attaches it to preheader of LOOP. */ + +static tree +vect_build_symbol_bound (tree n, int vf, struct loop * loop) +{ + tree var, stmt, var_name; + edge pe; + basic_block new_bb; + int i; + + /* create temporary variable */ + var = create_tmp_var (TREE_TYPE (n), "bnd"); + add_referenced_tmp_var (var); + + var_name = make_ssa_name (var, NULL_TREE); + + /* vf is power of 2; then n/vf = n >> log2 (vf). */ + + i = exact_log2 (vf); + stmt = build2 (MODIFY_EXPR, void_type_node, var_name, + build2 (RSHIFT_EXPR, TREE_TYPE (n), + n, build_int_cst (unsigned_type_node,i))); + + SSA_NAME_DEF_STMT (var_name) = stmt; + + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, stmt); + if (new_bb) + add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father); + else + if (vect_debug_details (NULL)) + fprintf (dump_file, "New bb on preheader edge was not generated."); + + return var_name; +} + + /* Function vect_transform_loop_bound. Create a new exit condition for the loop. */ static void -vect_transform_loop_bound (loop_vec_info loop_vinfo) +vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); edge exit_edge = loop->single_exit; @@ -1821,19 +2797,21 @@ vect_transform_loop_bound (loop_vec_info loop_vinfo) int vf; tree cond_stmt; tree new_loop_bound; + bool symbol_niters; tree cond; tree lb_type; - gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)); - old_N = LOOP_VINFO_NITERS (loop_vinfo); + symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo); + + if (!symbol_niters) + old_N = LOOP_VINFO_INT_NITERS (loop_vinfo); + vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - /* FORNOW: - assuming number-of-iterations divides by the vectorization factor. */ - gcc_assert (!(old_N % vf)); - orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo); +#ifdef ENABLE_CHECKING gcc_assert (orig_cond_expr); +#endif gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi)); create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop, @@ -1846,12 +2824,19 @@ vect_transform_loop_bound (loop_vec_info loop_vinfo) /* new loop exit test: */ lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1)); - new_loop_bound = build_int_cst (lb_type, old_N/vf); + if (!symbol_niters) + new_loop_bound = fold_convert (lb_type, + build_int_cst (unsigned_type_node, + old_N/vf)); + else + new_loop_bound = niters; if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */ - cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound); + cond = build2 (GE_EXPR, boolean_type_node, + indx_after_incr, new_loop_bound); else /* 'then' edge loops back. */ - cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound); + cond = build2 (LT_EXPR, boolean_type_node, + indx_after_incr, new_loop_bound); cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond, TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2)); @@ -1866,6 +2851,346 @@ vect_transform_loop_bound (loop_vec_info loop_vinfo) } +/* Advance IVs of the loop (to be vectorized later) to correct position. + + When loop is vectorized, its IVs are not always advanced + correctly since vectorization changes the loop count. It's ok + in case epilog loop was not produced after original one before + vectorization process (the vectorizer checks that there is no uses + of IVs after the loop). However, in case the epilog loop was peeled, + IVs from original loop are used in epilog loop and should be + advanced correctly. + + Here we use access functions of IVs and number of + iteration loop executes in order to bring IVs to correct position. + + Function also update phis of basic block at the exit + from the loop. */ + +static void +vect_update_ivs_after_vectorizer (struct loop *loop, tree niters) +{ + edge exit = loop->exit_edges[0]; + tree phi; + edge latch = loop_latch_edge (loop); + + /* Generate basic block at the exit from the loop. */ + basic_block new_bb = split_edge (exit); + add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father); + + loop->exit_edges[0] = EDGE_PRED (new_bb, 0); + + for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi)) + { + tree access_fn = NULL; + tree evolution_part; + tree init_expr; + tree step_expr; + tree var, stmt, ni, ni_name; + int i, j, num_elem1, num_elem2; + tree phi1; + block_stmt_iterator last_bsi; + + /* Skip virtual phi's. The data dependences that are associated with + virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ + + if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) + { + if (vect_debug_details (NULL)) + fprintf (dump_file, "virtual phi. skip."); + continue; + } + + access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); + + evolution_part = evolution_part_in_loop_num (access_fn, loop->num); + + /* FORNOW: We do not transform initial conditions of IVs + which evolution functions are a polynomial of degree >= 2 or + exponential. */ + + step_expr = evolution_part; + init_expr = initial_condition (access_fn); + + ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr), + build2 (MULT_EXPR, TREE_TYPE (niters), + niters, step_expr), init_expr); + + var = create_tmp_var (TREE_TYPE (init_expr), "tmp"); + add_referenced_tmp_var (var); + + ni_name = force_gimple_operand (ni, &stmt, false, var); + + /* Insert stmt into new_bb. */ + last_bsi = bsi_last (new_bb); + bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT); + + /* Fix phi expressions in duplicated loop. */ + num_elem1 = PHI_NUM_ARGS (phi); + for (i = 0; i < num_elem1; i++) + if (PHI_ARG_EDGE (phi, i) == latch) + { + tree def = PHI_ARG_DEF (phi, i); + + for (phi1 = phi_nodes (EDGE_SUCC (new_bb, 0)->dest); phi1; + phi1 = TREE_CHAIN (phi1)) + { + num_elem2 = PHI_NUM_ARGS (phi1); + for (j = 0; j < num_elem2; j++) + if (PHI_ARG_DEF (phi1, j) == def) + { + SET_PHI_ARG_DEF (phi1, j, ni_name); + PHI_ARG_EDGE (phi1, j) = EDGE_SUCC (new_bb, 0); + break; + } + } + break; + } + } + +} + + +/* This function is the main driver of tranformation + to be done for loop before vectorizing it in case of + unknown loop bound. */ + +static void +vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio, + struct loops *loops) +{ + + tree ni_name, ratio_mult_vf_name; +#ifdef ENABLE_CHECKING + int loop_num; +#endif + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *new_loop; + + if (vect_debug_details (NULL)) + fprintf (dump_file, "\n<>\n"); + + /* Generate the following variables on the preheader of original loop: + + ni_name = number of iteration the original loop executes + ratio = ni_name / vf + ratio_mult_vf_name = ratio * vf */ + vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, + &ratio_mult_vf_name, ratio); + + /* Update loop info. */ + loop->pre_header = loop_preheader_edge (loop)->src; + loop->pre_header_edges[0] = loop_preheader_edge (loop); + +#ifdef ENABLE_CHECKING + loop_num = loop->num; +#endif + new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0], + ratio_mult_vf_name, ni_name, true); +#ifdef ENABLE_CHECKING + gcc_assert (new_loop); + gcc_assert (loop_num == loop->num); +#endif + + /* Update IVs of original loop as if they were advanced + by ratio_mult_vf_name steps. */ + +#ifdef ENABLE_CHECKING + /* Check existence of intermediate bb. */ + gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header); +#endif + vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name); + + return; + +} + + +/* Function vect_gen_niters_for_prolog_loop + + Set the number of iterations for the loop represented by LOOP_VINFO + to the minimum between NITERS (the original iteration count of the loop) + and the misalignment DR - the first data reference in the list + LOOP_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of this + loop, the data reference DR will refer to an aligned location. */ + +static tree +vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters) +{ + struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0); + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree var, stmt; + tree iters, iters_name; + edge pe; + basic_block new_bb; + tree dr_stmt = DR_STMT (dr); + stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt); + tree start_addr, byte_miss_align, elem_miss_align; + int vec_type_align = + GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info))) + / BITS_PER_UNIT; + tree tmp1, tmp2; + tree new_stmt_list = NULL_TREE; + + start_addr = vect_create_addr_base_for_vector_ref (dr_stmt, + &new_stmt_list, NULL_TREE); + + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list); + if (new_bb) + add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father); + + byte_miss_align = + build (BIT_AND_EXPR, integer_type_node, start_addr, + build (MINUS_EXPR, integer_type_node, + build_int_cst (unsigned_type_node, + vec_type_align), integer_one_node)); + tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf); + elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node, + byte_miss_align, tmp1); + + tmp2 = + build (BIT_AND_EXPR, integer_type_node, + build (MINUS_EXPR, integer_type_node, + build_int_cst (unsigned_type_node, vf), elem_miss_align), + build (MINUS_EXPR, integer_type_node, + build_int_cst (unsigned_type_node, vf), integer_one_node)); + + iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters); + var = create_tmp_var (TREE_TYPE (iters), "iters"); + add_referenced_tmp_var (var); + iters_name = force_gimple_operand (iters, &stmt, false, var); + + /* Insert stmt on loop preheader edge. */ + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, stmt); + if (new_bb) + add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father); + + return iters_name; +} + + +/* Function vect_update_niters_after_peeling + + NITERS iterations were peeled from the loop represented by LOOP_VINFO. + The new number of iterations is therefore original_niters - NITERS. + Record the new number of iterations in LOOP_VINFO. */ + +static void +vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters) +{ + tree n_iters = LOOP_VINFO_NITERS (loop_vinfo); + LOOP_VINFO_NITERS (loop_vinfo) = + build (MINUS_EXPR, integer_type_node, n_iters, niters); +} + + +/* Function vect_update_inits_of_dr + + NITERS iterations were peeled from LOOP. DR represents a data reference + in LOOP. This function updates the information recorded in DR to + account for the fact that the first NITERS iterations had already been + executed. Specifically, it updates the initial_condition of the + access_function of DR. */ + +static void +vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop, + tree niters) +{ + tree access_fn = DR_ACCESS_FN (dr, 0); + tree init, init_new, step; + + step = evolution_part_in_loop_num (access_fn, loop->num); + init = initial_condition (access_fn); + + init_new = build (PLUS_EXPR, TREE_TYPE (init), + build (MULT_EXPR, TREE_TYPE (niters), + niters, step), init); + DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new); + + return; +} + + +/* Function vect_update_inits_of_drs + + NITERS iterations were peeled from the loop represented by LOOP_VINFO. + This function updates the information recorded for the data references in + the loop to account for the fact that the first NITERS iterations had + already been executed. Specifically, it updates the initial_condition of the + access_function of all the data_references in the loop. */ + +static void +vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) +{ + unsigned int i; + varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); + varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n<>\n"); + + for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) + { + struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); + vect_update_inits_of_dr (dr, loop, niters); + } + + for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) + { + struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); + vect_update_inits_of_dr (dr, loop, niters); + DR_MISALIGNMENT (dr) = -1; + } +} + + +/* Function vect_do_peeling_for_alignment + + Peel the first 'niters' iterations of the loop represented by LOOP_VINFO. + 'niters' is set to the misalignment of one of the data references in the + loop, thereby forcing it to refer to an aligned location at the beginning + of the execution of this loop. The data reference for which we are + peeling is chosen from LOOP_UNALIGNED_DR. */ + +static void +vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree niters_of_prolog_loop, ni_name; + struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0); + + if (vect_debug_details (NULL)) + fprintf (dump_file, "\n<>\n"); + + ni_name = vect_build_loop_niters (loop_vinfo); + niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name); + + + /* Peel the prolog loop and iterate it niters_of_prolog_loop. */ + tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop), + niters_of_prolog_loop, ni_name, false); + + + /* Update stmt info of dr according to which we peeled. */ + DR_MISALIGNMENT (dr) = 0; + + /* Update number of times loop executes. */ + vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop); + + /* Update all inits of access functions of all data refs. */ + vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop); + + /* After peeling we have to reset scalar evolution analyzer. */ + scev_reset (); + + return; +} + + /* Function vect_transform_loop. The analysis phase has determined that the loop is vectorizable. @@ -1881,6 +3206,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, int nbbs = loop->num_nodes; block_stmt_iterator si; int i; + tree ratio = NULL; #ifdef ENABLE_CHECKING int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); #endif @@ -1888,6 +3214,37 @@ vect_transform_loop (loop_vec_info loop_vinfo, if (vect_debug_details (NULL)) fprintf (dump_file, "\n<>\n"); + + /* Peel the loop if there are data refs with unknown alignment. + Only one data ref with unknown store is allowed. */ + + + if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo)) + vect_do_peeling_for_alignment (loop_vinfo, loops); + + /* If the loop has a symbolic number of iterations 'n' + (i.e. it's not a compile time constant), + then an epilog loop needs to be created. We therefore duplicate + the initial loop. The original loop will be vectorized, and will compute + the first (n/VF) iterations. The second copy of the loop will remain + serial and will compute the remaining (n%VF) iterations. + (VF is the vectorization factor). */ + + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops); + + /* FORNOW: we'll treat the case where niters is constant and + + niters % vf != 0 + + in the way similar to one with symbolic niters. + For this we'll generate variable which value is equal to niters. */ + + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)) + vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops); + + /* 1) Make sure the loop header has exactly two entries 2) Make sure we have a preheader basic block. */ @@ -1948,7 +3305,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, } /* stmts in BB */ } /* BBs in loop */ - vect_transform_loop_bound (loop_vinfo); + vect_transform_loop_bound (loop_vinfo, ratio); if (vect_debug_details (loop)) fprintf (dump_file,"Success! loop vectorized."); @@ -2174,30 +3531,27 @@ vect_analyze_operations (loop_vec_info loop_vinfo) } LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; - /* FORNOW: handle only cases where the loop bound divides by the - vectorization factor. */ - - if (vect_debug_details (NULL)) + + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && vect_debug_details (NULL)) fprintf (dump_file, "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC, - vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo)); + vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo)); - if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0) { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: Unknown loop bound."); - return false; + /* In this case we have to generate epilog loop, that + can be done only for loops with one entry edge. */ + if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1 + || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header)) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, "not vectorized: more than one entry."); + return false; + } } - - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.", - vectorization_factor); - return false; - } - + return true; } @@ -2272,7 +3626,7 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, return false; step_expr = evolution_part; - init_expr = initial_condition (access_fn); + init_expr = unshare_expr (initial_condition (access_fn)); if (vect_debug_details (NULL)) { @@ -2832,11 +4186,12 @@ vect_compute_array_ref_alignment (struct data_reference *dr, tree nbits; if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE) - /* The reference is an array without its last index. */ - next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, &misalign); + /* The reference is an array without its last index. */ + next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, + &misalign); else - next_ref = - vect_compute_array_base_alignment (oprnd0, vectype, &dims, &misalign); + next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims, + &misalign); if (!vectype) /* Alignment is not requested. Just return the base. */ return next_ref; @@ -3037,9 +4392,11 @@ static bool vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) { varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/ unsigned int i; + unsigned int decide_peeling_count = 0; if (vect_debug_details (NULL)) fprintf (dump_file, "\n<>\n"); @@ -3062,19 +4419,33 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) /* Finally, check that loop can be vectorized. - FOR NOW: Until support for misaligned accesses is in place, only if all - accesses are aligned can the loop be vectorized. This restriction will be - relaxed. */ + FOR NOW: Until support for misaligned stores is in place, only if all + stores are aligned can the loop be vectorized. This restriction will be + relaxed. In the meantime, we can force the alignment of on of the + data-references in the loop using peeling. We currently use a heuristic + that peels the first misaligned store, but we plan to develop a + better cost model to guide the decision on which data-access to peel for. + */ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) { struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); if (!aligned_access_p (dr)) { - if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) - || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) - fprintf (dump_file, "not vectorized: unaligned store."); - return false; + /* Decide here whether we need peeling for alignment. */ + decide_peeling_count++; + if (decide_peeling_count > MAX_NUMBER_OF_UNALIGNED_DATA_REFS) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "not vectorized: multiple misaligned stores."); + return false; + } + else + { + LOOP_UNALIGNED_DR (loop_vinfo, decide_peeling_count - 1) = dr; + LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true; + } } } @@ -3125,12 +4496,12 @@ vect_analyze_data_ref_access (struct data_reference *dr) if (evolution_part_in_loop_num (access_fn, loop_containing_stmt (DR_STMT (dr))->num)) { - /* Evolution part is not NULL in this loop (it is neither constant nor - invariant). */ + /* Evolution part is not NULL in this loop (it is neither constant + nor invariant). */ if (vect_debug_details (NULL)) { fprintf (dump_file, - "not vectorized: complicated multidimensional array access."); + "not vectorized: complicated multidim. array access."); print_generic_expr (dump_file, access_fn, TDF_SLIM); } return false; @@ -3144,7 +4515,7 @@ vect_analyze_data_ref_access (struct data_reference *dr) { if (vect_debug_details (NULL)) { - fprintf (dump_file, "not vectorized: too complicated access function."); + fprintf (dump_file, "not vectorized: complicated access function."); print_generic_expr (dump_file, access_fn, TDF_SLIM); } return false; @@ -3521,7 +4892,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo) /* Analyze MEMREF. If it is of a supported form, build data_reference struct for it (DR) and find the relevant symbol for aliasing purposes. */ - symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, &dr); + symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, + &dr); if (!symbl) { if (vect_debug_stats (loop) || vect_debug_details (loop)) @@ -3563,7 +4935,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo) switch (TREE_CODE (address_base)) { case ARRAY_REF: - dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), DR_IS_READ(dr)); + dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), + DR_IS_READ(dr)); STMT_VINFO_MEMTAG (stmt_info) = vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE, loop_vinfo, &offset, @@ -3577,7 +4950,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo) default: if (vect_debug_stats (loop) || vect_debug_details (loop)) { - fprintf (dump_file, "not vectorized: unhandled address expression: "); + fprintf (dump_file, + "not vectorized: unhandled address expr: "); print_generic_expr (dump_file, stmt, TDF_SLIM); } return false; @@ -3851,12 +5225,109 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) } +/* Function vect_analyze_loop_with_symbolic_num_of_iters. + + In case the number of iterations that LOOP iterates in unknown at compile + time, an epilog loop will be generated, and the loop induction variables + (IVs) will be "advanced" to the value they are supposed to take just before + the epilog loop. Here we check that the access function of the loop IVs + and the expression that represents the loop bound are simple enough. + These restrictions will be relaxed in the future. */ + +static bool +vect_analyze_loop_with_symbolic_num_of_iters (tree niters, + struct loop *loop) +{ + basic_block bb = loop->header; + tree phi; + + if (vect_debug_details (NULL)) + fprintf (dump_file, + "\n<>\n"); + + if (chrec_contains_undetermined (niters)) + { + if (vect_debug_details (NULL)) + fprintf (dump_file, "Infinite number of iterations."); + return false; + } + + if (!niters) + { + if (vect_debug_details (NULL)) + fprintf (dump_file, "niters is NULL pointer."); + return false; + } + + if (vect_debug_details (NULL)) + { + fprintf (dump_file, "Symbolic number of iterations is "); + print_generic_expr (dump_file, niters, TDF_DETAILS); + } + + /* Analyze phi functions of the loop header. */ + + for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi)) + { + tree access_fn = NULL; + tree evolution_part; + + if (vect_debug_details (NULL)) + { + fprintf (dump_file, "Analyze phi: "); + print_generic_expr (dump_file, phi, TDF_SLIM); + } + + /* Skip virtual phi's. The data dependences that are associated with + virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ + + if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) + { + if (vect_debug_details (NULL)) + fprintf (dump_file, "virtual phi. skip."); + continue; + } + + /* Analyze the evolution function. */ + + access_fn = instantiate_parameters + (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); + + if (!access_fn) + { + if (vect_debug_details (NULL)) + fprintf (dump_file, "No Access function."); + return false; + } + + if (vect_debug_details (NULL)) + { + fprintf (dump_file, "Access function of PHI: "); + print_generic_expr (dump_file, access_fn, TDF_SLIM); + } + + evolution_part = evolution_part_in_loop_num (access_fn, loop->num); + + if (evolution_part == NULL_TREE) + return false; + + /* FORNOW: We do not transform initial conditions of IVs + which evolution functions are a polynomial of degree >= 2. */ + + if (tree_is_chrec (evolution_part)) + return false; + } + + return true; +} + + /* Function vect_get_loop_niters. Determine how many iterations the loop is executed. */ static tree -vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations) +vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) { tree niters; @@ -3866,14 +5337,15 @@ vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations) niters = number_of_iterations_in_loop (loop); if (niters != NULL_TREE - && niters != chrec_dont_know - && host_integerp (niters,0)) + && niters != chrec_dont_know) { - *number_of_iterations = TREE_INT_CST_LOW (niters); + *number_of_iterations = niters; if (vect_debug_details (NULL)) - fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC, - *number_of_iterations); + { + fprintf (dump_file, "==> get_loop_niters:" ); + print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM); + } } return get_loop_exit_condition (loop); @@ -3895,7 +5367,7 @@ vect_analyze_loop_form (struct loop *loop) { loop_vec_info loop_vinfo; tree loop_cond; - HOST_WIDE_INT number_of_iterations = -1; + tree number_of_iterations = NULL; if (vect_debug_details (loop)) fprintf (dump_file, "\n<>\n"); @@ -3943,24 +5415,52 @@ vect_analyze_loop_form (struct loop *loop) fprintf (dump_file, "not vectorized: complicated exit condition."); return NULL; } - - if (number_of_iterations < 0) + + if (!number_of_iterations) { if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: unknown loop bound."); + fprintf (dump_file, + "not vectorized: number of iterations cannot be computed."); return NULL; } - if (number_of_iterations == 0) /* CHECKME: can this happen? */ + loop_vinfo = new_loop_vec_info (loop); + LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, "loop bound unknown."); + + /* Unknown loop bound. */ + if (!vect_analyze_loop_with_symbolic_num_of_iters + (number_of_iterations, loop)) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "not vectorized: can't determine loop bound."); + return NULL; + } + else + { + /* We need only one loop entry for unknown loop bound support. */ + if (loop->num_entries != 1 || !loop->pre_header) + { + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, + "not vectorized: more than one loop entry."); + return NULL; + } + } + } + else + if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0) { if (vect_debug_stats (loop) || vect_debug_details (loop)) fprintf (dump_file, "not vectorized: number of iterations = 0."); return NULL; } - loop_vinfo = new_loop_vec_info (loop); LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; - LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; return loop_vinfo; } @@ -4170,5 +5670,6 @@ vectorize_loops (struct loops *loops) Information in virtual phi nodes is sufficient for it. */ rewrite_into_loop_closed_ssa (); } + rewrite_into_loop_closed_ssa (); bitmap_clear (vars_to_rename); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 37c57067914..8ec9576544a 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -121,6 +121,7 @@ vinfo_for_stmt (tree stmt) /* The misalignment of the memory access in bytes. */ #define DR_MISALIGNMENT(DR) (DR)->aux +#define MAX_NUMBER_OF_UNALIGNED_DATA_REFS 1 static inline bool aligned_access_p (struct data_reference *data_ref_info) @@ -152,8 +153,8 @@ typedef struct _loop_vec_info { /* The loop exit_condition. */ tree exit_cond; - /* Number of iterations. -1 if unknown. */ - HOST_WIDE_INT num_iters; + /* Number of iterations. */ + tree num_iters; /* Is the loop vectorizable? */ bool vectorizable; @@ -161,6 +162,13 @@ typedef struct _loop_vec_info { /* Unrolling factor */ int vectorization_factor; + /* Unknown DRs according to which loop was peeled. */ + struct data_reference *unaligned_drs [MAX_NUMBER_OF_UNALIGNED_DATA_REFS]; + + /* If true, loop is peeled. + unaligned_drs show in this case DRs used for peeling. */ + bool do_peeling_for_alignment; + /* All data references in the loop that are being written to. */ varray_type data_ref_writes; @@ -177,8 +185,14 @@ typedef struct _loop_vec_info { #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor #define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes #define LOOP_VINFO_DATAREF_READS(L) (L)->data_ref_reads +#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) +#define LOOP_DO_PEELING_FOR_ALIGNMENT(L) (L)->do_peeling_for_alignment +#define LOOP_UNALIGNED_DR(L, I) (L)->unaligned_drs[(I)] + -#define LOOP_VINFO_NITERS_KNOWN_P(L) ((L)->num_iters > 0) +#define LOOP_VINFO_NITERS_KNOWN_P(L) \ +(host_integerp ((L)->num_iters,0) \ +&& TREE_INT_CST_LOW ((L)->num_iters) > 0) /*-----------------------------------------------------------------*/ /* Function prototypes. */