From 4f9a2b4e884acb7db0e9acb3c13b0adfbce65725 Mon Sep 17 00:00:00 2001 From: Alan Lawrence Date: Thu, 2 Jul 2015 11:47:31 +0000 Subject: [PATCH] tree-pass.h (make_pass_ch_vect): New. gcc/: * tree-pass.h (make_pass_ch_vect): New. * passes.def: Add pass_ch_vect just before pass_if_conversion. * tree-ssa-loop-ch.c (ch_base, pass_ch_vect, pass_data_ch_vect, pass_ch::process_loop_p, pass_ch_vect::process_loop_p, make_pass_ch_vect): New. (pass_ch): Extend ch_base. (pass_ch::execute): Move all but loop_optimizer_init/finalize to... (ch_base::copy_headers): ...here. gcc/testsuite/: * gcc.dg/vect/vect-strided-a-u16-i4.c (main1): Narrow scope of x,y,z,w. * gcc.dg/vect/vect-ifcvt-11.c: New testcase. From-SVN: r225311 --- gcc/ChangeLog | 13 ++ gcc/passes.def | 1 + gcc/testsuite/ChangeLog | 5 + gcc/testsuite/gcc.dg/vect/vect-ifcvt-11.c | 36 ++++ .../gcc.dg/vect/vect-strided-a-u16-i4.c | 2 +- gcc/tree-pass.h | 1 + gcc/tree-ssa-loop-ch.c | 154 ++++++++++++++++-- 7 files changed, 196 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-ifcvt-11.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 07e091bb20a..3b15e2e2479 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2015-07-02 Alan Lawrence + + * tree-pass.h (make_pass_ch_vect): New. + * passes.def: Add pass_ch_vect just before pass_if_conversion. + + * tree-ssa-loop-ch.c (ch_base, pass_ch_vect, pass_data_ch_vect, + pass_ch::process_loop_p, pass_ch_vect::process_loop_p, + make_pass_ch_vect): New. + (pass_ch): Extend ch_base. + + (pass_ch::execute): Move all but loop_optimizer_init/finalize to... + (ch_base::copy_headers): ...here. + 2015-07-02 Richard Biener * builtins.c (get_pointer_alignment_1): Handle POINTER_PLUS_EXPR. diff --git a/gcc/passes.def b/gcc/passes.def index 9ced6558000..0d8356b9bda 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -247,6 +247,7 @@ along with GCC; see the file COPYING3. 
If not see PUSH_INSERT_PASSES_WITHIN (pass_parallelize_loops) NEXT_PASS (pass_expand_omp_ssa); POP_INSERT_PASSES () + NEXT_PASS (pass_ch_vect); NEXT_PASS (pass_if_conversion); /* pass_vectorize must immediately follow pass_if_conversion. Please do not add any other passes in between. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index de7c440a81c..e7b749b1cf8 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-07-02 Alan Lawrence + + * gcc.dg/vect/vect-strided-a-u16-i4.c (main1): Narrow scope of x,y,z,w. + * gcc.dg/vect/vect-ifcvt-11.c: New testcase. + 2015-07-02 Richard Biener PR testsuite/66719 diff --git a/gcc/testsuite/gcc.dg/vect/vect-ifcvt-11.c b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-11.c new file mode 100644 index 00000000000..7e323693087 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-11.c @@ -0,0 +1,36 @@ +/* { dg-require-effective-target vect_condition } */ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +#define N 16 + +extern void abort (void); + +int A[N] = {36, 39, 42, 45, 43, 32, 21, 12, 23, 34, 45, 56, 67, 78, 81, 11}; +int B[N] = {144,195,210,225,172,128,105,60, 92, 136,225,280,268,390,324,55}; + +__attribute__((noinline)) +void foo () +{ + for (int i = 0; i < N; i++) + { + int m = (A[i] & i) ? 
5 : 4; + A[i] = A[i] * m; + } +} + +int main () +{ + + check_vect (); + foo (); + /* check results: */ + for (int i = 0; i < N; i++) + if (A[i] != B[i]) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c index af33ed4abcd..0be68b31198 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c @@ -21,7 +21,6 @@ main1 () s *ptr = arr; s res[N]; int i; - unsigned short x, y, z, w; for (i = 0; i < N; i++) { @@ -35,6 +34,7 @@ main1 () for (i = 0; i < N; i++) { + unsigned short x, y, z, w; x = ptr->b - ptr->a; y = ptr->d - ptr->c; res[i].c = x + y; diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 454555653db..2808dad2d7d 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -381,6 +381,7 @@ extern gimple_opt_pass *make_pass_loop_prefetch (gcc::context *ctxt); extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_ch_vect (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ccp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_phi_only_cprop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_build_ssa (gcc::context *ctxt); diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c index 98534ba8f6c..121e3d80bd3 100644 --- a/gcc/tree-ssa-loop-ch.c +++ b/gcc/tree-ssa-loop-ch.c @@ -135,12 +135,23 @@ do_while_loop_p (struct loop *loop) return true; } -/* For all loops, copy the condition at the end of the loop body in front - of the loop. This is beneficial since it increases efficiency of - code motion optimizations. It also saves one jump on entry to the loop. */ - namespace { +/* Common superclass for both header-copying phases. 
*/ +class ch_base : public gimple_opt_pass +{ + protected: + ch_base (pass_data data, gcc::context *ctxt) + : gimple_opt_pass (data, ctxt) + {} + + /* Copies headers of all loops in FUN for which process_loop_p is true. */ + unsigned int copy_headers (function *fun); + + /* Return true to copy headers of LOOP or false to skip. */ + virtual bool process_loop_p (struct loop *loop) = 0; +}; + const pass_data pass_data_ch = { GIMPLE_PASS, /* type */ @@ -154,21 +165,68 @@ const pass_data pass_data_ch = 0, /* todo_flags_finish */ }; -class pass_ch : public gimple_opt_pass +class pass_ch : public ch_base { public: pass_ch (gcc::context *ctxt) - : gimple_opt_pass (pass_data_ch, ctxt) + : ch_base (pass_data_ch, ctxt) {} /* opt_pass methods: */ virtual bool gate (function *) { return flag_tree_ch != 0; } + + /* Initialize and finalize loop structures, copying headers inbetween. */ virtual unsigned int execute (function *); +protected: + /* ch_base method: */ + virtual bool process_loop_p (struct loop *loop); }; // class pass_ch +const pass_data pass_data_ch_vect = +{ + GIMPLE_PASS, /* type */ + "ch_vect", /* name */ + OPTGROUP_LOOP, /* optinfo_flags */ + TV_TREE_CH, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +/* This is a more aggressive version of the same pass, designed to run just + before if-conversion and vectorization, to put more loops into the form + required for those phases. */ +class pass_ch_vect : public ch_base +{ +public: + pass_ch_vect (gcc::context *ctxt) + : ch_base (pass_data_ch_vect, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *fun) + { + return flag_tree_ch != 0 + && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops); + } + + /* Just copy headers, no initialization/finalization of loop structures. 
*/ + virtual unsigned int execute (function *); + +protected: + /* ch_base method: */ + virtual bool process_loop_p (struct loop *loop); +}; // class pass_ch_vect + +/* For all loops, copy the condition at the end of the loop body in front + of the loop. This is beneficial since it increases efficiency of + code motion optimizations. It also saves one jump on entry to the loop. */ + unsigned int -pass_ch::execute (function *fun) +ch_base::copy_headers (function *fun) { struct loop *loop; basic_block header; @@ -178,13 +236,8 @@ pass_ch::execute (function *fun) unsigned bbs_size; bool changed = false; - loop_optimizer_init (LOOPS_HAVE_PREHEADERS - | LOOPS_HAVE_SIMPLE_LATCHES); if (number_of_loops (fun) <= 1) - { - loop_optimizer_finalize (); return 0; - } bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); @@ -201,7 +254,7 @@ pass_ch::execute (function *fun) written as such, or because jump threading transformed it into one), we might be in fact peeling the first iteration of the loop. This in general is not a good idea. */ - if (do_while_loop_p (loop)) + if (!process_loop_p (loop)) continue; /* Iterate the header copying up to limit; this takes care of the cases @@ -288,16 +341,87 @@ pass_ch::execute (function *fun) changed = true; } - update_ssa (TODO_update_ssa); + if (changed) + update_ssa (TODO_update_ssa); free (bbs); free (copied_bbs); - loop_optimizer_finalize (); return changed ? TODO_cleanup_cfg : 0; } +/* Initialize the loop structures we need, and finalize after. */ + +unsigned int +pass_ch::execute (function *fun) +{ + loop_optimizer_init (LOOPS_HAVE_PREHEADERS + | LOOPS_HAVE_SIMPLE_LATCHES); + + unsigned int res = copy_headers (fun); + + loop_optimizer_finalize (); + return res; +} + +/* Assume an earlier phase has already initialized all the loop structures that + we need here (and perhaps others too), and that these will be finalized by + a later phase. 
*/ + +unsigned int +pass_ch_vect::execute (function *fun) +{ + return copy_headers (fun); +} + +/* Apply header copying according to a very simple test of do-while shape. */ + +bool +pass_ch::process_loop_p (struct loop *loop) +{ + return !do_while_loop_p (loop); +} + +/* Apply header-copying to loops where we might enable vectorization. */ + +bool +pass_ch_vect::process_loop_p (struct loop *loop) +{ + if (!flag_tree_loop_vectorize && !loop->force_vectorize) + return false; + + if (loop->dont_vectorize) + return false; + + if (!do_while_loop_p (loop)) + return true; + + /* The vectorizer won't handle anything with multiple exits, so skip. */ + edge exit = single_exit (loop); + if (!exit) + return false; + + /* Copy headers iff there looks to be code in the loop after the exit block, + i.e. the exit block has an edge to another block (besides the latch, + which should be empty). */ + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, exit->src->succs) + if (!loop_exit_edge_p (loop, e) + && e->dest != loop->header + && e->dest != loop->latch) + return true; + + return false; +} + } // anon namespace +gimple_opt_pass * +make_pass_ch_vect (gcc::context *ctxt) +{ + return new pass_ch_vect (ctxt); +} + gimple_opt_pass * make_pass_ch (gcc::context *ctxt) {