tree-ssa-loop-ivcanon.c (tree_estimate_loop_size): Add edge_to_cancel parameter and use it to estimate code optimized out in the final iteration.

2012-10-17  Jan Hubicka  <jh@suse.cz>

	* tree-ssa-loop-ivcanon.c (tree_estimate_loop_size): Add edge_to_cancel
	parameter and use it to estimate code optimized out in the final iteration.
	(loop_edge_to_cancel): New function.
	(try_unroll_loop_completely): New IRRED_INVALIDATED parameter;
	handle unrolling loops with bounds given via max_loop_iterations;
	handle unrolling non-inner loops when code size shrinks;
	tidy dump output; when after the last iteration the loop still stays
	as a loop in the CFG, forcibly redirect the latch to
	__builtin_unreachable.
	(canonicalize_loop_induction_variables): Add irred_invalidated
	parameter; record the derived niter bound; dump
	max_loop_iterations bounds; call try_unroll_loop_completely
	even if no niter bound is given.
	(canonicalize_induction_variables): Handle irred_invalidated.
	(tree_unroll_loops_completely): Handle non-innermost loops;
	handle irred_invalidated.
	* cfgloop.h (unloop): Declare.
	* cfgloopmanip.c (unloop): Export.
	* tree.c (build_common_builtin_nodes): Build BUILT_IN_UNREACHABLE.

	* gcc.target/i386/l_fma_float_?.c: Update.
	* gcc.target/i386/l_fma_double_?.c: Update.
	* gfortran.dg/do_1.f90: XFAIL.
	* gcc.dg/tree-ssa/cunroll-1.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-2.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-3.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-4.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-5.c: New testcase.
	* gcc.dg/tree-ssa/ldist-17.c: Block cunroll to make testcase still valid.

From-SVN: r192538
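For context, the new comment in tree-ssa-loop-ivcanon.c motivates loop_edge_to_cancel with a loop whose trip count is known only from a recorded bound (here the array size), not from the induction-variable test. A minimal sketch of that case, adapted from the comment in the diff below; the wrapper function f is only for illustration:

int a[5];

void
f (int b)
{
  int i;
  /* The loop runs at most 5 times because a[] has 5 elements, even
     though the exit test i < b does not prove it.  With this change
     the loop can be completely unrolled using the recorded bound;
     each copy keeps its exit test, and the back edge of the last
     copy is cancelled (or, when the latch cannot be removed, the
     latch is redirected to a new block calling
     __builtin_unreachable ()).  */
  for (i = 0; i < b; i++)
    a[i] = 0;
}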
@@ -1,3 +1,25 @@
2012-10-17  Jan Hubicka  <jh@suse.cz>

	* tree-ssa-loop-ivcanon.c (tree_estimate_loop_size): Add edge_to_cancel
	parameter and use it to estimate code optimized out in the final iteration.
	(loop_edge_to_cancel): New function.
	(try_unroll_loop_completely): New IRRED_INVALIDATED parameter;
	handle unrolling loops with bounds given via max_loop_iterations;
	handle unrolling non-inner loops when code size shrinks;
	tidy dump output; when after the last iteration the loop still stays
	as a loop in the CFG, forcibly redirect the latch to
	__builtin_unreachable.
	(canonicalize_loop_induction_variables): Add irred_invalidated
	parameter; record the derived niter bound; dump
	max_loop_iterations bounds; call try_unroll_loop_completely
	even if no niter bound is given.
	(canonicalize_induction_variables): Handle irred_invalidated.
	(tree_unroll_loops_completely): Handle non-innermost loops;
	handle irred_invalidated.
	* cfgloop.h (unloop): Declare.
	* cfgloopmanip.c (unloop): Export.
	* tree.c (build_common_builtin_nodes): Build BUILT_IN_UNREACHABLE.

2012-10-17  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* opth-gen.awk (TARGET_* generation): Always generate TARGET_<xxx>
@@ -320,7 +320,8 @@ extern struct loop *loopify (edge, edge,
struct loop * loop_version (struct loop *, void *,
                            basic_block *, unsigned, unsigned, unsigned, bool);
extern bool remove_path (edge);
void scale_loop_frequencies (struct loop *, int, int);
extern void unloop (struct loop *, bool *);
extern void scale_loop_frequencies (struct loop *, int, int);

/* Induction variable analysis.  */
@@ -37,7 +37,6 @@ static int find_path (edge, basic_block **);
static void fix_loop_placements (struct loop *, bool *);
static bool fix_bb_placement (basic_block);
static void fix_bb_placements (basic_block, bool *);
static void unloop (struct loop *, bool *);

/* Checks whether basic block BB is dominated by DATA.  */
static bool
@@ -895,7 +894,7 @@ loopify (edge latch_edge, edge header_edge,
   If this may cause the information about irreducible regions to become
   invalid, IRRED_INVALIDATED is set to true.  */

static void
void
unloop (struct loop *loop, bool *irred_invalidated)
{
  basic_block *body;
@@ -1,3 +1,16 @@
2012-10-16  Jan Hubicka  <jh@suse.cz>

	* gcc.target/i386/l_fma_float_?.c: Update.
	* gcc.target/i386/l_fma_double_?.c: Update.
	* gfortran.dg/do_1.f90: XFAIL.
	* gcc.dg/tree-ssa/cunroll-1.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-2.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-3.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-4.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-5.c: New testcase.
	* gcc.dg/tree-ssa/ldist-17.c: Block cunroll to make testcase still
	valid.

2012-10-16  Manuel López-Ibáñez  <manu@gcc.gnu.org>

	PR c/53063
@@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details -fdisable-tree-cunroll -fdisable-tree-cunrolli" } */

typedef int mad_fixed_t;
struct mad_pcm
@@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 8 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 8 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
@@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 8 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 8 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
@@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 8 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 8 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
@@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 8 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 8 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
@@ -1,4 +1,5 @@
! { dg-do run }
! { dg-do run { xfail *-*-* } }
! XFAIL is tracked in PR 54932
! Program to check corner cases for DO statements.
program do_1
  implicit none
@@ -192,7 +192,7 @@ constant_after_peeling (tree op, gimple stmt, struct loop *loop)
   Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */

static void
tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, struct loop_size *size)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
@@ -208,8 +208,8 @@ tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  for (i = 0; i < loop->num_nodes; i++)
    {
      if (exit && body[i] != exit->src
          && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
      if (edge_to_cancel && body[i] != edge_to_cancel->src
          && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
        after_exit = true;
      else
        after_exit = false;
@@ -231,7 +231,7 @@ tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
      /* Look for reasons why we might optimize this stmt away.  */

      /* Exit conditional.  */
      if (body[i] == exit->src && stmt == last_stmt (exit->src))
      if (exit && body[i] == exit->src && stmt == last_stmt (exit->src))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " Exit condition will be eliminated.\n");
@@ -314,36 +314,161 @@ estimated_unrolled_size (struct loop_size *size,
  return unr_insns;
}

/* Loop LOOP is known to not loop.  See if there is an edge in the loop
   body that can be removed to make the loop always exit and that, at
   the same time, does not make any code potentially executed
   during the last iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite a common case for loops of the form

   int a[5];
   for (i=0;i<b;i++)
     a[i]=0;

   Here we prove the loop to iterate 5 times but we do not know
   it from the induction variable.

   For now we handle only the simple case where there is an exit condition
   just before the latch block and the latch block contains no statements
   with side effects that may otherwise terminate the execution of the loop
   (such as by EH or by terminating the program or longjmp).

   In the general case we may want to cancel the paths leading to statements
   that loop-niter identified as having undefined effect in the last iteration.
   The other cases are hopefully rare and will be cleaned up later.  */

edge
loop_edge_to_cancel (struct loop *loop)
{
  VEC (edge, heap) *exits;
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want only one predecessor of the loop.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (edge, exits, i, edge_to_cancel)
    {
      /* Find the other edge than the loop exit
         leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
        continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
        edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
        edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to the loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
        continue;

      VEC_free (edge, heap, exits);

      /* Verify that the code in the loop latch does nothing that may end program
         execution without really reaching the exit.  This may include
         non-pure/const function calls, EH statements, volatile ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
        if (gimple_has_side_effects (gsi_stmt (gsi)))
          return NULL;
      return edge_to_cancel;
    }
  VEC_free (edge, heap, exits);
  return NULL;
}

/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.  */
   EXIT is the exit of the loop that should be eliminated.
   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.  */

static bool
try_unroll_loop_completely (struct loop *loop,
                            edge exit, tree niter,
                            enum unroll_level ul)
                            enum unroll_level ul,
                            bool *irred_invalidated)
{
  unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
  gimple cond;
  struct loop_size size;
  bool n_unroll_found = false;
  HOST_WIDE_INT maxiter;
  basic_block latch;
  edge latch_edge;
  location_t locus;
  int flags;
  gimple stmt;
  gimple_stmt_iterator gsi;
  edge edge_to_cancel = NULL;
  int num = loop->num;

  if (loop->inner)
    return false;
  /* See if we proved number of iterations to be low constant.

  if (!host_integerp (niter, 1))
     EXIT is an edge that will be removed in all but the last iteration of
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling.

     If the number of executions of the loop is determined by a standard
     induction variable test, then EXIT and EDGE_TO_CANCEL are the two edges
     leaving from the iv test.  */
  if (host_integerp (niter, 1))
    {
      n_unroll = tree_low_cst (niter, 1);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
        edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we can not eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  maxiter = max_loop_iterations_int (loop);
  if (maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we can not
         remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;
  n_unroll = tree_low_cst (niter, 1);

  max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
  if (n_unroll > max_unroll)
    return false;

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      VEC (edge, heap) *to_remove = NULL;
      if (ul == UL_SINGLE_ITER)
        return false;

      tree_estimate_loop_size (loop, exit, &size);
      tree_estimate_loop_size (loop, exit, edge_to_cancel, &size);
      ninsns = size.overall;

      unr_insns = estimated_unrolled_size (&size, n_unroll);
@@ -354,6 +479,18 @@ try_unroll_loop_completely (struct loop *loop,
                   (int) unr_insns);
        }

      /* We unroll only inner loops, because we do not consider it profitable
         otherwise.  We can still cancel the loopback edge of a loop that does
         not roll; this is always a good idea.  */
      if (loop->inner && unr_insns > ninsns)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Not unrolling loop %d:"
                     "it is not innermost and code would grow.\n",
                     loop->num);
          return false;
        }

      if (unr_insns > ninsns
          && (unr_insns
              > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
@@ -369,17 +506,10 @@ try_unroll_loop_completely (struct loop *loop,
          && unr_insns > ninsns)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
            fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
                     loop->num);
          return false;
        }
    }

  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      VEC (edge, heap) *to_remove = NULL;

      initialize_original_copy_tables ();
      wont_exit = sbitmap_alloc (n_unroll + 1);
@@ -408,15 +538,67 @@ try_unroll_loop_completely (struct loop *loop,
      free_original_copy_tables ();
    }

  cond = last_stmt (exit->src);
  if (exit->flags & EDGE_TRUE_VALUE)
    gimple_cond_make_true (cond);
  /* Remove the conditional from the last copy of the loop.  */
  if (edge_to_cancel)
    {
      cond = last_stmt (edge_to_cancel->src);
      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
        gimple_cond_make_false (cond);
      else
        gimple_cond_make_true (cond);
      update_stmt (cond);
      /* Do not remove the path.  Doing so may remove the outer loop
         and confuse bookkeeping code in tree_unroll_loops_completely.  */
    }
  /* We did not manage to cancel the loop.
     The loop latch remains reachable even if it will never be reached
     at runtime.  We must redirect it to somewhere, so create a basic
     block containing a __builtin_unreachable call for this reason.  */
  else
    gimple_cond_make_false (cond);
  update_stmt (cond);
    {
      latch = loop->latch;
      latch_edge = loop_latch_edge (loop);
      flags = latch_edge->flags;
      locus = latch_edge->goto_locus;

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated);

      /* Create a new basic block for the latch edge destination and wire
         it in.  */
      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
      latch_edge->probability = 0;
      latch_edge->count = 0;
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      latch_edge->dest->loop_father = current_loops->tree_root;
      latch_edge->dest->count = 0;
      latch_edge->dest->frequency = 0;
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
    {
      if (!n_unroll)
        fprintf (dump_file, "Turned loop %d to non-loop; it never loops.\n",
                 num);
      else
        fprintf (dump_file, "Unrolled loop %d completely "
                 "(duplicated %i times).\n", num, (int)n_unroll);
      if (exit)
        fprintf (dump_file, "Exit condition of peeled iterations was "
                 "eliminated.\n");
      if (edge_to_cancel)
        fprintf (dump_file, "Last iteration exit edge was proved true.\n");
      else
        fprintf (dump_file, "Latch of last iteration was marked by "
                 "__builtin_unreachable ().\n");
    }

  return true;
}
@@ -425,12 +607,15 @@ try_unroll_loop_completely (struct loop *loop,
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
   to determine the number of iterations of a loop by direct evaluation.
   Returns true if cfg is changed.  */
   Returns true if cfg is changed.

   IRRED_INVALIDATED is used to track whether irreducible regions need to be
   recomputed.  */

static bool
canonicalize_loop_induction_variables (struct loop *loop,
                                       bool create_iv, enum unroll_level ul,
                                       bool try_eval)
                                       bool try_eval,
                                       bool *irred_invalidated)
{
  edge exit = NULL;
  tree niter;
@@ -455,22 +640,34 @@ canonicalize_loop_induction_variables (struct loop *loop,
          || TREE_CODE (niter) != INTEGER_CST))
        niter = find_loop_niter_by_eval (loop, &exit);

      if (chrec_contains_undetermined (niter)
          || TREE_CODE (niter) != INTEGER_CST)
        return false;
      if (TREE_CODE (niter) != INTEGER_CST)
        exit = NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to keep it for the future.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    record_niter_bound (loop, tree_to_double_int (niter), false, true);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && max_loop_iterations_int (loop) >= 0)
    {
      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
               (int)max_loop_iterations_int (loop));
    }

  if (try_unroll_loop_completely (loop, exit, niter, ul))
  if (try_unroll_loop_completely (loop, exit, niter, ul, irred_invalidated))
    return true;

  if (create_iv)
  if (create_iv
      && niter && !chrec_contains_undetermined (niter))
    create_canonical_iv (loop, exit, niter);

  return false;
@@ -485,15 +682,21 @@ canonicalize_induction_variables (void)
  loop_iterator li;
  struct loop *loop;
  bool changed = false;
  bool irred_invalidated = false;

  FOR_EACH_LOOP (li, loop, 0)
    {
      changed |= canonicalize_loop_induction_variables (loop,
                                                        true, UL_SINGLE_ITER,
                                                        true);
                                                        true,
                                                        &irred_invalidated);
    }
  gcc_assert (!need_ssa_update_p (cfun));

  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  scev_reset ();
@@ -594,9 +797,10 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)

  do
    {
      bool irred_invalidated = false;
      changed = false;

      FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
      FOR_EACH_LOOP (li, loop, 0)
        {
          struct loop *loop_father = loop_outer (loop);

@@ -609,7 +813,8 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
            ul = UL_NO_GROWTH;

          if (canonicalize_loop_induction_variables (loop, false, ul,
                                                     !flag_tree_loop_ivcanon))
                                                     !flag_tree_loop_ivcanon,
                                                     &irred_invalidated))
            {
              changed = true;
              /* If we'll continue unrolling, we need to propagate constants
@@ -629,6 +834,10 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
      struct loop **iter;
      unsigned i;

      if (irred_invalidated
          && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
        mark_irreducible_loops ();

      update_ssa (TODO_update_ssa);

      /* Propagate the constants within the new basic blocks.  */
@@ -9524,6 +9524,15 @@ build_common_builtin_nodes (void)
  tree tmp, ftype;
  int ecf_flags;

  if (!builtin_decl_explicit_p (BUILT_IN_UNREACHABLE))
    {
      ftype = build_function_type (void_type_node, void_list_node);
      local_define_builtin ("__builtin_unreachable", ftype, BUILT_IN_UNREACHABLE,
                            "__builtin_unreachable",
                            ECF_NOTHROW | ECF_LEAF | ECF_NORETURN
                            | ECF_CONST | ECF_LEAF);
    }

  if (!builtin_decl_explicit_p (BUILT_IN_MEMCPY)
      || !builtin_decl_explicit_p (BUILT_IN_MEMMOVE))
    {