loop splitting
(approved at https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00648.html ) * common.opt (-fsplit-loops): New flag. * passes.def (pass_loop_split): Add. * opts.c (default_options_table): Add OPT_fsplit_loops entry at -O3. (enable_fdo_optimizations): Add loop splitting. * timevar.def (TV_LOOP_SPLIT): Add. * tree-pass.h (make_pass_loop_split): Declare. * tree-ssa-loop-manip.h (rewrite_into_loop_closed_ssa_1): Declare. * tree-ssa-loop-unswitch.c: Include tree-ssa-loop-manip.h, * tree-ssa-loop-split.c: New file. * Makefile.in (OBJS): Add tree-ssa-loop-split.o. * doc/invoke.texi (fsplit-loops): Document. * doc/passes.texi (Loop optimization): Add paragraph about loop splitting. testsuite/ * gcc.dg/loop-split.c: New test. * gcc.dg/tree-ssa/ifc-9.c: Update. * gcc.dg/tree-ssa/ifc-10.c: Update. From-SVN: r241374
This commit is contained in:
parent
a2e0054e1d
commit
28df87300a
@ -1,3 +1,20 @@
|
||||
2016-10-20 Michael Matz <matz@suse.de>
|
||||
|
||||
Loop splitting.
|
||||
* common.opt (-fsplit-loops): New flag.
|
||||
* passes.def (pass_loop_split): Add.
|
||||
* opts.c (default_options_table): Add OPT_fsplit_loops entry at -O3.
|
||||
(enable_fdo_optimizations): Add loop splitting.
|
||||
* timevar.def (TV_LOOP_SPLIT): Add.
|
||||
* tree-pass.h (make_pass_loop_split): Declare.
|
||||
* tree-ssa-loop-manip.h (rewrite_into_loop_closed_ssa_1): Declare.
|
||||
* tree-ssa-loop-unswitch.c: Include tree-ssa-loop-manip.h,
|
||||
* tree-ssa-loop-split.c: New file.
|
||||
* Makefile.in (OBJS): Add tree-ssa-loop-split.o.
|
||||
* doc/invoke.texi (fsplit-loops): Document.
|
||||
* doc/passes.texi (Loop optimization): Add paragraph about loop
|
||||
splitting.
|
||||
|
||||
2016-10-20 Richard Biener <rguenther@suse.de>
|
||||
|
||||
* cgraphunit.c (analyze_functions): Set node->definition to
|
||||
|
@ -1508,6 +1508,7 @@ OBJS = \
|
||||
tree-ssa-loop-manip.o \
|
||||
tree-ssa-loop-niter.o \
|
||||
tree-ssa-loop-prefetch.o \
|
||||
tree-ssa-loop-split.o \
|
||||
tree-ssa-loop-unswitch.o \
|
||||
tree-ssa-loop.o \
|
||||
tree-ssa-math-opts.o \
|
||||
|
@ -2576,6 +2576,10 @@ funswitch-loops
|
||||
Common Report Var(flag_unswitch_loops) Optimization
|
||||
Perform loop unswitching.
|
||||
|
||||
fsplit-loops
|
||||
Common Report Var(flag_split_loops) Optimization
|
||||
Perform loop splitting.
|
||||
|
||||
funwind-tables
|
||||
Common Report Var(flag_unwind_tables) Optimization
|
||||
Just generate unwind tables for exception handling.
|
||||
|
@ -402,7 +402,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol
|
||||
-fsemantic-interposition -fshrink-wrap -fshrink-wrap-separate @gol
|
||||
-fsignaling-nans @gol
|
||||
-fsingle-precision-constant -fsplit-ivs-in-unroller @gol
|
||||
-fsingle-precision-constant -fsplit-ivs-in-unroller -fsplit-loops@gol
|
||||
-fsplit-paths @gol
|
||||
-fsplit-wide-types -fssa-backprop -fssa-phiopt @gol
|
||||
-fstdarg-opt -fstrict-aliasing @gol
|
||||
@ -9224,6 +9224,11 @@ Enabled with @option{-O3} and/or @option{-fprofile-use}.
|
||||
Enables the loop invariant motion pass in the RTL loop optimizer. Enabled
|
||||
at level @option{-O1}
|
||||
|
||||
@item -fsplit-loops
|
||||
@opindex fsplit-loops
|
||||
Split a loop into two if it contains a condition that's always true
|
||||
for one side of the iteration space and false for the other.
|
||||
|
||||
@item -funswitch-loops
|
||||
@opindex funswitch-loops
|
||||
Move branches with loop invariant conditions out of the loop, with duplicates
|
||||
|
@ -484,6 +484,12 @@ out of the loops. To achieve this, a duplicate of the loop is created for
|
||||
each possible outcome of conditional jump(s). The pass is implemented in
|
||||
@file{tree-ssa-loop-unswitch.c}.
|
||||
|
||||
Loop splitting. If a loop contains a conditional statement that is
|
||||
always true for one part of the iteration space and false for the other
|
||||
this pass splits the loop into two, one dealing with one side the other
|
||||
only with the other, thereby removing one inner-loop conditional. The
|
||||
pass is implemented in @file{tree-ssa-loop-split.c}.
|
||||
|
||||
The optimizations also use various utility functions contained in
|
||||
@file{tree-ssa-loop-manip.c}, @file{cfgloop.c}, @file{cfgloopanal.c} and
|
||||
@file{cfgloopmanip.c}.
|
||||
|
@ -530,6 +530,7 @@ static const struct default_options default_options_table[] =
|
||||
regardless of them being declared inline. */
|
||||
{ OPT_LEVELS_3_PLUS_AND_SIZE, OPT_finline_functions, NULL, 1 },
|
||||
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
|
||||
@ -1427,6 +1428,8 @@ enable_fdo_optimizations (struct gcc_options *opts,
|
||||
opts->x_flag_ipa_bit_cp = value;
|
||||
if (!opts_set->x_flag_predictive_commoning)
|
||||
opts->x_flag_predictive_commoning = value;
|
||||
if (!opts_set->x_flag_split_loops)
|
||||
opts->x_flag_split_loops = value;
|
||||
if (!opts_set->x_flag_unswitch_loops)
|
||||
opts->x_flag_unswitch_loops = value;
|
||||
if (!opts_set->x_flag_gcse_after_reload)
|
||||
|
@ -269,6 +269,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
NEXT_PASS (pass_tree_loop_init);
|
||||
NEXT_PASS (pass_tree_unswitch);
|
||||
NEXT_PASS (pass_scev_cprop);
|
||||
NEXT_PASS (pass_loop_split);
|
||||
NEXT_PASS (pass_record_bounds);
|
||||
NEXT_PASS (pass_loop_distribution);
|
||||
NEXT_PASS (pass_copy_prop);
|
||||
|
@ -1,3 +1,9 @@
|
||||
2016-10-20 Michael Matz <matz@suse.de>
|
||||
|
||||
* gcc.dg/loop-split.c: New test.
|
||||
* gcc.dg/tree-ssa/ifc-9.c: Update.
|
||||
* gcc.dg/tree-ssa/ifc-10.c: Update.
|
||||
|
||||
2016-10-20 Richard Biener <rguenther@suse.de>
|
||||
|
||||
* gcc.dg/graphite/pr35356-1.c: Update.
|
||||
|
147
gcc/testsuite/gcc.dg/loop-split.c
Normal file
147
gcc/testsuite/gcc.dg/loop-split.c
Normal file
@ -0,0 +1,147 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" int printf (const char *, ...);
|
||||
extern "C" void abort (void);
|
||||
#else
|
||||
extern int printf (const char *, ...);
|
||||
extern void abort (void);
|
||||
#endif
|
||||
|
||||
/* Define TRACE to 1 or 2 to get detailed tracing.
|
||||
Define SINGLE_TEST to 1 or 2 to get a simple routine with
|
||||
just one loop, called only one time or with multiple parameters,
|
||||
to make debugging easier. */
|
||||
#ifndef TRACE
|
||||
#define TRACE 0
|
||||
#endif
|
||||
|
||||
#define loop(beg,step,beg2,cond1,cond2) \
|
||||
do \
|
||||
{ \
|
||||
sum = 0; \
|
||||
for (i = (beg), j = (beg2); (cond1); i+=(step),j+=(step)) \
|
||||
{ \
|
||||
if (cond2) { \
|
||||
if (TRACE > 1) printf ("a: %d %d\n", i, j); \
|
||||
sum += a[i]; \
|
||||
} else { \
|
||||
if (TRACE > 1) printf ("b: %d %d\n", i, j); \
|
||||
sum += b[i]; \
|
||||
} \
|
||||
} \
|
||||
if (TRACE > 0) printf ("sum: %d\n", sum); \
|
||||
check = check * 47 + sum; \
|
||||
} while (0)
|
||||
|
||||
#ifndef SINGLE_TEST
|
||||
unsigned __attribute__((noinline, noclone)) dotest (int beg, int end, int step,
|
||||
int c, int *a, int *b, int beg2)
|
||||
{
|
||||
unsigned check = 0;
|
||||
int sum;
|
||||
int i, j;
|
||||
loop (beg, 1, beg2, i < end, j < c);
|
||||
loop (beg, 1, beg2, i <= end, j < c);
|
||||
loop (beg, 1, beg2, i < end, j <= c);
|
||||
loop (beg, 1, beg2, i <= end, j <= c);
|
||||
loop (beg, 1, beg2, i < end, j > c);
|
||||
loop (beg, 1, beg2, i <= end, j > c);
|
||||
loop (beg, 1, beg2, i < end, j >= c);
|
||||
loop (beg, 1, beg2, i <= end, j >= c);
|
||||
beg2 += end-beg;
|
||||
loop (end, -1, beg2, i >= beg, j >= c);
|
||||
loop (end, -1, beg2, i >= beg, j > c);
|
||||
loop (end, -1, beg2, i > beg, j >= c);
|
||||
loop (end, -1, beg2, i > beg, j > c);
|
||||
loop (end, -1, beg2, i >= beg, j <= c);
|
||||
loop (end, -1, beg2, i >= beg, j < c);
|
||||
loop (end, -1, beg2, i > beg, j <= c);
|
||||
loop (end, -1, beg2, i > beg, j < c);
|
||||
return check;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
int __attribute__((noinline, noclone)) f (int beg, int end, int step,
|
||||
int c, int *a, int *b, int beg2)
|
||||
{
|
||||
int sum = 0;
|
||||
int i, j;
|
||||
//for (i = beg, j = beg2; i < end; i += 1, j++ /*step*/)
|
||||
for (i = end, j = beg2 + (end-beg); i > beg; i += -1, j-- /*step*/)
|
||||
{
|
||||
// i - j == X --> i = X + j
|
||||
// --> i < end == X+j < end == j < end - X
|
||||
// --> newend = end - (i_init - j_init)
|
||||
// j < end-X && j < c --> j < min(end-X,c)
|
||||
// j < end-X && j <= c --> j <= min(end-X-1,c) or j < min(end-X,c+1{OF!})
|
||||
//if (j < c)
|
||||
if (j >= c)
|
||||
printf ("a: %d %d\n", i, j);
|
||||
/*else
|
||||
printf ("b: %d %d\n", i, j);*/
|
||||
/*sum += a[i];
|
||||
else
|
||||
sum += b[i];*/
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int __attribute__((noinline, noclone)) f2 (int *beg, int *end, int step,
|
||||
int *c, int *a, int *b, int *beg2)
|
||||
{
|
||||
int sum = 0;
|
||||
int *i, *j;
|
||||
for (i = beg, j = beg2; i < end; i += 1, j++ /*step*/)
|
||||
{
|
||||
if (j <= c)
|
||||
printf ("%d %d\n", i - beg, j - beg);
|
||||
/*sum += a[i];
|
||||
else
|
||||
sum += b[i];*/
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
#endif
|
||||
|
||||
extern int printf (const char *, ...);
|
||||
|
||||
int main ()
|
||||
{
|
||||
int a[] = {0,0,0,0,0, 1,2,3,4,5,6,7,8,9, 0,0,0,0,0};
|
||||
int b[] = {0,0,0,0,0, -1,-2,-3,-4,-5,-6,-7,-8,-9, 0,0,0,0,0,};
|
||||
int c;
|
||||
int diff = 0;
|
||||
unsigned check = 0;
|
||||
#if defined(SINGLE_TEST) && (SINGLE_TEST == 1)
|
||||
//dotest (0, 9, 1, -1, a+5, b+5, -1);
|
||||
//return 0;
|
||||
f (0, 9, 1, 5, a+5, b+5, -1);
|
||||
return 0;
|
||||
#endif
|
||||
for (diff = -5; diff <= 5; diff++)
|
||||
{
|
||||
for (c = -1; c <= 10; c++)
|
||||
{
|
||||
#ifdef SINGLE_TEST
|
||||
int s = f (0, 9, 1, c, a+5, b+5, diff);
|
||||
//int s = f2 (a+0, a+9, 1, a+c, a+5, b+5, a+diff);
|
||||
printf ("%d ", s);
|
||||
#else
|
||||
if (TRACE > 0)
|
||||
printf ("check %d %d\n", c, diff);
|
||||
check = check * 51 + dotest (0, 9, 1, c, a+5, b+5, diff);
|
||||
#endif
|
||||
}
|
||||
//printf ("\n");
|
||||
}
|
||||
//printf ("%u\n", check);
|
||||
if (check != 3213344948)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* All 16 loops in dotest should be split. */
|
||||
/* { dg-final { scan-tree-dump-times "Loop split" 16 "lsplit" } } */
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-Ofast -fdump-tree-ifcvt-stats" } */
|
||||
/* { dg-options "-Ofast -fno-split-loops -fdump-tree-ifcvt-stats" } */
|
||||
/* { dg-require-visibility "" } */
|
||||
|
||||
int b[256] = {0}, y;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-Ofast -fdump-tree-ifcvt-stats" } */
|
||||
/* { dg-options "-Ofast -fno-split-loops -fdump-tree-ifcvt-stats" } */
|
||||
/* { dg-require-visibility "" } */
|
||||
|
||||
extern int b[256], y;
|
||||
|
@ -185,6 +185,7 @@ DEFTIMEVAR (TV_LIM , "tree loop invariant motion")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv")
|
||||
DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
|
||||
DEFTIMEVAR (TV_LOOP_SPLIT , "loop splitting")
|
||||
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
|
||||
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
|
||||
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
|
||||
|
@ -367,6 +367,7 @@ extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_predcom (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
|
||||
|
@ -24,6 +24,8 @@ typedef void (*transform_callback)(struct loop *, void *);
|
||||
|
||||
extern void create_iv (tree, tree, tree, struct loop *, gimple_stmt_iterator *,
|
||||
bool, tree *, tree *);
|
||||
extern void rewrite_into_loop_closed_ssa_1 (bitmap, unsigned, int,
|
||||
struct loop *);
|
||||
extern void rewrite_into_loop_closed_ssa (bitmap, unsigned);
|
||||
extern void rewrite_virtuals_into_loop_closed_ssa (struct loop *);
|
||||
extern void verify_loop_closed_ssa (bool);
|
||||
|
687
gcc/tree-ssa-loop-split.c
Normal file
687
gcc/tree-ssa-loop-split.c
Normal file
@ -0,0 +1,687 @@
|
||||
/* Loop splitting.
|
||||
Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 3, or (at your option) any
|
||||
later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "backend.h"
|
||||
#include "tree.h"
|
||||
#include "gimple.h"
|
||||
#include "tree-pass.h"
|
||||
#include "ssa.h"
|
||||
#include "fold-const.h"
|
||||
#include "tree-cfg.h"
|
||||
#include "tree-ssa.h"
|
||||
#include "tree-ssa-loop-niter.h"
|
||||
#include "tree-ssa-loop.h"
|
||||
#include "tree-ssa-loop-manip.h"
|
||||
#include "tree-into-ssa.h"
|
||||
#include "cfgloop.h"
|
||||
#include "tree-scalar-evolution.h"
|
||||
#include "gimple-iterator.h"
|
||||
#include "gimple-pretty-print.h"
|
||||
#include "cfghooks.h"
|
||||
#include "gimple-fold.h"
|
||||
#include "gimplify-me.h"
|
||||
|
||||
/* This file implements loop splitting, i.e. transformation of loops like
|
||||
|
||||
for (i = 0; i < 100; i++)
|
||||
{
|
||||
if (i < 50)
|
||||
A;
|
||||
else
|
||||
B;
|
||||
}
|
||||
|
||||
into:
|
||||
|
||||
for (i = 0; i < 50; i++)
|
||||
{
|
||||
A;
|
||||
}
|
||||
for (; i < 100; i++)
|
||||
{
|
||||
B;
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
/* Return true when BB inside LOOP is a potential iteration space
|
||||
split point, i.e. ends with a condition like "IV < comp", which
|
||||
is true on one side of the iteration space and false on the other,
|
||||
and the split point can be computed. If so, also return the border
|
||||
point in *BORDER and the comparison induction variable in IV. */
|
||||
|
||||
static tree
|
||||
split_at_bb_p (struct loop *loop, basic_block bb, tree *border, affine_iv *iv)
|
||||
{
|
||||
gimple *last;
|
||||
gcond *stmt;
|
||||
affine_iv iv2;
|
||||
|
||||
/* BB must end in a simple conditional jump. */
|
||||
last = last_stmt (bb);
|
||||
if (!last || gimple_code (last) != GIMPLE_COND)
|
||||
return NULL_TREE;
|
||||
stmt = as_a <gcond *> (last);
|
||||
|
||||
enum tree_code code = gimple_cond_code (stmt);
|
||||
|
||||
/* Only handle relational comparisons, for equality and non-equality
|
||||
we'd have to split the loop into two loops and a middle statement. */
|
||||
switch (code)
|
||||
{
|
||||
case LT_EXPR:
|
||||
case LE_EXPR:
|
||||
case GT_EXPR:
|
||||
case GE_EXPR:
|
||||
break;
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
if (loop_exits_from_bb_p (loop, bb))
|
||||
return NULL_TREE;
|
||||
|
||||
tree op0 = gimple_cond_lhs (stmt);
|
||||
tree op1 = gimple_cond_rhs (stmt);
|
||||
|
||||
if (!simple_iv (loop, loop, op0, iv, false))
|
||||
return NULL_TREE;
|
||||
if (!simple_iv (loop, loop, op1, &iv2, false))
|
||||
return NULL_TREE;
|
||||
|
||||
/* Make it so that the first argument of the condition is
|
||||
the looping one. */
|
||||
if (!integer_zerop (iv2.step))
|
||||
{
|
||||
std::swap (op0, op1);
|
||||
std::swap (*iv, iv2);
|
||||
code = swap_tree_comparison (code);
|
||||
gimple_cond_set_condition (stmt, code, op0, op1);
|
||||
update_stmt (stmt);
|
||||
}
|
||||
else if (integer_zerop (iv->step))
|
||||
return NULL_TREE;
|
||||
if (!integer_zerop (iv2.step))
|
||||
return NULL_TREE;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Found potential split point: ");
|
||||
print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
|
||||
fprintf (dump_file, " { ");
|
||||
print_generic_expr (dump_file, iv->base, TDF_SLIM);
|
||||
fprintf (dump_file, " + I*");
|
||||
print_generic_expr (dump_file, iv->step, TDF_SLIM);
|
||||
fprintf (dump_file, " } %s ", get_tree_code_name (code));
|
||||
print_generic_expr (dump_file, iv2.base, TDF_SLIM);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
|
||||
*border = iv2.base;
|
||||
return op0;
|
||||
}
|
||||
|
||||
/* Given a GUARD conditional stmt inside LOOP, which we want to make always
|
||||
true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
|
||||
(a post-increment IV) and NEWBOUND (the comparator) adjust the loop
|
||||
exit test statement to loop back only if the GUARD statement will
|
||||
also be true/false in the next iteration. */
|
||||
|
||||
static void
|
||||
patch_loop_exit (struct loop *loop, gcond *guard, tree nextval, tree newbound,
|
||||
bool initial_true)
|
||||
{
|
||||
edge exit = single_exit (loop);
|
||||
gcond *stmt = as_a <gcond *> (last_stmt (exit->src));
|
||||
gimple_cond_set_condition (stmt, gimple_cond_code (guard),
|
||||
nextval, newbound);
|
||||
update_stmt (stmt);
|
||||
|
||||
edge stay = single_pred_edge (loop->latch);
|
||||
|
||||
exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
||||
stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
||||
|
||||
if (initial_true)
|
||||
{
|
||||
exit->flags |= EDGE_FALSE_VALUE;
|
||||
stay->flags |= EDGE_TRUE_VALUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
exit->flags |= EDGE_TRUE_VALUE;
|
||||
stay->flags |= EDGE_FALSE_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Give an induction variable GUARD_IV, and its affine descriptor IV,
|
||||
find the loop phi node in LOOP defining it directly, or create
|
||||
such phi node. Return that phi node. */
|
||||
|
||||
static gphi *
|
||||
find_or_create_guard_phi (struct loop *loop, tree guard_iv, affine_iv * /*iv*/)
|
||||
{
|
||||
gimple *def = SSA_NAME_DEF_STMT (guard_iv);
|
||||
gphi *phi;
|
||||
if ((phi = dyn_cast <gphi *> (def))
|
||||
&& gimple_bb (phi) == loop->header)
|
||||
return phi;
|
||||
|
||||
/* XXX Create the PHI instead. */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* This function updates the SSA form after connect_loops made a new
|
||||
edge NEW_E leading from LOOP1 exit to LOOP2 (via in intermediate
|
||||
conditional). I.e. the second loop can now be entered either
|
||||
via the original entry or via NEW_E, so the entry values of LOOP2
|
||||
phi nodes are either the original ones or those at the exit
|
||||
of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting
|
||||
this. */
|
||||
|
||||
static void
|
||||
connect_loop_phis (struct loop *loop1, struct loop *loop2, edge new_e)
|
||||
{
|
||||
basic_block rest = loop_preheader_edge (loop2)->src;
|
||||
gcc_assert (new_e->dest == rest);
|
||||
edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);
|
||||
|
||||
edge firste = loop_preheader_edge (loop1);
|
||||
edge seconde = loop_preheader_edge (loop2);
|
||||
edge firstn = loop_latch_edge (loop1);
|
||||
gphi_iterator psi_first, psi_second;
|
||||
for (psi_first = gsi_start_phis (loop1->header),
|
||||
psi_second = gsi_start_phis (loop2->header);
|
||||
!gsi_end_p (psi_first);
|
||||
gsi_next (&psi_first), gsi_next (&psi_second))
|
||||
{
|
||||
tree init, next, new_init;
|
||||
use_operand_p op;
|
||||
gphi *phi_first = psi_first.phi ();
|
||||
gphi *phi_second = psi_second.phi ();
|
||||
|
||||
init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
|
||||
next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
|
||||
op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
|
||||
gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
|
||||
|
||||
/* Prefer using original variable as a base for the new ssa name.
|
||||
This is necessary for virtual ops, and useful in order to avoid
|
||||
losing debug info for real ops. */
|
||||
if (TREE_CODE (next) == SSA_NAME
|
||||
&& useless_type_conversion_p (TREE_TYPE (next),
|
||||
TREE_TYPE (init)))
|
||||
new_init = copy_ssa_name (next);
|
||||
else if (TREE_CODE (init) == SSA_NAME
|
||||
&& useless_type_conversion_p (TREE_TYPE (init),
|
||||
TREE_TYPE (next)))
|
||||
new_init = copy_ssa_name (init);
|
||||
else if (useless_type_conversion_p (TREE_TYPE (next),
|
||||
TREE_TYPE (init)))
|
||||
new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
|
||||
"unrinittmp");
|
||||
else
|
||||
new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
|
||||
"unrinittmp");
|
||||
|
||||
gphi * newphi = create_phi_node (new_init, rest);
|
||||
add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
|
||||
add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
|
||||
SET_USE (op, new_init);
|
||||
}
|
||||
}
|
||||
|
||||
/* The two loops LOOP1 and LOOP2 were just created by loop versioning,
|
||||
they are still equivalent and placed in two arms of a diamond, like so:
|
||||
|
||||
.------if (cond)------.
|
||||
v v
|
||||
pre1 pre2
|
||||
| |
|
||||
.--->h1 h2<----.
|
||||
| | | |
|
||||
| ex1---. .---ex2 |
|
||||
| / | | \ |
|
||||
'---l1 X | l2---'
|
||||
| |
|
||||
| |
|
||||
'--->join<---'
|
||||
|
||||
This function transforms the program such that LOOP1 is conditionally
|
||||
falling through to LOOP2, or skipping it. This is done by splitting
|
||||
the ex1->join edge at X in the diagram above, and inserting a condition
|
||||
whose one arm goes to pre2, resulting in this situation:
|
||||
|
||||
.------if (cond)------.
|
||||
v v
|
||||
pre1 .---------->pre2
|
||||
| | |
|
||||
.--->h1 | h2<----.
|
||||
| | | | |
|
||||
| ex1---. | .---ex2 |
|
||||
| / v | | \ |
|
||||
'---l1 skip---' | l2---'
|
||||
| |
|
||||
| |
|
||||
'--->join<---'
|
||||
|
||||
|
||||
The condition used is the exit condition of LOOP1, which effectively means
|
||||
that when the first loop exits (for whatever reason) but the real original
|
||||
exit expression is still false the second loop will be entered.
|
||||
The function returns the new edge cond->pre2.
|
||||
|
||||
This doesn't update the SSA form, see connect_loop_phis for that. */
|
||||
|
||||
static edge
|
||||
connect_loops (struct loop *loop1, struct loop *loop2)
|
||||
{
|
||||
edge exit = single_exit (loop1);
|
||||
basic_block skip_bb = split_edge (exit);
|
||||
gcond *skip_stmt;
|
||||
gimple_stmt_iterator gsi;
|
||||
edge new_e, skip_e;
|
||||
|
||||
gimple *stmt = last_stmt (exit->src);
|
||||
skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
|
||||
gimple_cond_lhs (stmt),
|
||||
gimple_cond_rhs (stmt),
|
||||
NULL_TREE, NULL_TREE);
|
||||
gsi = gsi_last_bb (skip_bb);
|
||||
gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
|
||||
|
||||
skip_e = EDGE_SUCC (skip_bb, 0);
|
||||
skip_e->flags &= ~EDGE_FALLTHRU;
|
||||
new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
|
||||
if (exit->flags & EDGE_TRUE_VALUE)
|
||||
{
|
||||
skip_e->flags |= EDGE_TRUE_VALUE;
|
||||
new_e->flags |= EDGE_FALSE_VALUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
skip_e->flags |= EDGE_FALSE_VALUE;
|
||||
new_e->flags |= EDGE_TRUE_VALUE;
|
||||
}
|
||||
|
||||
new_e->count = skip_bb->count;
|
||||
new_e->probability = PROB_LIKELY;
|
||||
new_e->count = apply_probability (skip_e->count, PROB_LIKELY);
|
||||
skip_e->count -= new_e->count;
|
||||
skip_e->probability = inverse_probability (PROB_LIKELY);
|
||||
|
||||
return new_e;
|
||||
}
|
||||
|
||||
/* This returns the new bound for iterations given the original iteration
|
||||
space in NITER, an arbitrary new bound BORDER, assumed to be some
|
||||
comparison value with a different IV, the initial value GUARD_INIT of
|
||||
that other IV, and the comparison code GUARD_CODE that compares
|
||||
that other IV with BORDER. We return an SSA name, and place any
|
||||
necessary statements for that computation into *STMTS.
|
||||
|
||||
For example for such a loop:
|
||||
|
||||
for (i = beg, j = guard_init; i < end; i++, j++)
|
||||
if (j < border) // this is supposed to be true/false
|
||||
...
|
||||
|
||||
we want to return a new bound (on j) that makes the loop iterate
|
||||
as long as the condition j < border stays true. We also don't want
|
||||
to iterate more often than the original loop, so we have to introduce
|
||||
some cut-off as well (via min/max), effectively resulting in:
|
||||
|
||||
newend = min (end+guard_init-beg, border)
|
||||
for (i = beg; j = guard_init; j < newend; i++, j++)
|
||||
if (j < c)
|
||||
...
|
||||
|
||||
Depending on the direction of the IVs and if the exit tests
|
||||
are strict or non-strict we need to use MIN or MAX,
|
||||
and add or subtract 1. This routine computes newend above. */
|
||||
|
||||
static tree
|
||||
compute_new_first_bound (gimple_seq *stmts, struct tree_niter_desc *niter,
|
||||
tree border,
|
||||
enum tree_code guard_code, tree guard_init)
|
||||
{
|
||||
/* The niter structure contains the after-increment IV, we need
|
||||
the loop-enter base, so subtract STEP once. */
|
||||
tree controlbase = force_gimple_operand (niter->control.base,
|
||||
stmts, true, NULL_TREE);
|
||||
tree controlstep = niter->control.step;
|
||||
tree enddiff;
|
||||
if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
|
||||
{
|
||||
controlstep = gimple_build (stmts, NEGATE_EXPR,
|
||||
TREE_TYPE (controlstep), controlstep);
|
||||
enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
|
||||
TREE_TYPE (controlbase),
|
||||
controlbase, controlstep);
|
||||
}
|
||||
else
|
||||
enddiff = gimple_build (stmts, MINUS_EXPR,
|
||||
TREE_TYPE (controlbase),
|
||||
controlbase, controlstep);
|
||||
|
||||
/* Compute beg-guard_init. */
|
||||
if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
|
||||
{
|
||||
tree tem = gimple_convert (stmts, sizetype, guard_init);
|
||||
tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
|
||||
enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
|
||||
TREE_TYPE (enddiff),
|
||||
enddiff, tem);
|
||||
}
|
||||
else
|
||||
enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
|
||||
enddiff, guard_init);
|
||||
|
||||
/* Compute end-(beg-guard_init). */
|
||||
gimple_seq stmts2;
|
||||
tree newbound = force_gimple_operand (niter->bound, &stmts2,
|
||||
true, NULL_TREE);
|
||||
gimple_seq_add_seq_without_update (stmts, stmts2);
|
||||
|
||||
if (POINTER_TYPE_P (TREE_TYPE (enddiff))
|
||||
|| POINTER_TYPE_P (TREE_TYPE (newbound)))
|
||||
{
|
||||
enddiff = gimple_convert (stmts, sizetype, enddiff);
|
||||
enddiff = gimple_build (stmts, NEGATE_EXPR, sizetype, enddiff);
|
||||
newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
|
||||
TREE_TYPE (newbound),
|
||||
newbound, enddiff);
|
||||
}
|
||||
else
|
||||
newbound = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
|
||||
newbound, enddiff);
|
||||
|
||||
/* Depending on the direction of the IVs the new bound for the first
|
||||
loop is the minimum or maximum of old bound and border.
|
||||
Also, if the guard condition isn't strictly less or greater,
|
||||
we need to adjust the bound. */
|
||||
int addbound = 0;
|
||||
enum tree_code minmax;
|
||||
if (niter->cmp == LT_EXPR)
|
||||
{
|
||||
/* GT and LE are the same, inverted. */
|
||||
if (guard_code == GT_EXPR || guard_code == LE_EXPR)
|
||||
addbound = -1;
|
||||
minmax = MIN_EXPR;
|
||||
}
|
||||
else
|
||||
{
|
||||
gcc_assert (niter->cmp == GT_EXPR);
|
||||
if (guard_code == GE_EXPR || guard_code == LT_EXPR)
|
||||
addbound = 1;
|
||||
minmax = MAX_EXPR;
|
||||
}
|
||||
|
||||
if (addbound)
|
||||
{
|
||||
tree type2 = TREE_TYPE (newbound);
|
||||
if (POINTER_TYPE_P (type2))
|
||||
type2 = sizetype;
|
||||
newbound = gimple_build (stmts,
|
||||
POINTER_TYPE_P (TREE_TYPE (newbound))
|
||||
? POINTER_PLUS_EXPR : PLUS_EXPR,
|
||||
TREE_TYPE (newbound),
|
||||
newbound,
|
||||
build_int_cst (type2, addbound));
|
||||
}
|
||||
|
||||
newbound = gimple_convert (stmts, TREE_TYPE (border), newbound);
|
||||
tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
|
||||
border, newbound);
|
||||
return newend;
|
||||
}
|
||||
|
||||
/* Checks if LOOP contains an conditional block whose condition
|
||||
depends on which side in the iteration space it is, and if so
|
||||
splits the iteration space into two loops. Returns true if the
|
||||
loop was split. NITER must contain the iteration descriptor for the
|
||||
single exit of LOOP. */
|
||||
|
||||
static bool
|
||||
split_loop (struct loop *loop1, struct tree_niter_desc *niter)
|
||||
{
|
||||
basic_block *bbs;
|
||||
unsigned i;
|
||||
bool changed = false;
|
||||
tree guard_iv;
|
||||
tree border;
|
||||
affine_iv iv;
|
||||
|
||||
bbs = get_loop_body (loop1);
|
||||
|
||||
/* Find a splitting opportunity. */
|
||||
for (i = 0; i < loop1->num_nodes; i++)
|
||||
if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
|
||||
{
|
||||
/* Handling opposite steps is not implemented yet. Neither
|
||||
is handling different step sizes. */
|
||||
if ((tree_int_cst_sign_bit (iv.step)
|
||||
!= tree_int_cst_sign_bit (niter->control.step))
|
||||
|| !tree_int_cst_equal (iv.step, niter->control.step))
|
||||
continue;
|
||||
|
||||
/* Find a loop PHI node that defines guard_iv directly,
|
||||
or create one doing that. */
|
||||
gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
|
||||
if (!phi)
|
||||
continue;
|
||||
gcond *guard_stmt = as_a<gcond *> (last_stmt (bbs[i]));
|
||||
tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
|
||||
loop_preheader_edge (loop1));
|
||||
enum tree_code guard_code = gimple_cond_code (guard_stmt);
|
||||
|
||||
/* Loop splitting is implemented by versioning the loop, placing
|
||||
the new loop after the old loop, make the first loop iterate
|
||||
as long as the conditional stays true (or false) and let the
|
||||
second (new) loop handle the rest of the iterations.
|
||||
|
||||
First we need to determine if the condition will start being true
|
||||
or false in the first loop. */
|
||||
bool initial_true;
|
||||
switch (guard_code)
|
||||
{
|
||||
case LT_EXPR:
|
||||
case LE_EXPR:
|
||||
initial_true = !tree_int_cst_sign_bit (iv.step);
|
||||
break;
|
||||
case GT_EXPR:
|
||||
case GE_EXPR:
|
||||
initial_true = tree_int_cst_sign_bit (iv.step);
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Build a condition that will skip the first loop when the
|
||||
guard condition won't ever be true (or false). */
|
||||
gimple_seq stmts2;
|
||||
border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
|
||||
if (stmts2)
|
||||
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
|
||||
stmts2);
|
||||
tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
|
||||
if (!initial_true)
|
||||
cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
|
||||
|
||||
/* Now version the loop, placing loop2 after loop1 connecting
|
||||
them, and fix up SSA form for that. */
|
||||
initialize_original_copy_tables ();
|
||||
basic_block cond_bb;
|
||||
struct loop *loop2 = loop_version (loop1, cond, &cond_bb,
|
||||
REG_BR_PROB_BASE, REG_BR_PROB_BASE,
|
||||
REG_BR_PROB_BASE, true);
|
||||
gcc_assert (loop2);
|
||||
update_ssa (TODO_update_ssa);
|
||||
|
||||
edge new_e = connect_loops (loop1, loop2);
|
||||
connect_loop_phis (loop1, loop2, new_e);
|
||||
|
||||
/* The iterations of the second loop is now already
|
||||
exactly those that the first loop didn't do, but the
|
||||
iteration space of the first loop is still the original one.
|
||||
Compute the new bound for the guarding IV and patch the
|
||||
loop exit to use it instead of original IV and bound. */
|
||||
gimple_seq stmts = NULL;
|
||||
tree newend = compute_new_first_bound (&stmts, niter, border,
|
||||
guard_code, guard_init);
|
||||
if (stmts)
|
||||
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
|
||||
stmts);
|
||||
tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
|
||||
patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);
|
||||
|
||||
/* Finally patch out the two copies of the condition to be always
|
||||
true/false (or opposite). */
|
||||
gcond *force_true = as_a<gcond *> (last_stmt (bbs[i]));
|
||||
gcond *force_false = as_a<gcond *> (last_stmt (get_bb_copy (bbs[i])));
|
||||
if (!initial_true)
|
||||
std::swap (force_true, force_false);
|
||||
gimple_cond_make_true (force_true);
|
||||
gimple_cond_make_false (force_false);
|
||||
update_stmt (force_true);
|
||||
update_stmt (force_false);
|
||||
|
||||
free_original_copy_tables ();
|
||||
|
||||
/* We destroyed LCSSA form above. Eventually we might be able
|
||||
to fix it on the fly, for now simply punt and use the helper. */
|
||||
rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
|
||||
|
||||
changed = true;
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, ";; Loop split.\n");
|
||||
|
||||
/* Only deal with the first opportunity. */
|
||||
break;
|
||||
}
|
||||
|
||||
free (bbs);
|
||||
return changed;
|
||||
}
|
||||
|
||||
/* Main entry point. Perform loop splitting on all suitable loops. */
|
||||
|
||||
static unsigned int
|
||||
tree_ssa_split_loops (void)
|
||||
{
|
||||
struct loop *loop;
|
||||
bool changed = false;
|
||||
|
||||
gcc_assert (scev_initialized_p ());
|
||||
FOR_EACH_LOOP (loop, 0)
|
||||
loop->aux = NULL;
|
||||
|
||||
/* Go through all loops starting from innermost. */
|
||||
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
|
||||
{
|
||||
struct tree_niter_desc niter;
|
||||
if (loop->aux)
|
||||
{
|
||||
/* If any of our inner loops was split, don't split us,
|
||||
and mark our containing loop as having had splits as well. */
|
||||
loop_outer (loop)->aux = loop;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (single_exit (loop)
|
||||
/* ??? We could handle non-empty latches when we split
|
||||
the latch edge (not the exit edge), and put the new
|
||||
exit condition in the new block. OTOH this executes some
|
||||
code unconditionally that might have been skipped by the
|
||||
original exit before. */
|
||||
&& empty_block_p (loop->latch)
|
||||
&& !optimize_loop_for_size_p (loop)
|
||||
&& number_of_iterations_exit (loop, single_exit (loop), &niter,
|
||||
false, true)
|
||||
&& niter.cmp != ERROR_MARK
|
||||
/* We can't yet handle loops controlled by a != predicate. */
|
||||
&& niter.cmp != NE_EXPR)
|
||||
{
|
||||
if (split_loop (loop, &niter))
|
||||
{
|
||||
/* Mark our containing loop as having had some split inner
|
||||
loops. */
|
||||
loop_outer (loop)->aux = loop;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FOR_EACH_LOOP (loop, 0)
|
||||
loop->aux = NULL;
|
||||
|
||||
if (changed)
|
||||
return TODO_cleanup_cfg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Loop splitting pass. */
|
||||
|
||||
namespace {
|
||||
|
||||
const pass_data pass_data_loop_split =
|
||||
{
|
||||
GIMPLE_PASS, /* type */
|
||||
"lsplit", /* name */
|
||||
OPTGROUP_LOOP, /* optinfo_flags */
|
||||
TV_LOOP_SPLIT, /* tv_id */
|
||||
PROP_cfg, /* properties_required */
|
||||
0, /* properties_provided */
|
||||
0, /* properties_destroyed */
|
||||
0, /* todo_flags_start */
|
||||
0, /* todo_flags_finish */
|
||||
};
|
||||
|
||||
class pass_loop_split : public gimple_opt_pass
|
||||
{
|
||||
public:
|
||||
pass_loop_split (gcc::context *ctxt)
|
||||
: gimple_opt_pass (pass_data_loop_split, ctxt)
|
||||
{}
|
||||
|
||||
/* opt_pass methods: */
|
||||
virtual bool gate (function *) { return flag_split_loops != 0; }
|
||||
virtual unsigned int execute (function *);
|
||||
|
||||
}; // class pass_loop_split
|
||||
|
||||
unsigned int
|
||||
pass_loop_split::execute (function *fun)
|
||||
{
|
||||
if (number_of_loops (fun) <= 1)
|
||||
return 0;
|
||||
|
||||
return tree_ssa_split_loops ();
|
||||
}
|
||||
|
||||
} // anon namespace
|
||||
|
||||
gimple_opt_pass *
|
||||
make_pass_loop_split (gcc::context *ctxt)
|
||||
{
|
||||
return new pass_loop_split (ctxt);
|
||||
}
|
Loading…
Reference in New Issue
Block a user