loop-unroll.c: (decide_unrolling_and_peeling): Rename to
* loop-unroll.c: (decide_unrolling_and_peeling): Rename to (decide_unrolling): ... this one. (peel_loops_completely): Remove. (decide_peel_simple): Remove. (decide_peel_once_rolling): Remove. (decide_peel_completely): Remove. (peel_loop_simple): Remove. (peel_loop_completely): Remove. (unroll_and_peel_loops): Rename to ... (unroll_loops): ... this one; handle only unrolling. * cfgloop.h (lpt_dec): Remove LPT_PEEL_COMPLETELY and LPT_PEEL_SIMPLE. (UAP_PEEL): Remove. (unroll_and_peel_loops): Remove. (unroll_loops): New. * passes.def: Replace pass_rtl_unroll_and_peel_loops by pass_rtl_unroll_loops. * loop-init.c (gate_rtl_unroll_and_peel_loops, rtl_unroll_and_peel_loops): Rename to ... (gate_rtl_unroll_loops, rtl_unroll_loops): ... these; update. (pass_rtl_unroll_and_peel_loops): Rename to ... (pass_rtl_unroll_loops): ... this one. * tree-pass.h (make_pass_rtl_unroll_and_peel_loops): Remove. (make_pass_rtl_unroll_loops): New. * tree-ssa-loop-ivcanon.c: (estimated_peeled_sequence_size, try_peel_loop): New. (canonicalize_loop_induction_variables): Update. * gcc.dg/tree-prof/peel-1.c: Update. * gcc.dg/tree-prof/unroll-1.c: Update. * gcc.dg/gcc.dg/unroll_1.c: Update. * gcc.dg/gcc.dg/unroll_2.c: Update. * gcc.dg/gcc.dg/unroll_3.c: Update. * gcc.dg/gcc.dg/unroll_4.c: Update. From-SVN: r216238
This commit is contained in:
parent
fa7fa585ea
commit
f8934be787
@ -1,3 +1,32 @@
|
||||
2014-10-14 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
* loop-unroll.c: (decide_unrolling_and_peeling): Rename to
|
||||
(decide_unrolling): ... this one.
|
||||
(peel_loops_completely): Remove.
|
||||
(decide_peel_simple): Remove.
|
||||
(decide_peel_once_rolling): Remove.
|
||||
(decide_peel_completely): Remove.
|
||||
(peel_loop_simple): Remove.
|
||||
(peel_loop_completely): Remove.
|
||||
(unroll_and_peel_loops): Rename to ...
|
||||
(unroll_loops): ... this one; handle only unrolling.
|
||||
* cfgloop.h (lpt_dec): Remove LPT_PEEL_COMPLETELY and
|
||||
LPT_PEEL_SIMPLE.
|
||||
(UAP_PEEL): Remove.
|
||||
(unroll_and_peel_loops): Remove.
|
||||
(unroll_loops): New.
|
||||
* passes.def: Replace
|
||||
pass_rtl_unroll_and_peel_loops by pass_rtl_unroll_loops.
|
||||
* loop-init.c (gate_rtl_unroll_and_peel_loops,
|
||||
rtl_unroll_and_peel_loops): Rename to ...
|
||||
(gate_rtl_unroll_loops, rtl_unroll_loops): ... these; update.
|
||||
(pass_rtl_unroll_and_peel_loops): Rename to ...
|
||||
(pass_rtl_unroll_loops): ... this one.
|
||||
* tree-pass.h (make_pass_rtl_unroll_and_peel_loops): Remove.
|
||||
(make_pass_rtl_unroll_loops): New.
|
||||
* tree-ssa-loop-ivcanon.c: (estimated_peeled_sequence_size, try_peel_loop): New.
|
||||
(canonicalize_loop_induction_variables): Update.
|
||||
|
||||
2014-10-14 Max Filippov <jcmvbkbc@gmail.com>
|
||||
|
||||
* config/xtensa/xtensa.h (TARGET_HARD_FLOAT_POSTINC): new macro.
|
||||
|
@ -30,8 +30,6 @@ along with GCC; see the file COPYING3. If not see
|
||||
enum lpt_dec
|
||||
{
|
||||
LPT_NONE,
|
||||
LPT_PEEL_COMPLETELY,
|
||||
LPT_PEEL_SIMPLE,
|
||||
LPT_UNROLL_CONSTANT,
|
||||
LPT_UNROLL_RUNTIME,
|
||||
LPT_UNROLL_STUPID
|
||||
@ -731,12 +729,11 @@ extern void loop_optimizer_finalize (void);
|
||||
/* Optimization passes. */
|
||||
enum
|
||||
{
|
||||
UAP_PEEL = 1, /* Enables loop peeling. */
|
||||
UAP_UNROLL = 2, /* Enables unrolling of loops if it seems profitable. */
|
||||
UAP_UNROLL_ALL = 4 /* Enables unrolling of all loops. */
|
||||
UAP_UNROLL = 1, /* Enables unrolling of loops if it seems profitable. */
|
||||
UAP_UNROLL_ALL = 2 /* Enables unrolling of all loops. */
|
||||
};
|
||||
|
||||
extern void unroll_and_peel_loops (int);
|
||||
extern void unroll_loops (int);
|
||||
extern void doloop_optimize_loops (void);
|
||||
extern void move_loop_invariants (void);
|
||||
extern void scale_loop_profile (struct loop *loop, int scale, gcov_type iteration_bound);
|
||||
|
@ -357,7 +357,6 @@ pass_loop2::gate (function *fun)
|
||||
if (optimize > 0
|
||||
&& (flag_move_loop_invariants
|
||||
|| flag_unswitch_loops
|
||||
|| flag_peel_loops
|
||||
|| flag_unroll_loops
|
||||
#ifdef HAVE_doloop_end
|
||||
|| (flag_branch_on_count_reg && HAVE_doloop_end)
|
||||
@ -537,7 +536,7 @@ make_pass_rtl_move_loop_invariants (gcc::context *ctxt)
|
||||
|
||||
namespace {
|
||||
|
||||
const pass_data pass_data_rtl_unroll_and_peel_loops =
|
||||
const pass_data pass_data_rtl_unroll_loops =
|
||||
{
|
||||
RTL_PASS, /* type */
|
||||
"loop2_unroll", /* name */
|
||||
@ -550,11 +549,11 @@ const pass_data pass_data_rtl_unroll_and_peel_loops =
|
||||
0, /* todo_flags_finish */
|
||||
};
|
||||
|
||||
class pass_rtl_unroll_and_peel_loops : public rtl_opt_pass
|
||||
class pass_rtl_unroll_loops : public rtl_opt_pass
|
||||
{
|
||||
public:
|
||||
pass_rtl_unroll_and_peel_loops (gcc::context *ctxt)
|
||||
: rtl_opt_pass (pass_data_rtl_unroll_and_peel_loops, ctxt)
|
||||
pass_rtl_unroll_loops (gcc::context *ctxt)
|
||||
: rtl_opt_pass (pass_data_rtl_unroll_loops, ctxt)
|
||||
{}
|
||||
|
||||
/* opt_pass methods: */
|
||||
@ -565,10 +564,10 @@ public:
|
||||
|
||||
virtual unsigned int execute (function *);
|
||||
|
||||
}; // class pass_rtl_unroll_and_peel_loops
|
||||
}; // class pass_rtl_unroll_loops
|
||||
|
||||
unsigned int
|
||||
pass_rtl_unroll_and_peel_loops::execute (function *fun)
|
||||
pass_rtl_unroll_loops::execute (function *fun)
|
||||
{
|
||||
if (number_of_loops (fun) > 1)
|
||||
{
|
||||
@ -576,14 +575,12 @@ pass_rtl_unroll_and_peel_loops::execute (function *fun)
|
||||
if (dump_file)
|
||||
df_dump (dump_file);
|
||||
|
||||
if (flag_peel_loops)
|
||||
flags |= UAP_PEEL;
|
||||
if (flag_unroll_loops)
|
||||
flags |= UAP_UNROLL;
|
||||
if (flag_unroll_all_loops)
|
||||
flags |= UAP_UNROLL_ALL;
|
||||
|
||||
unroll_and_peel_loops (flags);
|
||||
unroll_loops (flags);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -591,9 +588,9 @@ pass_rtl_unroll_and_peel_loops::execute (function *fun)
|
||||
} // anon namespace
|
||||
|
||||
rtl_opt_pass *
|
||||
make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt)
|
||||
make_pass_rtl_unroll_loops (gcc::context *ctxt)
|
||||
{
|
||||
return new pass_rtl_unroll_and_peel_loops (ctxt);
|
||||
return new pass_rtl_unroll_loops (ctxt);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Loop unrolling and peeling.
|
||||
/* Loop unrolling.
|
||||
Copyright (C) 2002-2014 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
@ -34,8 +34,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "target.h"
|
||||
#include "dumpfile.h"
|
||||
|
||||
/* This pass performs loop unrolling and peeling. We only perform these
|
||||
optimizations on innermost loops (with single exception) because
|
||||
/* This pass performs loop unrolling. We only perform this
|
||||
optimization on innermost loops (with single exception) because
|
||||
the impact on performance is greatest here, and we want to avoid
|
||||
unnecessary code size growth. The gain is caused by greater sequentiality
|
||||
of code, better code to optimize for further passes and in some cases
|
||||
@ -44,12 +44,6 @@ along with GCC; see the file COPYING3. If not see
|
||||
|
||||
What we do:
|
||||
|
||||
-- complete peeling of once-rolling loops; this is the above mentioned
|
||||
exception, as this causes loop to be cancelled completely and
|
||||
does not cause code growth
|
||||
-- complete peeling of loops that roll (small) constant times.
|
||||
-- simple peeling of first iterations of loops that do not roll much
|
||||
(according to profile feedback)
|
||||
-- unrolling of loops that roll constant times; this is almost always
|
||||
win, as we get rid of exit condition tests.
|
||||
-- unrolling of loops that roll number of times that we can compute
|
||||
@ -62,7 +56,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
appropriate function below.
|
||||
|
||||
There are a lot of parameters (defined and described in params.def) that
|
||||
control how much we unroll/peel.
|
||||
control how much we unroll.
|
||||
|
||||
??? A great problem is that we don't have a good way how to determine
|
||||
how many times we should unroll the loop; the experiments I have made
|
||||
@ -170,17 +164,11 @@ struct opt_info
|
||||
basic_block loop_preheader; /* The loop preheader basic block. */
|
||||
};
|
||||
|
||||
static void decide_unrolling_and_peeling (int);
|
||||
static void peel_loops_completely (int);
|
||||
static void decide_peel_simple (struct loop *, int);
|
||||
static void decide_peel_once_rolling (struct loop *, int);
|
||||
static void decide_peel_completely (struct loop *, int);
|
||||
static void decide_unroll_stupid (struct loop *, int);
|
||||
static void decide_unroll_constant_iterations (struct loop *, int);
|
||||
static void decide_unroll_runtime_iterations (struct loop *, int);
|
||||
static void peel_loop_simple (struct loop *);
|
||||
static void peel_loop_completely (struct loop *);
|
||||
static void unroll_loop_stupid (struct loop *);
|
||||
static void decide_unrolling (int);
|
||||
static void unroll_loop_constant_iterations (struct loop *);
|
||||
static void unroll_loop_runtime_iterations (struct loop *);
|
||||
static struct opt_info *analyze_insns_in_loop (struct loop *);
|
||||
@ -197,15 +185,13 @@ static void combine_var_copies_in_loop_exit (struct var_to_expand *,
|
||||
basic_block);
|
||||
static rtx get_expansion (struct var_to_expand *);
|
||||
|
||||
/* Emit a message summarizing the unroll or peel that will be
|
||||
/* Emit a message summarizing the unroll that will be
|
||||
performed for LOOP, along with the loop's location LOCUS, if
|
||||
appropriate given the dump or -fopt-info settings. */
|
||||
|
||||
static void
|
||||
report_unroll_peel (struct loop *loop, location_t locus)
|
||||
report_unroll (struct loop *loop, location_t locus)
|
||||
{
|
||||
struct niter_desc *desc;
|
||||
int niters = 0;
|
||||
int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
|
||||
|
||||
if (loop->lpt_decision.decision == LPT_NONE)
|
||||
@ -214,169 +200,20 @@ report_unroll_peel (struct loop *loop, location_t locus)
|
||||
if (!dump_enabled_p ())
|
||||
return;
|
||||
|
||||
/* In the special case where the loop never iterated, emit
|
||||
a different message so that we don't report an unroll by 0.
|
||||
This matches the equivalent message emitted during tree unrolling. */
|
||||
if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY
|
||||
&& !loop->lpt_decision.times)
|
||||
{
|
||||
dump_printf_loc (report_flags, locus,
|
||||
"loop turned into non-loop; it never loops.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
desc = get_simple_loop_desc (loop);
|
||||
|
||||
if (desc->const_iter)
|
||||
niters = desc->niter;
|
||||
else if (loop->header->count)
|
||||
niters = expected_loop_iterations (loop);
|
||||
|
||||
if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
|
||||
dump_printf_loc (report_flags, locus,
|
||||
"loop with %d iterations completely unrolled",
|
||||
loop->lpt_decision.times + 1);
|
||||
else
|
||||
dump_printf_loc (report_flags, locus,
|
||||
"loop %s %d times",
|
||||
(loop->lpt_decision.decision == LPT_PEEL_SIMPLE
|
||||
? "peeled" : "unrolled"),
|
||||
loop->lpt_decision.times);
|
||||
dump_printf_loc (report_flags, locus,
|
||||
"loop unrolled %d times",
|
||||
loop->lpt_decision.times);
|
||||
if (profile_info)
|
||||
dump_printf (report_flags,
|
||||
" (header execution count %d",
|
||||
" (header execution count %d)",
|
||||
(int)loop->header->count);
|
||||
if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
|
||||
dump_printf (report_flags,
|
||||
"%s%s iterations %d)",
|
||||
profile_info ? ", " : " (",
|
||||
desc->const_iter ? "const" : "average",
|
||||
niters);
|
||||
else if (profile_info)
|
||||
dump_printf (report_flags, ")");
|
||||
|
||||
dump_printf (report_flags, "\n");
|
||||
}
|
||||
|
||||
/* Unroll and/or peel (depending on FLAGS) LOOPS. */
|
||||
void
|
||||
unroll_and_peel_loops (int flags)
|
||||
{
|
||||
struct loop *loop;
|
||||
bool changed = false;
|
||||
|
||||
/* First perform complete loop peeling (it is almost surely a win,
|
||||
and affects parameters for further decision a lot). */
|
||||
peel_loops_completely (flags);
|
||||
|
||||
/* Now decide rest of unrolling and peeling. */
|
||||
decide_unrolling_and_peeling (flags);
|
||||
|
||||
/* Scan the loops, inner ones first. */
|
||||
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
|
||||
{
|
||||
/* And perform the appropriate transformations. */
|
||||
switch (loop->lpt_decision.decision)
|
||||
{
|
||||
case LPT_PEEL_COMPLETELY:
|
||||
/* Already done. */
|
||||
gcc_unreachable ();
|
||||
case LPT_PEEL_SIMPLE:
|
||||
peel_loop_simple (loop);
|
||||
changed = true;
|
||||
break;
|
||||
case LPT_UNROLL_CONSTANT:
|
||||
unroll_loop_constant_iterations (loop);
|
||||
changed = true;
|
||||
break;
|
||||
case LPT_UNROLL_RUNTIME:
|
||||
unroll_loop_runtime_iterations (loop);
|
||||
changed = true;
|
||||
break;
|
||||
case LPT_UNROLL_STUPID:
|
||||
unroll_loop_stupid (loop);
|
||||
changed = true;
|
||||
break;
|
||||
case LPT_NONE:
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
if (changed)
|
||||
{
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
fix_loop_structure (NULL);
|
||||
}
|
||||
|
||||
iv_analysis_done ();
|
||||
}
|
||||
|
||||
/* Check whether exit of the LOOP is at the end of loop body. */
|
||||
|
||||
static bool
|
||||
loop_exit_at_end_p (struct loop *loop)
|
||||
{
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
rtx_insn *insn;
|
||||
|
||||
if (desc->in_edge->dest != loop->latch)
|
||||
return false;
|
||||
|
||||
/* Check that the latch is empty. */
|
||||
FOR_BB_INSNS (loop->latch, insn)
|
||||
{
|
||||
if (NONDEBUG_INSN_P (insn))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Depending on FLAGS, check whether to peel loops completely and do so. */
|
||||
/* Decide whether to unroll loops and how much. */
|
||||
static void
|
||||
peel_loops_completely (int flags)
|
||||
{
|
||||
struct loop *loop;
|
||||
bool changed = false;
|
||||
|
||||
/* Scan the loops, the inner ones first. */
|
||||
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
|
||||
{
|
||||
loop->lpt_decision.decision = LPT_NONE;
|
||||
location_t locus = get_loop_location (loop);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (TDF_RTL, locus,
|
||||
";; *** Considering loop %d at BB %d for "
|
||||
"complete peeling ***\n",
|
||||
loop->num, loop->header->index);
|
||||
|
||||
loop->ninsns = num_loop_insns (loop);
|
||||
|
||||
decide_peel_once_rolling (loop, flags);
|
||||
if (loop->lpt_decision.decision == LPT_NONE)
|
||||
decide_peel_completely (loop, flags);
|
||||
|
||||
if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
|
||||
{
|
||||
report_unroll_peel (loop, locus);
|
||||
peel_loop_completely (loop);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (changed)
|
||||
{
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
fix_loop_structure (NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/* Decide whether unroll or peel loops (depending on FLAGS) and how much. */
|
||||
static void
|
||||
decide_unrolling_and_peeling (int flags)
|
||||
decide_unrolling (int flags)
|
||||
{
|
||||
struct loop *loop;
|
||||
|
||||
@ -389,7 +226,7 @@ decide_unrolling_and_peeling (int flags)
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (TDF_RTL, locus,
|
||||
";; *** Considering loop %d at BB %d for "
|
||||
"unrolling and peeling ***\n",
|
||||
"unrolling ***\n",
|
||||
loop->num, loop->header->index);
|
||||
|
||||
/* Do not peel cold areas. */
|
||||
@ -428,204 +265,77 @@ decide_unrolling_and_peeling (int flags)
|
||||
decide_unroll_runtime_iterations (loop, flags);
|
||||
if (loop->lpt_decision.decision == LPT_NONE)
|
||||
decide_unroll_stupid (loop, flags);
|
||||
if (loop->lpt_decision.decision == LPT_NONE)
|
||||
decide_peel_simple (loop, flags);
|
||||
|
||||
report_unroll_peel (loop, locus);
|
||||
report_unroll (loop, locus);
|
||||
}
|
||||
}
|
||||
|
||||
/* Decide whether the LOOP is once rolling and suitable for complete
|
||||
peeling. */
|
||||
static void
|
||||
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
|
||||
/* Unroll LOOPS. */
|
||||
void
|
||||
unroll_loops (int flags)
|
||||
{
|
||||
struct niter_desc *desc;
|
||||
struct loop *loop;
|
||||
bool changed = false;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
|
||||
/* Now decide rest of unrolling. */
|
||||
decide_unrolling (flags);
|
||||
|
||||
/* Is the loop small enough? */
|
||||
if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
|
||||
/* Scan the loops, inner ones first. */
|
||||
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; Not considering loop, is too big\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check for simple loops. */
|
||||
desc = get_simple_loop_desc (loop);
|
||||
|
||||
/* Check number of iterations. */
|
||||
if (!desc->simple_p
|
||||
|| desc->assumptions
|
||||
|| desc->infinite
|
||||
|| !desc->const_iter
|
||||
|| (desc->niter != 0
|
||||
&& get_max_loop_iterations_int (loop) != 0))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
";; Unable to prove that the loop rolls exactly once\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Success. */
|
||||
loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
|
||||
}
|
||||
|
||||
/* Decide whether the LOOP is suitable for complete peeling. */
|
||||
static void
|
||||
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
|
||||
{
|
||||
unsigned npeel;
|
||||
struct niter_desc *desc;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "\n;; Considering peeling completely\n");
|
||||
|
||||
/* Skip non-innermost loops. */
|
||||
if (loop->inner)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; Not considering loop, is not innermost\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Do not peel cold areas. */
|
||||
if (optimize_loop_for_size_p (loop))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; Not considering loop, cold area\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Can the loop be manipulated? */
|
||||
if (!can_duplicate_loop_p (loop))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
";; Not considering loop, cannot duplicate\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* npeel = number of iterations to peel. */
|
||||
npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
|
||||
if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
|
||||
npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
|
||||
|
||||
/* Is the loop small enough? */
|
||||
if (!npeel)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; Not considering loop, is too big\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check for simple loops. */
|
||||
desc = get_simple_loop_desc (loop);
|
||||
|
||||
/* Check number of iterations. */
|
||||
if (!desc->simple_p
|
||||
|| desc->assumptions
|
||||
|| !desc->const_iter
|
||||
|| desc->infinite)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
";; Unable to prove that the loop iterates constant times\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (desc->niter > npeel - 1)
|
||||
{
|
||||
if (dump_file)
|
||||
/* And perform the appropriate transformations. */
|
||||
switch (loop->lpt_decision.decision)
|
||||
{
|
||||
fprintf (dump_file,
|
||||
";; Not peeling loop completely, rolls too much (");
|
||||
fprintf (dump_file, "%"PRId64, desc->niter);
|
||||
fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
|
||||
case LPT_UNROLL_CONSTANT:
|
||||
unroll_loop_constant_iterations (loop);
|
||||
changed = true;
|
||||
break;
|
||||
case LPT_UNROLL_RUNTIME:
|
||||
unroll_loop_runtime_iterations (loop);
|
||||
changed = true;
|
||||
break;
|
||||
case LPT_UNROLL_STUPID:
|
||||
unroll_loop_stupid (loop);
|
||||
changed = true;
|
||||
break;
|
||||
case LPT_NONE:
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Success. */
|
||||
loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
|
||||
if (changed)
|
||||
{
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
fix_loop_structure (NULL);
|
||||
}
|
||||
|
||||
iv_analysis_done ();
|
||||
}
|
||||
|
||||
/* Peel all iterations of LOOP, remove exit edges and cancel the loop
|
||||
completely. The transformation done:
|
||||
/* Check whether exit of the LOOP is at the end of loop body. */
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
body;
|
||||
|
||||
==>
|
||||
|
||||
i = 0;
|
||||
body; i++;
|
||||
body; i++;
|
||||
body; i++;
|
||||
body; i++;
|
||||
*/
|
||||
static void
|
||||
peel_loop_completely (struct loop *loop)
|
||||
static bool
|
||||
loop_exit_at_end_p (struct loop *loop)
|
||||
{
|
||||
sbitmap wont_exit;
|
||||
unsigned HOST_WIDE_INT npeel;
|
||||
unsigned i;
|
||||
edge ein;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
struct opt_info *opt_info = NULL;
|
||||
rtx_insn *insn;
|
||||
|
||||
npeel = desc->niter;
|
||||
/* We should never have conditional in latch block. */
|
||||
gcc_assert (desc->in_edge->dest != loop->header);
|
||||
|
||||
if (npeel)
|
||||
if (desc->in_edge->dest != loop->latch)
|
||||
return false;
|
||||
|
||||
/* Check that the latch is empty. */
|
||||
FOR_BB_INSNS (loop->latch, insn)
|
||||
{
|
||||
bool ok;
|
||||
|
||||
wont_exit = sbitmap_alloc (npeel + 1);
|
||||
bitmap_ones (wont_exit);
|
||||
bitmap_clear_bit (wont_exit, 0);
|
||||
if (desc->noloop_assumptions)
|
||||
bitmap_clear_bit (wont_exit, 1);
|
||||
|
||||
auto_vec<edge> remove_edges;
|
||||
if (flag_split_ivs_in_unroller)
|
||||
opt_info = analyze_insns_in_loop (loop);
|
||||
|
||||
opt_info_start_duplication (opt_info);
|
||||
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
npeel,
|
||||
wont_exit, desc->out_edge,
|
||||
&remove_edges,
|
||||
DLTHE_FLAG_UPDATE_FREQ
|
||||
| DLTHE_FLAG_COMPLETTE_PEEL
|
||||
| (opt_info
|
||||
? DLTHE_RECORD_COPY_NUMBER : 0));
|
||||
gcc_assert (ok);
|
||||
|
||||
free (wont_exit);
|
||||
|
||||
if (opt_info)
|
||||
{
|
||||
apply_opt_in_copies (opt_info, npeel, false, true);
|
||||
free_opt_info (opt_info);
|
||||
}
|
||||
|
||||
/* Remove the exit edges. */
|
||||
FOR_EACH_VEC_ELT (remove_edges, i, ein)
|
||||
remove_path (ein);
|
||||
if (INSN_P (insn) && active_insn_p (insn))
|
||||
return false;
|
||||
}
|
||||
|
||||
ein = desc->in_edge;
|
||||
free_simple_loop_desc (loop);
|
||||
|
||||
/* Now remove the unreachable part of the last iteration and cancel
|
||||
the loop. */
|
||||
remove_path (ein);
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Decide whether to unroll LOOP iterating constant number of times
|
||||
@ -1372,160 +1082,6 @@ unroll_loop_runtime_iterations (struct loop *loop)
|
||||
max_unroll, num_loop_insns (loop));
|
||||
}
|
||||
|
||||
/* Decide whether to simply peel LOOP and how much. */
|
||||
static void
|
||||
decide_peel_simple (struct loop *loop, int flags)
|
||||
{
|
||||
unsigned npeel;
|
||||
widest_int iterations;
|
||||
|
||||
if (!(flags & UAP_PEEL))
|
||||
{
|
||||
/* We were not asked to, just return back silently. */
|
||||
return;
|
||||
}
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "\n;; Considering simply peeling loop\n");
|
||||
|
||||
/* npeel = number of iterations to peel. */
|
||||
npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
|
||||
if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
|
||||
npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
|
||||
|
||||
/* Skip big loops. */
|
||||
if (!npeel)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; Not considering loop, is too big\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Do not simply peel loops with branches inside -- it increases number
|
||||
of mispredicts.
|
||||
Exception is when we do have profile and we however have good chance
|
||||
to peel proper number of iterations loop will iterate in practice.
|
||||
TODO: this heuristic needs tuning; while for complete unrolling
|
||||
the branch inside loop mostly eliminates any improvements, for
|
||||
peeling it is not the case. Also a function call inside loop is
|
||||
also branch from branch prediction POV (and probably better reason
|
||||
to not unroll/peel). */
|
||||
if (num_loop_branches (loop) > 1
|
||||
&& profile_status_for_fn (cfun) != PROFILE_READ)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; Not peeling, contains branches\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* If we have realistic estimate on number of iterations, use it. */
|
||||
if (get_estimated_loop_iterations (loop, &iterations))
|
||||
{
|
||||
if (wi::leu_p (npeel, iterations))
|
||||
{
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, ";; Not peeling loop, rolls too much (");
|
||||
fprintf (dump_file, "%"PRId64,
|
||||
(int64_t) (iterations.to_shwi () + 1));
|
||||
fprintf (dump_file, " iterations > %d [maximum peelings])\n",
|
||||
npeel);
|
||||
}
|
||||
return;
|
||||
}
|
||||
npeel = iterations.to_shwi () + 1;
|
||||
}
|
||||
/* If we have small enough bound on iterations, we can still peel (completely
|
||||
unroll). */
|
||||
else if (get_max_loop_iterations (loop, &iterations)
|
||||
&& wi::ltu_p (iterations, npeel))
|
||||
npeel = iterations.to_shwi () + 1;
|
||||
else
|
||||
{
|
||||
/* For now we have no good heuristics to decide whether loop peeling
|
||||
will be effective, so disable it. */
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
";; Not peeling loop, no evidence it will be profitable\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Success. */
|
||||
loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
|
||||
loop->lpt_decision.times = npeel;
|
||||
}
|
||||
|
||||
/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
|
||||
|
||||
while (cond)
|
||||
body;
|
||||
|
||||
==> (LOOP->LPT_DECISION.TIMES == 3)
|
||||
|
||||
if (!cond) goto end;
|
||||
body;
|
||||
if (!cond) goto end;
|
||||
body;
|
||||
if (!cond) goto end;
|
||||
body;
|
||||
while (cond)
|
||||
body;
|
||||
end: ;
|
||||
*/
|
||||
static void
|
||||
peel_loop_simple (struct loop *loop)
|
||||
{
|
||||
sbitmap wont_exit;
|
||||
unsigned npeel = loop->lpt_decision.times;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
struct opt_info *opt_info = NULL;
|
||||
bool ok;
|
||||
|
||||
if (flag_split_ivs_in_unroller && npeel > 1)
|
||||
opt_info = analyze_insns_in_loop (loop);
|
||||
|
||||
wont_exit = sbitmap_alloc (npeel + 1);
|
||||
bitmap_clear (wont_exit);
|
||||
|
||||
opt_info_start_duplication (opt_info);
|
||||
|
||||
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
npeel, wont_exit, NULL,
|
||||
NULL, DLTHE_FLAG_UPDATE_FREQ
|
||||
| (opt_info
|
||||
? DLTHE_RECORD_COPY_NUMBER
|
||||
: 0));
|
||||
gcc_assert (ok);
|
||||
|
||||
free (wont_exit);
|
||||
|
||||
if (opt_info)
|
||||
{
|
||||
apply_opt_in_copies (opt_info, npeel, false, false);
|
||||
free_opt_info (opt_info);
|
||||
}
|
||||
|
||||
if (desc->simple_p)
|
||||
{
|
||||
if (desc->const_iter)
|
||||
{
|
||||
desc->niter -= npeel;
|
||||
desc->niter_expr = GEN_INT (desc->niter);
|
||||
desc->noloop_assumptions = NULL_RTX;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We cannot just update niter_expr, as its value might be clobbered
|
||||
inside loop. We could handle this by counting the number into
|
||||
temporary just like we do in runtime unrolling, but it does not
|
||||
seem worthwhile. */
|
||||
free_simple_loop_desc (loop);
|
||||
}
|
||||
}
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
|
||||
}
|
||||
|
||||
/* Decide whether to unroll LOOP stupidly and how much. */
|
||||
static void
|
||||
decide_unroll_stupid (struct loop *loop, int flags)
|
||||
|
@ -359,7 +359,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
PUSH_INSERT_PASSES_WITHIN (pass_loop2)
|
||||
NEXT_PASS (pass_rtl_loop_init);
|
||||
NEXT_PASS (pass_rtl_move_loop_invariants);
|
||||
NEXT_PASS (pass_rtl_unroll_and_peel_loops);
|
||||
NEXT_PASS (pass_rtl_unroll_loops);
|
||||
NEXT_PASS (pass_rtl_doloop);
|
||||
NEXT_PASS (pass_rtl_loop_done);
|
||||
TERMINATE_PASS_LIST ()
|
||||
|
@ -1,3 +1,12 @@
|
||||
2014-10-14 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
* gcc.dg/tree-prof/peel-1.c: Update.
|
||||
* gcc.dg/tree-prof/unroll-1.c: Update.
|
||||
* gcc.dg/gcc.dg/unroll_1.c: Update.
|
||||
* gcc.dg/gcc.dg/unroll_2.c: Update.
|
||||
* gcc.dg/gcc.dg/unroll_3.c: Update.
|
||||
* gcc.dg/gcc.dg/unroll_4.c: Update.
|
||||
|
||||
2014-10-14 DJ Delorie <dj@redhat.com>
|
||||
|
||||
* g++.dg/abi/mangle64.C: New.
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* { dg-options "-O3 -fdump-rtl-loop2_unroll -fno-unroll-loops -fpeel-loops" } */
|
||||
/* { dg-options "-O3 -fdump-tree-cunroll-details -fno-unroll-loops -fpeel-loops" } */
|
||||
void abort();
|
||||
|
||||
int a[1000];
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-rtl-loop2_unroll=stderr -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fdisable-tree-cunrolli -fenable-rtl-loop2 -fenable-rtl-loop2_unroll" } */
|
||||
/* { dg-options "-O2 -fdump-tree-cunrolli-details=stderr -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli" } */
|
||||
|
||||
unsigned a[100], b[100];
|
||||
inline void bar()
|
||||
@ -11,7 +11,7 @@ int foo(void)
|
||||
{
|
||||
int i;
|
||||
bar();
|
||||
for (i = 0; i < 2; i++) /* { dg-message "note: loop turned into non-loop; it never loops" } */
|
||||
for (i = 0; i < 2; i++) /* { dg-message "note: loop with 3 iterations completely unrolled" } */
|
||||
{
|
||||
a[i]= b[i] + 1;
|
||||
}
|
||||
@ -21,7 +21,7 @@ int foo(void)
|
||||
int foo2(void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 2; i++) /* { dg-message "note: loop turned into non-loop; it never loops" } */
|
||||
for (i = 0; i < 2; i++) /* { dg-message "note: loop with 3 iterations completely unrolled" } */
|
||||
{
|
||||
a[i]= b[i] + 1;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll=foo -fdisable-tree-cunrolli=foo -fenable-rtl-loop2_unroll" } */
|
||||
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */
|
||||
|
||||
unsigned a[100], b[100];
|
||||
inline void bar()
|
||||
@ -28,5 +28,5 @@ int foo2(void)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-rtl-dump-times "loop turned into non-loop; it never loops" 1 "loop2_unroll" } } */
|
||||
/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
|
||||
/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
|
||||
/* { dg-final { cleanup-tree-dump "cunrolli" } } */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fdisable-tree-cunrolli -fenable-rtl-loop2_unroll=foo" } */
|
||||
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */
|
||||
|
||||
unsigned a[100], b[100];
|
||||
inline void bar()
|
||||
@ -28,5 +28,5 @@ int foo2(void)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-rtl-dump-times "loop turned into non-loop; it never loops" 1 "loop2_unroll" } } */
|
||||
/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
|
||||
/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
|
||||
/* { dg-final { cleanup-tree-dump "cunrolli" } } */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fdisable-tree-cunrolli -fenable-rtl-loop2_unroll=foo2" } */
|
||||
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */
|
||||
|
||||
unsigned a[100], b[100];
|
||||
inline void bar()
|
||||
@ -28,5 +28,5 @@ int foo2(void)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-rtl-dump-times "loop turned into non-loop; it never loops" 1 "loop2_unroll" } } */
|
||||
/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
|
||||
/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
|
||||
/* { dg-final { cleanup-tree-dump "cunrolli" } } */
|
||||
|
@ -504,7 +504,7 @@ extern rtl_opt_pass *make_pass_outof_cfg_layout_mode (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_loop2 (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_rtl_loop_init (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_rtl_move_loop_invariants (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_rtl_unroll_loops (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_rtl_doloop (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_rtl_loop_done (gcc::context *ctxt);
|
||||
|
||||
|
@ -28,9 +28,12 @@ along with GCC; see the file COPYING3. If not see
|
||||
variables. In that case the created optimization possibilities are likely
|
||||
to pay up.
|
||||
|
||||
Additionally in case we detect that it is beneficial to unroll the
|
||||
loop completely, we do it right here to expose the optimization
|
||||
possibilities to the following passes. */
|
||||
We also perform
|
||||
- complete unrolling (or peeling) when the loop is rolling few enough
|
||||
times
|
||||
- simple peeling (i.e. copying a few initial iterations prior to the loop)
|
||||
when number of iteration estimate is known (typically by the profile
|
||||
info). */
|
||||
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
@ -657,11 +660,12 @@ try_unroll_loop_completely (struct loop *loop,
|
||||
HOST_WIDE_INT maxiter,
|
||||
location_t locus)
|
||||
{
|
||||
unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
|
||||
unsigned HOST_WIDE_INT n_unroll = 0, ninsns, max_unroll, unr_insns;
|
||||
gimple cond;
|
||||
struct loop_size size;
|
||||
bool n_unroll_found = false;
|
||||
edge edge_to_cancel = NULL;
|
||||
int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
|
||||
|
||||
/* See if we proved number of iterations to be low constant.
|
||||
|
||||
@ -821,6 +825,8 @@ try_unroll_loop_completely (struct loop *loop,
|
||||
loop->num);
|
||||
return false;
|
||||
}
|
||||
dump_printf_loc (report_flags, locus,
|
||||
"loop turned into non-loop; it never loops.\n");
|
||||
|
||||
initialize_original_copy_tables ();
|
||||
wont_exit = sbitmap_alloc (n_unroll + 1);
|
||||
@ -902,6 +908,133 @@ try_unroll_loop_completely (struct loop *loop,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return number of instructions after peeling. */
|
||||
static unsigned HOST_WIDE_INT
|
||||
estimated_peeled_sequence_size (struct loop_size *size,
|
||||
unsigned HOST_WIDE_INT npeel)
|
||||
{
|
||||
return MAX (npeel * (HOST_WIDE_INT) (size->overall
|
||||
- size->eliminated_by_peeling), 1);
|
||||
}
|
||||
|
||||
/* If the loop is expected to iterate N times and is
|
||||
small enough, duplicate the loop body N+1 times before
|
||||
the loop itself. This way the hot path will never
|
||||
enter the loop.
|
||||
Parameters are the same as for try_unroll_loops_completely */
|
||||
|
||||
static bool
|
||||
try_peel_loop (struct loop *loop,
|
||||
edge exit, tree niter,
|
||||
HOST_WIDE_INT maxiter)
|
||||
{
|
||||
int npeel;
|
||||
struct loop_size size;
|
||||
int peeled_size;
|
||||
sbitmap wont_exit;
|
||||
unsigned i;
|
||||
vec<edge> to_remove = vNULL;
|
||||
edge e;
|
||||
|
||||
/* If the iteration bound is known and large, then we can safely eliminate
|
||||
the check in peeled copies. */
|
||||
if (TREE_CODE (niter) != INTEGER_CST)
|
||||
exit = NULL;
|
||||
|
||||
if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
|
||||
return false;
|
||||
|
||||
/* Peel only innermost loops. */
|
||||
if (loop->inner)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Not peeling: outer loop\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!optimize_loop_for_speed_p (loop))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Not peeling: cold loop\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check if there is an estimate on the number of iterations. */
|
||||
npeel = estimated_loop_iterations_int (loop);
|
||||
if (npeel < 0)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Not peeling: number of iterations is not "
|
||||
"estimated\n");
|
||||
return false;
|
||||
}
|
||||
if (maxiter >= 0 && maxiter <= npeel)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Not peeling: upper bound is known so can "
|
||||
"unroll complettely\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* We want to peel estimated number of iterations + 1 (so we never
|
||||
enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES
|
||||
and be sure to avoid overflows. */
|
||||
if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Not peeling: rolls too much "
|
||||
"(%i + 1 > --param max-peel-times)\n", npeel);
|
||||
return false;
|
||||
}
|
||||
npeel++;
|
||||
|
||||
/* Check peeled loops size. */
|
||||
tree_estimate_loop_size (loop, exit, NULL, &size,
|
||||
PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
|
||||
if ((peeled_size = estimated_peeled_sequence_size (&size, npeel))
|
||||
> PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Not peeling: peeled sequence size is too large "
|
||||
"(%i insns > --param max-peel-insns)", peeled_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Duplicate possibly eliminating the exits. */
|
||||
initialize_original_copy_tables ();
|
||||
wont_exit = sbitmap_alloc (npeel + 1);
|
||||
bitmap_ones (wont_exit);
|
||||
bitmap_clear_bit (wont_exit, 0);
|
||||
if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
npeel, wont_exit,
|
||||
exit, &to_remove,
|
||||
DLTHE_FLAG_UPDATE_FREQ
|
||||
| DLTHE_FLAG_COMPLETTE_PEEL))
|
||||
{
|
||||
free_original_copy_tables ();
|
||||
free (wont_exit);
|
||||
return false;
|
||||
}
|
||||
FOR_EACH_VEC_ELT (to_remove, i, e)
|
||||
{
|
||||
bool ok = remove_path (e);
|
||||
gcc_assert (ok);
|
||||
}
|
||||
free (wont_exit);
|
||||
free_original_copy_tables ();
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Peeled loop %d, %i times.\n",
|
||||
loop->num, npeel);
|
||||
}
|
||||
if (loop->any_upper_bound)
|
||||
loop->nb_iterations_upper_bound -= npeel;
|
||||
loop->nb_iterations_estimate = 0;
|
||||
/* Make sure to mark loop cold so we do not try to peel it more. */
|
||||
scale_loop_profile (loop, 1, 0);
|
||||
loop->header->count = 0;
|
||||
return true;
|
||||
}
|
||||
/* Adds a canonical induction variable to LOOP if suitable.
|
||||
CREATE_IV is true if we may create a new iv. UL determines
|
||||
which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
|
||||
@ -981,6 +1114,9 @@ canonicalize_loop_induction_variables (struct loop *loop,
|
||||
&& exit && just_once_each_iteration_p (loop, exit->src))
|
||||
create_canonical_iv (loop, exit, niter);
|
||||
|
||||
if (ul == UL_ALL)
|
||||
modified |= try_peel_loop (loop, exit, niter, maxiter);
|
||||
|
||||
return modified;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user