loop-unroll.c (decide_unrolling_and_peeling): Rename to decide_unrolling.

* loop-unroll.c (decide_unrolling_and_peeling): Rename to ...
	(decide_unrolling): ... this one.
	(peel_loops_completely): Remove.
	(decide_peel_simple): Remove.
	(decide_peel_once_rolling): Remove.
	(decide_peel_completely): Remove.
	(peel_loop_simple): Remove.
	(peel_loop_completely): Remove.
	(unroll_and_peel_loops): Rename to ...
	(unroll_loops): ... this one; handle only unrolling.
	* cfgloop.h (lpt_dec): Remove LPT_PEEL_COMPLETELY and
	LPT_PEEL_SIMPLE.
	(UAP_PEEL): Remove.
	(unroll_and_peel_loops): Remove.
	(unroll_loops): New.
	* passes.def: Replace
	pass_rtl_unroll_and_peel_loops by pass_rtl_unroll_loops.
	* loop-init.c (gate_rtl_unroll_and_peel_loops,
	rtl_unroll_and_peel_loops): Rename to ...
	(gate_rtl_unroll_loops, rtl_unroll_loops): ... these; update.
	(pass_rtl_unroll_and_peel_loops): Rename to ...
	(pass_rtl_unroll_loops): ... this one.
	* tree-pass.h (make_pass_rtl_unroll_and_peel_loops): Remove.
	(make_pass_rtl_unroll_loops): New.
	* tree-ssa-loop-ivcanon.c (estimated_peeled_sequence_size,
	try_peel_loop): New.
	(canonicalize_loop_induction_variables): Update.

	* gcc.dg/tree-prof/peel-1.c: Update.
	* gcc.dg/tree-prof/unroll-1.c: Update.
	* gcc.dg/unroll_1.c: Update.
	* gcc.dg/unroll_2.c: Update.
	* gcc.dg/unroll_3.c: Update.
	* gcc.dg/unroll_4.c: Update.

From-SVN: r216238
Author: Jan Hubicka <hubicka@ucw.cz>, 2014-10-15 10:02:06 +02:00 (committed by Jan Hubicka)
commit f8934be787 (parent fa7fa585ea)
13 changed files with 268 additions and 544 deletions
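
For context on what is being moved (illustrative only; the function below and the iteration estimate are made up, not taken from the patch): the peeling now done at the tree level in tree-ssa-loop-ivcanon.c copies the expected number of iterations in front of the loop, so the hot path never enters the loop itself.

  /* Original loop; assume profile feedback estimates two iterations.  */
  void scale (float *a, int n, float f)
  {
    for (int i = 0; i < n; i++)
      a[i] *= f;
  }

  /* Roughly the shape after peeling estimate + 1 = 3 iterations: the
     common case runs straight-line code and the loop is entered only
     when n > 3.  */
  void scale_peeled (float *a, int n, float f)
  {
    int i = 0;
    if (i >= n) return;  a[i] *= f;  i++;
    if (i >= n) return;  a[i] *= f;  i++;
    if (i >= n) return;  a[i] *= f;  i++;
    for (; i < n; i++)
      a[i] *= f;
  }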

gcc/ChangeLog

@ -1,3 +1,32 @@
2014-10-14 Jan Hubicka <hubicka@ucw.cz>
* loop-unroll.c: (decide_unrolling_and_peeling): Rename to
(decide_unrolling): ... this one.
(peel_loops_completely): Remove.
(decide_peel_simple): Remove.
(decide_peel_once_rolling): Remove.
(decide_peel_completely): Remove.
(peel_loop_simple): Remove.
(peel_loop_completely): Remove.
(unroll_and_peel_loops): Rename to ...
(unroll_loops): ... this one; handle only unrolling.
* cfgloop.h (lpt_dec): Remove LPT_PEEL_COMPLETELY and
LPT_PEEL_SIMPLE.
(UAP_PEEL): Remove.
(unroll_and_peel_loops): Remove.
(unroll_loops): New.
* passes.def: Replace
pass_rtl_unroll_and_peel_loops by pass_rtl_unroll_loops.
* loop-init.c (gate_rtl_unroll_and_peel_loops,
rtl_unroll_and_peel_loops): Rename to ...
(gate_rtl_unroll_loops, rtl_unroll_loops): ... these; update.
(pass_rtl_unroll_and_peel_loops): Rename to ...
(pass_rtl_unroll_loops): ... this one.
* tree-pass.h (make_pass_rtl_unroll_and_peel_loops): Remove.
(make_pass_rtl_unroll_loops): New.
* tree-ssa-loop-ivcanon.c: (estimated_peeled_sequence_size, try_peel_loop): New.
(canonicalize_loop_induction_variables): Update.
2014-10-14 Max Filippov <jcmvbkbc@gmail.com>
* config/xtensa/xtensa.h (TARGET_HARD_FLOAT_POSTINC): new macro.

gcc/cfgloop.h

@ -30,8 +30,6 @@ along with GCC; see the file COPYING3. If not see
enum lpt_dec
{
LPT_NONE,
LPT_PEEL_COMPLETELY,
LPT_PEEL_SIMPLE,
LPT_UNROLL_CONSTANT,
LPT_UNROLL_RUNTIME,
LPT_UNROLL_STUPID
@ -731,12 +729,11 @@ extern void loop_optimizer_finalize (void);
/* Optimization passes. */
enum
{
UAP_PEEL = 1, /* Enables loop peeling. */
UAP_UNROLL = 2, /* Enables unrolling of loops if it seems profitable. */
UAP_UNROLL_ALL = 4 /* Enables unrolling of all loops. */
UAP_UNROLL = 1, /* Enables unrolling of loops if it seems profitable. */
UAP_UNROLL_ALL = 2 /* Enables unrolling of all loops. */
};
extern void unroll_and_peel_loops (int);
extern void unroll_loops (int);
extern void doloop_optimize_loops (void);
extern void move_loop_invariants (void);
extern void scale_loop_profile (struct loop *loop, int scale, gcov_type iteration_bound);
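
A usage sketch of the slimmed-down interface (the caller shown is hypothetical; the real one is pass_rtl_unroll_loops::execute in the loop-init.c change below, and the flag names come from this header):

  int flags = 0;
  if (flag_unroll_loops)
    flags |= UAP_UNROLL;      /* unroll where it seems profitable */
  if (flag_unroll_all_loops)
    flags |= UAP_UNROLL_ALL;  /* unroll all loops */
  unroll_loops (flags);       /* formerly unroll_and_peel_loops; UAP_PEEL is gone */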

gcc/loop-init.c

@ -357,7 +357,6 @@ pass_loop2::gate (function *fun)
if (optimize > 0
&& (flag_move_loop_invariants
|| flag_unswitch_loops
|| flag_peel_loops
|| flag_unroll_loops
#ifdef HAVE_doloop_end
|| (flag_branch_on_count_reg && HAVE_doloop_end)
@ -537,7 +536,7 @@ make_pass_rtl_move_loop_invariants (gcc::context *ctxt)
namespace {
const pass_data pass_data_rtl_unroll_and_peel_loops =
const pass_data pass_data_rtl_unroll_loops =
{
RTL_PASS, /* type */
"loop2_unroll", /* name */
@ -550,11 +549,11 @@ const pass_data pass_data_rtl_unroll_and_peel_loops =
0, /* todo_flags_finish */
};
class pass_rtl_unroll_and_peel_loops : public rtl_opt_pass
class pass_rtl_unroll_loops : public rtl_opt_pass
{
public:
pass_rtl_unroll_and_peel_loops (gcc::context *ctxt)
: rtl_opt_pass (pass_data_rtl_unroll_and_peel_loops, ctxt)
pass_rtl_unroll_loops (gcc::context *ctxt)
: rtl_opt_pass (pass_data_rtl_unroll_loops, ctxt)
{}
/* opt_pass methods: */
@ -565,10 +564,10 @@ public:
virtual unsigned int execute (function *);
}; // class pass_rtl_unroll_and_peel_loops
}; // class pass_rtl_unroll_loops
unsigned int
pass_rtl_unroll_and_peel_loops::execute (function *fun)
pass_rtl_unroll_loops::execute (function *fun)
{
if (number_of_loops (fun) > 1)
{
@ -576,14 +575,12 @@ pass_rtl_unroll_and_peel_loops::execute (function *fun)
if (dump_file)
df_dump (dump_file);
if (flag_peel_loops)
flags |= UAP_PEEL;
if (flag_unroll_loops)
flags |= UAP_UNROLL;
if (flag_unroll_all_loops)
flags |= UAP_UNROLL_ALL;
unroll_and_peel_loops (flags);
unroll_loops (flags);
}
return 0;
}
@ -591,9 +588,9 @@ pass_rtl_unroll_and_peel_loops::execute (function *fun)
} // anon namespace
rtl_opt_pass *
make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt)
make_pass_rtl_unroll_loops (gcc::context *ctxt)
{
return new pass_rtl_unroll_and_peel_loops (ctxt);
return new pass_rtl_unroll_loops (ctxt);
}

gcc/loop-unroll.c

@ -1,4 +1,4 @@
/* Loop unrolling and peeling.
/* Loop unrolling.
Copyright (C) 2002-2014 Free Software Foundation, Inc.
This file is part of GCC.
@ -34,8 +34,8 @@ along with GCC; see the file COPYING3. If not see
#include "target.h"
#include "dumpfile.h"
/* This pass performs loop unrolling and peeling. We only perform these
optimizations on innermost loops (with single exception) because
/* This pass performs loop unrolling. We only perform this
optimization on innermost loops (with single exception) because
the impact on performance is greatest here, and we want to avoid
unnecessary code size growth. The gain is caused by greater sequentiality
of code, better code to optimize for further passes and in some cases
@ -44,12 +44,6 @@ along with GCC; see the file COPYING3. If not see
What we do:
-- complete peeling of once-rolling loops; this is the above mentioned
exception, as this causes loop to be cancelled completely and
does not cause code growth
-- complete peeling of loops that roll (small) constant times.
-- simple peeling of first iterations of loops that do not roll much
(according to profile feedback)
-- unrolling of loops that roll constant times; this is almost always
win, as we get rid of exit condition tests.
-- unrolling of loops that roll number of times that we can compute
@ -62,7 +56,7 @@ along with GCC; see the file COPYING3. If not see
appropriate function below.
There is a lot of parameters (defined and described in params.def) that
control how much we unroll/peel.
control how much we unroll.
??? A great problem is that we don't have a good way how to determine
how many times we should unroll the loop; the experiments I have made
@ -170,17 +164,11 @@ struct opt_info
basic_block loop_preheader; /* The loop preheader basic block. */
};
static void decide_unrolling_and_peeling (int);
static void peel_loops_completely (int);
static void decide_peel_simple (struct loop *, int);
static void decide_peel_once_rolling (struct loop *, int);
static void decide_peel_completely (struct loop *, int);
static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void peel_loop_simple (struct loop *);
static void peel_loop_completely (struct loop *);
static void unroll_loop_stupid (struct loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
@ -197,15 +185,13 @@ static void combine_var_copies_in_loop_exit (struct var_to_expand *,
basic_block);
static rtx get_expansion (struct var_to_expand *);
/* Emit a message summarizing the unroll or peel that will be
/* Emit a message summarizing the unroll that will be
performed for LOOP, along with the loop's location LOCUS, if
appropriate given the dump or -fopt-info settings. */
static void
report_unroll_peel (struct loop *loop, location_t locus)
report_unroll (struct loop *loop, location_t locus)
{
struct niter_desc *desc;
int niters = 0;
int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
if (loop->lpt_decision.decision == LPT_NONE)
@ -214,169 +200,20 @@ report_unroll_peel (struct loop *loop, location_t locus)
if (!dump_enabled_p ())
return;
/* In the special case where the loop never iterated, emit
a different message so that we don't report an unroll by 0.
This matches the equivalent message emitted during tree unrolling. */
if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY
&& !loop->lpt_decision.times)
{
dump_printf_loc (report_flags, locus,
"loop turned into non-loop; it never loops.\n");
return;
}
desc = get_simple_loop_desc (loop);
if (desc->const_iter)
niters = desc->niter;
else if (loop->header->count)
niters = expected_loop_iterations (loop);
if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
dump_printf_loc (report_flags, locus,
"loop with %d iterations completely unrolled",
loop->lpt_decision.times + 1);
else
dump_printf_loc (report_flags, locus,
"loop %s %d times",
(loop->lpt_decision.decision == LPT_PEEL_SIMPLE
? "peeled" : "unrolled"),
loop->lpt_decision.times);
dump_printf_loc (report_flags, locus,
"loop unrolled %d times",
loop->lpt_decision.times);
if (profile_info)
dump_printf (report_flags,
" (header execution count %d",
" (header execution count %d)",
(int)loop->header->count);
if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
dump_printf (report_flags,
"%s%s iterations %d)",
profile_info ? ", " : " (",
desc->const_iter ? "const" : "average",
niters);
else if (profile_info)
dump_printf (report_flags, ")");
dump_printf (report_flags, "\n");
}
/* Unroll and/or peel (depending on FLAGS) LOOPS. */
void
unroll_and_peel_loops (int flags)
{
struct loop *loop;
bool changed = false;
/* First perform complete loop peeling (it is almost surely a win,
and affects parameters for further decision a lot). */
peel_loops_completely (flags);
/* Now decide rest of unrolling and peeling. */
decide_unrolling_and_peeling (flags);
/* Scan the loops, inner ones first. */
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
{
/* And perform the appropriate transformations. */
switch (loop->lpt_decision.decision)
{
case LPT_PEEL_COMPLETELY:
/* Already done. */
gcc_unreachable ();
case LPT_PEEL_SIMPLE:
peel_loop_simple (loop);
changed = true;
break;
case LPT_UNROLL_CONSTANT:
unroll_loop_constant_iterations (loop);
changed = true;
break;
case LPT_UNROLL_RUNTIME:
unroll_loop_runtime_iterations (loop);
changed = true;
break;
case LPT_UNROLL_STUPID:
unroll_loop_stupid (loop);
changed = true;
break;
case LPT_NONE:
break;
default:
gcc_unreachable ();
}
}
if (changed)
{
calculate_dominance_info (CDI_DOMINATORS);
fix_loop_structure (NULL);
}
iv_analysis_done ();
}
/* Check whether exit of the LOOP is at the end of loop body. */
static bool
loop_exit_at_end_p (struct loop *loop)
{
struct niter_desc *desc = get_simple_loop_desc (loop);
rtx_insn *insn;
if (desc->in_edge->dest != loop->latch)
return false;
/* Check that the latch is empty. */
FOR_BB_INSNS (loop->latch, insn)
{
if (NONDEBUG_INSN_P (insn))
return false;
}
return true;
}
/* Depending on FLAGS, check whether to peel loops completely and do so. */
/* Decide whether unroll loops and how much. */
static void
peel_loops_completely (int flags)
{
struct loop *loop;
bool changed = false;
/* Scan the loops, the inner ones first. */
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
{
loop->lpt_decision.decision = LPT_NONE;
location_t locus = get_loop_location (loop);
if (dump_enabled_p ())
dump_printf_loc (TDF_RTL, locus,
";; *** Considering loop %d at BB %d for "
"complete peeling ***\n",
loop->num, loop->header->index);
loop->ninsns = num_loop_insns (loop);
decide_peel_once_rolling (loop, flags);
if (loop->lpt_decision.decision == LPT_NONE)
decide_peel_completely (loop, flags);
if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
{
report_unroll_peel (loop, locus);
peel_loop_completely (loop);
changed = true;
}
}
if (changed)
{
calculate_dominance_info (CDI_DOMINATORS);
fix_loop_structure (NULL);
}
}
/* Decide whether unroll or peel loops (depending on FLAGS) and how much. */
static void
decide_unrolling_and_peeling (int flags)
decide_unrolling (int flags)
{
struct loop *loop;
@ -389,7 +226,7 @@ decide_unrolling_and_peeling (int flags)
if (dump_enabled_p ())
dump_printf_loc (TDF_RTL, locus,
";; *** Considering loop %d at BB %d for "
"unrolling and peeling ***\n",
"unrolling ***\n",
loop->num, loop->header->index);
/* Do not peel cold areas. */
@ -428,204 +265,77 @@ decide_unrolling_and_peeling (int flags)
decide_unroll_runtime_iterations (loop, flags);
if (loop->lpt_decision.decision == LPT_NONE)
decide_unroll_stupid (loop, flags);
if (loop->lpt_decision.decision == LPT_NONE)
decide_peel_simple (loop, flags);
report_unroll_peel (loop, locus);
report_unroll (loop, locus);
}
}
/* Decide whether the LOOP is once rolling and suitable for complete
peeling. */
static void
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
/* Unroll LOOPS. */
void
unroll_loops (int flags)
{
struct niter_desc *desc;
struct loop *loop;
bool changed = false;
if (dump_file)
fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
/* Now decide rest of unrolling. */
decide_unrolling (flags);
/* Is the loop small enough? */
if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
/* Scan the loops, inner ones first. */
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, is too big\n");
return;
}
/* Check for simple loops. */
desc = get_simple_loop_desc (loop);
/* Check number of iterations. */
if (!desc->simple_p
|| desc->assumptions
|| desc->infinite
|| !desc->const_iter
|| (desc->niter != 0
&& get_max_loop_iterations_int (loop) != 0))
{
if (dump_file)
fprintf (dump_file,
";; Unable to prove that the loop rolls exactly once\n");
return;
}
/* Success. */
loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
}
/* Decide whether the LOOP is suitable for complete peeling. */
static void
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
unsigned npeel;
struct niter_desc *desc;
if (dump_file)
fprintf (dump_file, "\n;; Considering peeling completely\n");
/* Skip non-innermost loops. */
if (loop->inner)
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, is not innermost\n");
return;
}
/* Do not peel cold areas. */
if (optimize_loop_for_size_p (loop))
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, cold area\n");
return;
}
/* Can the loop be manipulated? */
if (!can_duplicate_loop_p (loop))
{
if (dump_file)
fprintf (dump_file,
";; Not considering loop, cannot duplicate\n");
return;
}
/* npeel = number of iterations to peel. */
npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
/* Is the loop small enough? */
if (!npeel)
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, is too big\n");
return;
}
/* Check for simple loops. */
desc = get_simple_loop_desc (loop);
/* Check number of iterations. */
if (!desc->simple_p
|| desc->assumptions
|| !desc->const_iter
|| desc->infinite)
{
if (dump_file)
fprintf (dump_file,
";; Unable to prove that the loop iterates constant times\n");
return;
}
if (desc->niter > npeel - 1)
{
if (dump_file)
/* And perform the appropriate transformations. */
switch (loop->lpt_decision.decision)
{
fprintf (dump_file,
";; Not peeling loop completely, rolls too much (");
fprintf (dump_file, "%"PRId64, desc->niter);
fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
case LPT_UNROLL_CONSTANT:
unroll_loop_constant_iterations (loop);
changed = true;
break;
case LPT_UNROLL_RUNTIME:
unroll_loop_runtime_iterations (loop);
changed = true;
break;
case LPT_UNROLL_STUPID:
unroll_loop_stupid (loop);
changed = true;
break;
case LPT_NONE:
break;
default:
gcc_unreachable ();
}
return;
}
/* Success. */
loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
if (changed)
{
calculate_dominance_info (CDI_DOMINATORS);
fix_loop_structure (NULL);
}
iv_analysis_done ();
}
/* Peel all iterations of LOOP, remove exit edges and cancel the loop
completely. The transformation done:
/* Check whether exit of the LOOP is at the end of loop body. */
for (i = 0; i < 4; i++)
body;
==>
i = 0;
body; i++;
body; i++;
body; i++;
body; i++;
*/
static void
peel_loop_completely (struct loop *loop)
static bool
loop_exit_at_end_p (struct loop *loop)
{
sbitmap wont_exit;
unsigned HOST_WIDE_INT npeel;
unsigned i;
edge ein;
struct niter_desc *desc = get_simple_loop_desc (loop);
struct opt_info *opt_info = NULL;
rtx_insn *insn;
npeel = desc->niter;
/* We should never have conditional in latch block. */
gcc_assert (desc->in_edge->dest != loop->header);
if (npeel)
if (desc->in_edge->dest != loop->latch)
return false;
/* Check that the latch is empty. */
FOR_BB_INSNS (loop->latch, insn)
{
bool ok;
wont_exit = sbitmap_alloc (npeel + 1);
bitmap_ones (wont_exit);
bitmap_clear_bit (wont_exit, 0);
if (desc->noloop_assumptions)
bitmap_clear_bit (wont_exit, 1);
auto_vec<edge> remove_edges;
if (flag_split_ivs_in_unroller)
opt_info = analyze_insns_in_loop (loop);
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
npeel,
wont_exit, desc->out_edge,
&remove_edges,
DLTHE_FLAG_UPDATE_FREQ
| DLTHE_FLAG_COMPLETTE_PEEL
| (opt_info
? DLTHE_RECORD_COPY_NUMBER : 0));
gcc_assert (ok);
free (wont_exit);
if (opt_info)
{
apply_opt_in_copies (opt_info, npeel, false, true);
free_opt_info (opt_info);
}
/* Remove the exit edges. */
FOR_EACH_VEC_ELT (remove_edges, i, ein)
remove_path (ein);
if (INSN_P (insn) && active_insn_p (insn))
return false;
}
ein = desc->in_edge;
free_simple_loop_desc (loop);
/* Now remove the unreachable part of the last iteration and cancel
the loop. */
remove_path (ein);
if (dump_file)
fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
return true;
}
/* Decide whether to unroll LOOP iterating constant number of times
@ -1372,160 +1082,6 @@ unroll_loop_runtime_iterations (struct loop *loop)
max_unroll, num_loop_insns (loop));
}
/* Decide whether to simply peel LOOP and how much. */
static void
decide_peel_simple (struct loop *loop, int flags)
{
unsigned npeel;
widest_int iterations;
if (!(flags & UAP_PEEL))
{
/* We were not asked to, just return back silently. */
return;
}
if (dump_file)
fprintf (dump_file, "\n;; Considering simply peeling loop\n");
/* npeel = number of iterations to peel. */
npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
/* Skip big loops. */
if (!npeel)
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, is too big\n");
return;
}
/* Do not simply peel loops with branches inside -- it increases number
of mispredicts.
Exception is when we do have profile and we however have good chance
to peel proper number of iterations loop will iterate in practice.
TODO: this heuristic needs tunning; while for complette unrolling
the branch inside loop mostly eliminates any improvements, for
peeling it is not the case. Also a function call inside loop is
also branch from branch prediction POV (and probably better reason
to not unroll/peel). */
if (num_loop_branches (loop) > 1
&& profile_status_for_fn (cfun) != PROFILE_READ)
{
if (dump_file)
fprintf (dump_file, ";; Not peeling, contains branches\n");
return;
}
/* If we have realistic estimate on number of iterations, use it. */
if (get_estimated_loop_iterations (loop, &iterations))
{
if (wi::leu_p (npeel, iterations))
{
if (dump_file)
{
fprintf (dump_file, ";; Not peeling loop, rolls too much (");
fprintf (dump_file, "%"PRId64,
(int64_t) (iterations.to_shwi () + 1));
fprintf (dump_file, " iterations > %d [maximum peelings])\n",
npeel);
}
return;
}
npeel = iterations.to_shwi () + 1;
}
/* If we have small enough bound on iterations, we can still peel (completely
unroll). */
else if (get_max_loop_iterations (loop, &iterations)
&& wi::ltu_p (iterations, npeel))
npeel = iterations.to_shwi () + 1;
else
{
/* For now we have no good heuristics to decide whether loop peeling
will be effective, so disable it. */
if (dump_file)
fprintf (dump_file,
";; Not peeling loop, no evidence it will be profitable\n");
return;
}
/* Success. */
loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
loop->lpt_decision.times = npeel;
}
/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
while (cond)
body;
==> (LOOP->LPT_DECISION.TIMES == 3)
if (!cond) goto end;
body;
if (!cond) goto end;
body;
if (!cond) goto end;
body;
while (cond)
body;
end: ;
*/
static void
peel_loop_simple (struct loop *loop)
{
sbitmap wont_exit;
unsigned npeel = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
struct opt_info *opt_info = NULL;
bool ok;
if (flag_split_ivs_in_unroller && npeel > 1)
opt_info = analyze_insns_in_loop (loop);
wont_exit = sbitmap_alloc (npeel + 1);
bitmap_clear (wont_exit);
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
npeel, wont_exit, NULL,
NULL, DLTHE_FLAG_UPDATE_FREQ
| (opt_info
? DLTHE_RECORD_COPY_NUMBER
: 0));
gcc_assert (ok);
free (wont_exit);
if (opt_info)
{
apply_opt_in_copies (opt_info, npeel, false, false);
free_opt_info (opt_info);
}
if (desc->simple_p)
{
if (desc->const_iter)
{
desc->niter -= npeel;
desc->niter_expr = GEN_INT (desc->niter);
desc->noloop_assumptions = NULL_RTX;
}
else
{
/* We cannot just update niter_expr, as its value might be clobbered
inside loop. We could handle this by counting the number into
temporary just like we do in runtime unrolling, but it does not
seem worthwhile. */
free_simple_loop_desc (loop);
}
}
if (dump_file)
fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
}
/* Decide whether to unroll LOOP stupidly and how much. */
static void
decide_unroll_stupid (struct loop *loop, int flags)

gcc/passes.def

@ -359,7 +359,7 @@ along with GCC; see the file COPYING3. If not see
PUSH_INSERT_PASSES_WITHIN (pass_loop2)
NEXT_PASS (pass_rtl_loop_init);
NEXT_PASS (pass_rtl_move_loop_invariants);
NEXT_PASS (pass_rtl_unroll_and_peel_loops);
NEXT_PASS (pass_rtl_unroll_loops);
NEXT_PASS (pass_rtl_doloop);
NEXT_PASS (pass_rtl_loop_done);
TERMINATE_PASS_LIST ()

gcc/testsuite/ChangeLog

@ -1,3 +1,12 @@
2014-10-14 Jan Hubicka <hubicka@ucw.cz>
* gcc.dg/tree-prof/peel-1.c: Update.
* gcc.dg/tree-prof/unroll-1.c: Update.
* gcc.dg/unroll_1.c: Update.
* gcc.dg/unroll_2.c: Update.
* gcc.dg/unroll_3.c: Update.
* gcc.dg/unroll_4.c: Update.
2014-10-14 DJ Delorie <dj@redhat.com>
* g++.dg/abi/mangle64.C: New.

gcc/testsuite/gcc.dg/tree-prof/peel-1.c

@ -1,4 +1,4 @@
/* { dg-options "-O3 -fdump-rtl-loop2_unroll -fno-unroll-loops -fpeel-loops" } */
/* { dg-options "-O3 -fdump-tree-cunroll-details -fno-unroll-loops -fpeel-loops" } */
void abort();
int a[1000];

gcc/testsuite/gcc.dg/unroll_1.c

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-rtl-loop2_unroll=stderr -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fdisable-tree-cunrolli -fenable-rtl-loop2 -fenable-rtl-loop2_unroll" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details=stderr -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli" } */
unsigned a[100], b[100];
inline void bar()
@ -11,7 +11,7 @@ int foo(void)
{
int i;
bar();
for (i = 0; i < 2; i++) /* { dg-message "note: loop turned into non-loop; it never loops" } */
for (i = 0; i < 2; i++) /* { dg-message "note: loop with 3 iterations completely unrolled" } */
{
a[i]= b[i] + 1;
}
@ -21,7 +21,7 @@ int foo(void)
int foo2(void)
{
int i;
for (i = 0; i < 2; i++) /* { dg-message "note: loop turned into non-loop; it never loops" } */
for (i = 0; i < 2; i++) /* { dg-message "note: loop with 3 iterations completely unrolled" } */
{
a[i]= b[i] + 1;
}

gcc/testsuite/gcc.dg/unroll_2.c

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll=foo -fdisable-tree-cunrolli=foo -fenable-rtl-loop2_unroll" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */
unsigned a[100], b[100];
inline void bar()
@ -28,5 +28,5 @@ int foo2(void)
return 1;
}
/* { dg-final { scan-rtl-dump-times "loop turned into non-loop; it never loops" 1 "loop2_unroll" } } */
/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
/* { dg-final { cleanup-tree-dump "cunrolli" } } */

gcc/testsuite/gcc.dg/unroll_3.c

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fdisable-tree-cunrolli -fenable-rtl-loop2_unroll=foo" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */
unsigned a[100], b[100];
inline void bar()
@ -28,5 +28,5 @@ int foo2(void)
return 1;
}
/* { dg-final { scan-rtl-dump-times "loop turned into non-loop; it never loops" 1 "loop2_unroll" } } */
/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
/* { dg-final { cleanup-tree-dump "cunrolli" } } */

gcc/testsuite/gcc.dg/unroll_4.c

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fdisable-tree-cunrolli -fenable-rtl-loop2_unroll=foo2" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */
unsigned a[100], b[100];
inline void bar()
@ -28,5 +28,5 @@ int foo2(void)
return 1;
}
/* { dg-final { scan-rtl-dump-times "loop turned into non-loop; it never loops" 1 "loop2_unroll" } } */
/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
/* { dg-final { cleanup-tree-dump "cunrolli" } } */

gcc/tree-pass.h

@ -504,7 +504,7 @@ extern rtl_opt_pass *make_pass_outof_cfg_layout_mode (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_loop2 (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_loop_init (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_move_loop_invariants (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_unroll_loops (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_doloop (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_loop_done (gcc::context *ctxt);

gcc/tree-ssa-loop-ivcanon.c

@ -28,9 +28,12 @@ along with GCC; see the file COPYING3. If not see
variables. In that case the created optimization possibilities are likely
to pay up.
Additionally in case we detect that it is beneficial to unroll the
loop completely, we do it right here to expose the optimization
possibilities to the following passes. */
We also perform
- complette unrolling (or peeling) when the loops is rolling few enough
times
- simple peeling (i.e. copying few initial iterations prior the loop)
when number of iteration estimate is known (typically by the profile
info). */
#include "config.h"
#include "system.h"
@ -657,11 +660,12 @@ try_unroll_loop_completely (struct loop *loop,
HOST_WIDE_INT maxiter,
location_t locus)
{
unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
unsigned HOST_WIDE_INT n_unroll = 0, ninsns, max_unroll, unr_insns;
gimple cond;
struct loop_size size;
bool n_unroll_found = false;
edge edge_to_cancel = NULL;
int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
/* See if we proved number of iterations to be low constant.
@ -821,6 +825,8 @@ try_unroll_loop_completely (struct loop *loop,
loop->num);
return false;
}
dump_printf_loc (report_flags, locus,
"loop turned into non-loop; it never loops.\n");
initialize_original_copy_tables ();
wont_exit = sbitmap_alloc (n_unroll + 1);
@ -902,6 +908,133 @@ try_unroll_loop_completely (struct loop *loop,
return true;
}
/* Return number of instructions after peeling. */
static unsigned HOST_WIDE_INT
estimated_peeled_sequence_size (struct loop_size *size,
unsigned HOST_WIDE_INT npeel)
{
return MAX (npeel * (HOST_WIDE_INT) (size->overall
- size->eliminated_by_peeling), 1);
}
/* If the loop is expected to iterate N times and is
small enough, duplicate the loop body N+1 times before
the loop itself. This way the hot path will never
enter the loop.
Parameters are the same as for try_unroll_loops_completely */
static bool
try_peel_loop (struct loop *loop,
edge exit, tree niter,
HOST_WIDE_INT maxiter)
{
int npeel;
struct loop_size size;
int peeled_size;
sbitmap wont_exit;
unsigned i;
vec<edge> to_remove = vNULL;
edge e;
/* If the iteration bound is known and large, then we can safely eliminate
the check in peeled copies. */
if (TREE_CODE (niter) != INTEGER_CST)
exit = NULL;
if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
return false;
/* Peel only innermost loops. */
if (loop->inner)
{
if (dump_file)
fprintf (dump_file, "Not peeling: outer loop\n");
return false;
}
if (!optimize_loop_for_speed_p (loop))
{
if (dump_file)
fprintf (dump_file, "Not peeling: cold loop\n");
return false;
}
/* Check if there is an estimate on the number of iterations. */
npeel = estimated_loop_iterations_int (loop);
if (npeel < 0)
{
if (dump_file)
fprintf (dump_file, "Not peeling: number of iterations is not "
"estimated\n");
return false;
}
if (maxiter >= 0 && maxiter <= npeel)
{
if (dump_file)
fprintf (dump_file, "Not peeling: upper bound is known so can "
"unroll complettely\n");
return false;
}
/* We want to peel estimated number of iterations + 1 (so we never
enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES
and be sure to avoid overflows. */
if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
{
if (dump_file)
fprintf (dump_file, "Not peeling: rolls too much "
"(%i + 1 > --param max-peel-times)\n", npeel);
return false;
}
npeel++;
/* Check peeled loops size. */
tree_estimate_loop_size (loop, exit, NULL, &size,
PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
if ((peeled_size = estimated_peeled_sequence_size (&size, npeel))
> PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
{
if (dump_file)
fprintf (dump_file, "Not peeling: peeled sequence size is too large "
"(%i insns > --param max-peel-insns)", peeled_size);
return false;
}
/* Duplicate possibly eliminating the exits. */
initialize_original_copy_tables ();
wont_exit = sbitmap_alloc (npeel + 1);
bitmap_ones (wont_exit);
bitmap_clear_bit (wont_exit, 0);
if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
npeel, wont_exit,
exit, &to_remove,
DLTHE_FLAG_UPDATE_FREQ
| DLTHE_FLAG_COMPLETTE_PEEL))
{
free_original_copy_tables ();
free (wont_exit);
return false;
}
FOR_EACH_VEC_ELT (to_remove, i, e)
{
bool ok = remove_path (e);
gcc_assert (ok);
}
free (wont_exit);
free_original_copy_tables ();
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Peeled loop %d, %i times.\n",
loop->num, npeel);
}
if (loop->any_upper_bound)
loop->nb_iterations_upper_bound -= npeel;
loop->nb_iterations_estimate = 0;
/* Make sure to mark loop cold so we do not try to peel it more. */
scale_loop_profile (loop, 1, 0);
loop->header->count = 0;
return true;
}
/* Adds a canonical induction variable to LOOP if suitable.
CREATE_IV is true if we may create a new iv. UL determines
which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
@ -981,6 +1114,9 @@ canonicalize_loop_induction_variables (struct loop *loop,
&& exit && just_once_each_iteration_p (loop, exit->src))
create_canonical_iv (loop, exit, niter);
if (ul == UL_ALL)
modified |= try_peel_loop (loop, exit, niter, maxiter);
return modified;
}
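
A worked example of the new peeling cost check in try_peel_loop, with made-up numbers: suppose the profile estimates the loop at 2 iterations, so npeel = 2 + 1 = 3 (acceptable as long as it stays below --param max-peel-times), and tree_estimate_loop_size () reports size.overall = 12 statements of which size.eliminated_by_peeling = 4 (exit tests and induction-variable updates that become trivial in the copies). Then

  estimated_peeled_sequence_size = MAX (3 * (12 - 4), 1) = 24

and peeling proceeds provided 24 does not exceed --param max-peel-insns. Afterwards the loop's upper bound is reduced by npeel and its header count is set to 0, so later invocations treat it as cold and do not peel it again.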