loop-iv.c: New file.

* loop-iv.c: New file.
	* Makefile.in (loop-iv.o): New.
	* basic_block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros.
	* cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order,
	num_loop_branches): New functions.
	* cfgloop.h (get_loop_body_in_dom_order, num_loop_branches,
	iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value,
	find_simple_exit, iv_number_of_iterations, iv_analysis_done,
	get_simple_loop_desc, free_simple_loop_desc): Declare.
	(simple_loop_desc): New inline function.
	(struct rtx_iv, struct niter_desc): New.
	* cfgloopmanip.c (loopify): Specify semantics more precisely.
	* expr.c (force_operand): Handle subregs of expressions created by
	loop unroller.
	* loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move
	parts of the initialization to toplev.c.
	* loop-unroll.c (loop_exit_at_end_p): New.
	(unroll_and_peel_loops): Call iv_analysis_done.
	(decide_peel_once_rolling, decide_peel_completely,
	decide_unroll_stupid, decide_unroll_constant_iterations,
	decide_unroll_runtime_iterations, decide_peel_simple,
	peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations,
	unroll_loop_runtime_iterations): Use new simple loop analysis.
	* loop-unswitch.c (compare_and_jump_seq): New.
	(may_unswitch_on_p): Renamed to ...
	(may_unswitch_on): Use new iv analysis.
	(reversed_condition): Export.
	(unswitch_single_loop, unswitch_loop): Use new iv analysis.
	* predict.c (estimate_probability): Use new simple loop analysis.
	* rtl.h (get_mode_bounds, reversed_condition, compare_and_jump_seq,
	canon_condition, simplify_using_condition): Declare.
	* stor-layout.c (get_mode_bounds): New.
	* toplev.c (rest_of_handle_loop2): Some parts of
	initialization/finalization moved here from loop-init.c.

From-SVN: r77951
This commit is contained in:
Zdenek Dvorak 2004-02-17 17:41:44 +01:00 committed by Zdenek Dvorak
parent cc7ce44e4c
commit 50654f6c03
15 changed files with 3174 additions and 224 deletions

View File

@ -1,3 +1,40 @@
2004-02-17 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
* loop-iv.c: New file.
* Makefile.in (loop-iv.o): New.
* basic_block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros.
* cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order,
num_loop_branches): New functions.
* cfgloop.h (get_loop_body_in_dom_order, num_loop_branches,
iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value,
find_simple_exit, iv_number_of_iterations, iv_analysis_done,
get_simple_loop_desc, free_simple_loop_desc): Declare.
(simple_loop_desc): New inline function.
(struct rtx_iv, struct niter_desc): New.
* cfgloopmanip.c (loopify): Specify semantics more precisely.
* expr.c (force_operand): Handle subregs of expressions created by
loop unroller.
* loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move
parts of the initialization to toplev.c.
* loop-unroll.c (loop_exit_at_end_p): New.
(unroll_and_peel_loops): Call iv_analysis_done.
(decide_peel_once_rolling, decide_peel_completely,
decide_unroll_stupid, decide_unroll_constant_iterations,
decide_unroll_runtime_iterations, decide_peel_simple,
peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations,
unroll_loop_runtime_iterations): Use new simple loop analysis.
* loop-unswitch.c (compare_and_jump_seq): New.
(may_unswitch_on_p): Renamed to ...
(may_unswitch_on): Use new iv analysis.
(reversed_condition): Export.
(unswitch_single_loop, unswitch_loop): Use new iv analysis.
* predict.c (estimate_probability): Use new simple loop analysis.
* rtl.h (get_mode_bounds, reversed_condition, compare_and_jump_seq,
canon_condition, simplify_using_condition): Declare.
* stor-layout.c (get_mode_bounds): New.
* toplev.c (rest_of_handle_loop2): Some parts of
initialization/finalization moved here from loop-init.c.
2004-02-17 Kazu Hirata <kazu@cs.umass.edu>
* config/h8300/h8300.h (FIXED_REGISTERS): Add the soft frame

View File

@ -848,7 +848,7 @@ OBJS-common = \
cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \
cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o \
dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o \
dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o \
dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \
expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \
genrtl.o ggc-common.o global.o graph.o gtype-desc.o \
haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o \
@ -1719,6 +1719,8 @@ cfgloop.o : cfgloop.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) coretypes.h $(TM_H) \
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h flags.h
cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
loop-iv.o : loop-iv.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h output.h coretypes.h $(TM_H)
loop-init.o : loop-init.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \

View File

@ -288,6 +288,17 @@ extern varray_type basic_block_info;
#define FOR_EACH_BB_REVERSE(BB) \
FOR_BB_BETWEEN (BB, EXIT_BLOCK_PTR->prev_bb, ENTRY_BLOCK_PTR, prev_bb)
/* For iterating over insns in basic block.  FOR_BB_INSNS steps INSN
   forward from BB_HEAD (BB) up to and including BB_END (BB) using
   NEXT_INSN; FOR_BB_INSNS_REVERSE steps INSN backward from BB_END (BB)
   down to and including BB_HEAD (BB) using PREV_INSN.  In both macros
   the termination bound is re-evaluated from BB on every iteration.  */
#define FOR_BB_INSNS(BB, INSN) \
for ((INSN) = BB_HEAD (BB); \
(INSN) != NEXT_INSN (BB_END (BB)); \
(INSN) = NEXT_INSN (INSN))
#define FOR_BB_INSNS_REVERSE(BB, INSN) \
for ((INSN) = BB_END (BB); \
(INSN) != PREV_INSN (BB_HEAD (BB)); \
(INSN) = PREV_INSN (INSN))
/* Cycles through _all_ basic blocks, even the fake ones (entry and
exit block). */

View File

@ -959,6 +959,62 @@ get_loop_body (const struct loop *loop)
return tovisit;
}
/* Stores BB at TOVISIT[*TV], advances *TV, and then recursively does the
   same for every son of BB in the dominator tree that lies inside LOOP.
   A son that dominates the latch of LOOP is not descended into right
   away; it is remembered and processed only after all the other sons.  */
static void
fill_sons_in_loop (const struct loop *loop, basic_block bb,
                   basic_block *tovisit, int *tv)
{
  basic_block child, deferred = NULL;

  tovisit[*tv] = bb;
  ++*tv;

  child = first_dom_son (CDI_DOMINATORS, bb);
  while (child)
    {
      if (flow_bb_inside_loop_p (loop, child))
        {
          /* The son on the way to the latch is handled last.  */
          if (dominated_by_p (CDI_DOMINATORS, loop->latch, child))
            deferred = child;
          else
            fill_sons_in_loop (loop, child, tovisit, tv);
        }

      child = next_dom_son (CDI_DOMINATORS, child);
    }

  if (deferred != NULL)
    fill_sons_in_loop (loop, deferred, tovisit, tv);
}
/* Gets body of a LOOP (that must be different from the outermost loop)
   sorted by dominance relation.  Additionally, if a basic block s dominates
   the latch, then only blocks dominated by s are after it.  The result is
   an array of LOOP->num_nodes basic blocks allocated with xcalloc and
   handed to the caller.  Aborts if LOOP has no nodes, if its latch is the
   exit block, or if the walk does not fill exactly LOOP->num_nodes
   entries.  */
basic_block *
get_loop_body_in_dom_order (const struct loop *loop)
{
  basic_block *order;
  int n_filled = 0;

  if (loop->num_nodes == 0)
    abort ();

  order = xcalloc (loop->num_nodes, sizeof (basic_block));

  if (loop->latch == EXIT_BLOCK_PTR)
    abort ();

  /* Walk the dominator tree starting from the loop header.  */
  fill_sons_in_loop (loop, loop->header, order, &n_filled);

  if ((int) loop->num_nodes != n_filled)
    abort ();

  return order;
}
/* Gets exit edges of a LOOP, returning their number in N_EDGES. */
edge *
get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges)
@ -988,6 +1044,27 @@ get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges)
return edges;
}
/* Counts the number of conditional branches inside LOOP. */
unsigned
num_loop_branches (const struct loop *loop)
{
unsigned i, n;
basic_block * body;
if (loop->latch == EXIT_BLOCK_PTR)
abort ();
body = get_loop_body (loop);
n = 0;
for (i = 0; i < loop->num_nodes; i++)
if (body[i]->succ && body[i]->succ->succ_next)
n++;
free (body);
return n;
}
/* Adds basic block BB to LOOP. */
void
add_bb_to_loop (basic_block bb, struct loop *loop)

View File

@ -278,7 +278,9 @@ extern int average_num_loop_insns (struct loop *);
/* Loops & cfg manipulation. */
extern basic_block *get_loop_body (const struct loop *);
extern basic_block *get_loop_body_in_dom_order (const struct loop *);
extern edge *get_loop_exit_edges (const struct loop *, unsigned *);
extern unsigned num_loop_branches (const struct loop *);
extern edge loop_preheader_edge (const struct loop *);
extern edge loop_latch_edge (const struct loop *);
@ -322,6 +324,114 @@ extern void unloop (struct loops *, struct loop *);
extern bool remove_path (struct loops *, edge);
extern edge split_loop_bb (basic_block, rtx);
/* Induction variable analysis. */
/* The description of an induction variable.  Things are a bit complicated
due to the need to handle subregs and extends.  The value of the object
described by it can be obtained as follows (all computations are done in
extend_mode):
The value in the i-th iteration is
delta + mult * extend_{extend_mode} (subreg_{mode} (base + i * step)).
If first_special is true, the value in the first iteration is
delta + mult * base
If extend = NIL, first_special must be false, delta 0, mult 1 and the value is
subreg_{mode} (base + i * step)
The get_iv_value function can be used to obtain these expressions.
??? Add a third mode field that would specify the mode in which the inner
computation is done, which would enable it to be different from the
outer one? */
struct rtx_iv
{
/* Its base and step (mode of base and step is supposed to be extend_mode,
see the description above). */
rtx base, step;
/* The type of extend applied to it (SIGN_EXTEND, ZERO_EXTEND or NIL). */
enum rtx_code extend;
/* Operations applied in the extended mode. */
rtx delta, mult;
/* The mode it is extended to. */
enum machine_mode extend_mode;
/* The mode the variable iterates in. */
enum machine_mode mode;
/* Whether we have already filled the remaining fields. */
unsigned analysed : 1;
/* Whether the first iteration needs to be handled specially. */
unsigned first_special : 1;
};
/* Description of the number of iterations of a loop.  This should replace
struct loop_desc.  We keep this just so that we are able to compare the
results. */
struct niter_desc
{
/* The edge out of the loop. */
edge out_edge;
/* The other edge leading from the condition. */
edge in_edge;
/* True if we are able to say anything about the number of iterations of
the loop. */
bool simple_p;
/* True if the loop iterates a constant number of times. */
bool const_iter;
/* Number of iterations if constant. */
unsigned HOST_WIDEST_INT niter;
/* Upper bound on the number of iterations. */
unsigned HOST_WIDEST_INT niter_max;
/* Assumptions under which the rest of the information is valid. */
rtx assumptions;
/* Assumptions under which the loop ends before reaching the latch,
even if the value of niter_expr says otherwise. */
rtx noloop_assumptions;
/* Condition under which the loop is infinite. */
rtx infinite;
/* Whether the comparison is signed. */
bool signed_p;
/* The mode in which niter_expr should be computed. */
enum machine_mode mode;
/* The number of iterations of the loop. */
rtx niter_expr;
};
extern void iv_analysis_loop_init (struct loop *);
extern rtx iv_get_reaching_def (rtx, rtx);
extern bool iv_analyse (rtx, rtx, struct rtx_iv *);
extern rtx get_iv_value (struct rtx_iv *, rtx);
extern void find_simple_exit (struct loop *, struct niter_desc *);
extern void iv_number_of_iterations (struct loop *, rtx, rtx,
struct niter_desc *);
extern void iv_analysis_done (void);
extern struct niter_desc *get_simple_loop_desc (struct loop *loop);
extern void free_simple_loop_desc (struct loop *loop);
/* Returns the pointer kept in the AUX field of LOOP, viewed as the
   description of the number of iterations of LOOP.  No analysis is done
   here; the stored pointer is returned as is.  */
static inline struct niter_desc *
simple_loop_desc (struct loop *loop)
{
  struct niter_desc *desc = loop->aux;

  return desc;
}
/* Loop optimizer initialization. */
extern struct loops *loop_optimizer_init (FILE *);
extern void loop_optimizer_finalize (struct loops *, FILE *);

View File

@ -480,11 +480,13 @@ scale_loop_frequencies (struct loop *loop, int num, int den)
accordingly. Everything between them plus LATCH_EDGE destination must
be dominated by HEADER_EDGE destination, and back-reachable from
LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
SWITCH_BB->succ to original destination of LATCH_EDGE and
SWITCH_BB->succ->succ_next to original destination of HEADER_EDGE.
FALLTHRU_EDGE (SWITCH_BB) to original destination of HEADER_EDGE and
BRANCH_EDGE (SWITCH_BB) to original destination of LATCH_EDGE.
Returns newly created loop. */
struct loop *
loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block switch_bb)
loopify (struct loops *loops, edge latch_edge, edge header_edge,
basic_block switch_bb)
{
basic_block succ_bb = latch_edge->dest;
basic_block pred_bb = header_edge->src;
@ -509,13 +511,15 @@ loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block swi
/* Redirect edges. */
loop_redirect_edge (latch_edge, loop->header);
loop_redirect_edge (BRANCH_EDGE (switch_bb), succ_bb);
loop_redirect_edge (header_edge, switch_bb);
loop_redirect_edge (switch_bb->succ->succ_next, loop->header);
loop_redirect_edge (switch_bb->succ, succ_bb);
loop_redirect_edge (FALLTHRU_EDGE (switch_bb), loop->header);
/* Update dominators. */
set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
/* Compute new loop. */

View File

@ -5588,6 +5588,20 @@ force_operand (rtx value, rtx target)
rtx subtarget = get_subtarget (target);
enum rtx_code code = GET_CODE (value);
/* Check for subreg applied to an expression produced by loop optimizer. */
if (code == SUBREG
&& GET_CODE (SUBREG_REG (value)) != REG
&& GET_CODE (SUBREG_REG (value)) != MEM)
{
value = simplify_gen_subreg (GET_MODE (value),
force_reg (GET_MODE (SUBREG_REG (value)),
force_operand (SUBREG_REG (value),
NULL_RTX)),
GET_MODE (SUBREG_REG (value)),
SUBREG_BYTE (value));
code = GET_CODE (value);
}
/* Check for a PIC address load. */
if ((code == PLUS || code == MINUS)
&& XEXP (value, 0) == pic_offset_table_rtx

View File

@ -36,9 +36,6 @@ loop_optimizer_init (FILE *dumpfile)
struct loops *loops = xcalloc (1, sizeof (struct loops));
edge e;
/* Initialize structures for layout changes. */
cfg_layout_initialize ();
/* Avoid annoying special cases of edges going to exit
block. */
for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
@ -49,18 +46,11 @@ loop_optimizer_init (FILE *dumpfile)
if (flow_loops_find (loops, LOOP_TREE) <= 1)
{
basic_block bb;
/* No loops. */
flow_loops_free (loops);
free_dominance_info (CDI_DOMINATORS);
free (loops);
/* Make chain. */
FOR_EACH_BB (bb)
if (bb->next_bb != EXIT_BLOCK_PTR)
bb->rbi->next = bb->next_bb;
cfg_layout_finalize ();
return NULL;
}
@ -94,13 +84,14 @@ loop_optimizer_init (FILE *dumpfile)
void
loop_optimizer_finalize (struct loops *loops, FILE *dumpfile)
{
basic_block bb;
unsigned i;
/* Finalize layout changes. */
/* Make chain. */
FOR_EACH_BB (bb)
if (bb->next_bb != EXIT_BLOCK_PTR)
bb->rbi->next = bb->next_bb;
if (!loops)
return;
for (i = 1; i < loops->num; i++)
if (loops->parray[i])
free_simple_loop_desc (loops->parray[i]);
/* Another dump. */
flow_loops_dump (loops, dumpfile, NULL, 1);
@ -110,9 +101,6 @@ loop_optimizer_finalize (struct loops *loops, FILE *dumpfile)
free_dominance_info (CDI_DOMINATORS);
free (loops);
/* Finalize changes. */
cfg_layout_finalize ();
/* Checking. */
#ifdef ENABLE_CHECKING
verify_flow_info ();

2465
gcc/loop-iv.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -85,7 +85,7 @@ void
unroll_and_peel_loops (struct loops *loops, int flags)
{
struct loop *loop, *next;
int check;
bool check;
/* First perform complete loop peeling (it is almost surely a win,
and affects parameters for further decision a lot). */
@ -110,7 +110,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
else
next = loop->outer;
check = 1;
check = true;
/* And perform the appropriate transformations. */
switch (loop->lpt_decision.decision)
{
@ -130,7 +130,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
unroll_loop_stupid (loops, loop);
break;
case LPT_NONE:
check = 0;
check = false;
break;
default:
abort ();
@ -144,6 +144,29 @@ unroll_and_peel_loops (struct loops *loops, int flags)
}
loop = next;
}
iv_analysis_done ();
}
/* Check whether exit of the LOOP is at the end of loop body. */
static bool
loop_exit_at_end_p (struct loop *loop)
{
struct niter_desc *desc = get_simple_loop_desc (loop);
rtx insn;
if (desc->in_edge->dest != loop->latch)
return false;
/* Check that the latch is empty. */
FOR_BB_INSNS (loop->latch, insn)
{
if (INSN_P (insn))
return false;
}
return true;
}
/* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
@ -168,10 +191,9 @@ peel_loops_completely (struct loops *loops, int flags)
next = loop->outer;
loop->lpt_decision.decision = LPT_NONE;
loop->has_desc = 0;
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n",
fprintf (rtl_dump_file, "\n;; *** Considering loop %d for complete peeling ***\n",
loop->num);
loop->ninsns = num_loop_insns (loop);
@ -216,7 +238,7 @@ decide_unrolling_and_peeling (struct loops *loops, int flags)
loop->lpt_decision.decision = LPT_NONE;
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num);
fprintf (rtl_dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
/* Do not peel cold areas. */
if (!maybe_hot_bb_p (loop->header))
@ -269,8 +291,10 @@ decide_unrolling_and_peeling (struct loops *loops, int flags)
static void
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
struct niter_desc *desc;
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n");
fprintf (rtl_dump_file, "\n;; Considering peeling once rolling loop\n");
/* Is the loop small enough? */
if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
@ -281,11 +305,13 @@ decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
}
/* Check for simple loops. */
loop->simple = simple_loop_p (loop, &loop->desc);
loop->has_desc = 1;
desc = get_simple_loop_desc (loop);
/* Check number of iterations. */
if (!loop->simple || !loop->desc.const_iter || loop->desc.niter != 0)
if (!desc->simple_p
|| desc->assumptions
|| !desc->const_iter
|| desc->niter != 0)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
@ -303,9 +329,10 @@ static void
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
unsigned npeel;
struct niter_desc *desc;
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Considering peeling completely\n");
fprintf (rtl_dump_file, "\n;; Considering peeling completely\n");
/* Skip non-innermost loops. */
if (loop->inner)
@ -346,26 +373,24 @@ decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
}
/* Check for simple loops. */
if (!loop->has_desc)
{
loop->simple = simple_loop_p (loop, &loop->desc);
loop->has_desc = 1;
}
desc = get_simple_loop_desc (loop);
/* Check number of iterations. */
if (!loop->simple || !loop->desc.const_iter)
if (!desc->simple_p
|| desc->assumptions
|| !desc->const_iter)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
return;
}
if (loop->desc.niter > npeel - 1)
if (desc->niter > npeel - 1)
{
if (rtl_dump_file)
{
fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter);
fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
}
return;
@ -397,8 +422,8 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
sbitmap wont_exit;
unsigned HOST_WIDE_INT npeel;
unsigned n_remove_edges, i;
edge *remove_edges;
struct loop_desc *desc = &loop->desc;
edge *remove_edges, ei;
struct niter_desc *desc = get_simple_loop_desc (loop);
npeel = desc->niter;
@ -407,7 +432,7 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
wont_exit = sbitmap_alloc (npeel + 1);
sbitmap_ones (wont_exit);
RESET_BIT (wont_exit, 0);
if (desc->may_be_zero)
if (desc->noloop_assumptions)
RESET_BIT (wont_exit, 1);
remove_edges = xcalloc (npeel, sizeof (edge));
@ -427,19 +452,24 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
free (remove_edges);
}
ei = desc->in_edge;
free_simple_loop_desc (loop);
/* Now remove the unreachable part of the last iteration and cancel
the loop. */
remove_path (loops, desc->in_edge);
remove_path (loops, ei);
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
}
/* Decide whether to unroll LOOP iterating constant number of times and how much. */
static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i;
unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
struct niter_desc *desc;
if (!(flags & UAP_UNROLL))
{
@ -448,7 +478,8 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
}
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n");
fprintf (rtl_dump_file,
"\n;; Considering unrolling loop with constant number of iterations\n");
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
@ -468,14 +499,10 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
}
/* Check for simple loops. */
if (!loop->has_desc)
{
loop->simple = simple_loop_p (loop, &loop->desc);
loop->has_desc = 1;
}
desc = get_simple_loop_desc (loop);
/* Check number of iterations. */
if (!loop->simple || !loop->desc.const_iter)
if (!desc->simple_p || !desc->const_iter || desc->assumptions)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
@ -483,7 +510,7 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
}
/* Check whether the loop rolls enough to consider. */
if (loop->desc.niter < 2 * nunroll)
if (desc->niter < 2 * nunroll)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
@ -497,16 +524,17 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
best_copies = 2 * nunroll + 10;
i = 2 * nunroll + 2;
if ((unsigned) i - 1 >= loop->desc.niter)
i = loop->desc.niter - 2;
if (i - 1 >= desc->niter)
i = desc->niter - 2;
for (; i >= nunroll - 1; i--)
{
unsigned exit_mod = loop->desc.niter % (i + 1);
unsigned exit_mod = desc->niter % (i + 1);
if (loop->desc.postincr)
if (!loop_exit_at_end_p (loop))
n_copies = exit_mod + i + 1;
else if (exit_mod != (unsigned) i || loop->desc.may_be_zero)
else if (exit_mod != (unsigned) i
|| desc->noloop_assumptions != NULL_RTX)
n_copies = exit_mod + i + 2;
else
n_copies = i + 1;
@ -524,6 +552,11 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
loop->lpt_decision.times = best_unroll;
if (rtl_dump_file)
fprintf (rtl_dump_file,
";; Decided to unroll the constant times rolling loop, %d times.\n",
loop->lpt_decision.times);
}
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
@ -554,11 +587,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
unsigned n_remove_edges, i;
edge *remove_edges;
unsigned max_unroll = loop->lpt_decision.times;
struct loop_desc *desc = &loop->desc;
struct niter_desc *desc = get_simple_loop_desc (loop);
bool exit_at_end = loop_exit_at_end_p (loop);
niter = desc->niter;
if (niter <= (unsigned) max_unroll + 1)
if (niter <= max_unroll + 1)
abort (); /* Should not get here (such loop should be peeled instead). */
exit_mod = niter % (max_unroll + 1);
@ -569,9 +603,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
n_remove_edges = 0;
if (desc->postincr)
if (!exit_at_end)
{
/* Counter is incremented after the exit test; leave exit test
/* The exit is not at the end of the loop; leave exit test
in the first copy, so that the loops that start with test
of exit condition have continuous body after unrolling. */
@ -580,15 +614,22 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
/* Peel exit_mod iterations. */
RESET_BIT (wont_exit, 0);
if (desc->may_be_zero)
if (desc->noloop_assumptions)
RESET_BIT (wont_exit, 1);
if (exit_mod
&& !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
loops, exit_mod,
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
if (exit_mod)
{
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
loops, exit_mod,
wont_exit, desc->out_edge,
remove_edges, &n_remove_edges,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
desc->noloop_assumptions = NULL_RTX;
desc->niter -= exit_mod;
desc->niter_max -= exit_mod;
}
SET_BIT (wont_exit, 1);
}
@ -602,12 +643,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
/* We know that niter >= max_unroll + 2; so we do not need to care of
case when we would exit before reaching the loop. So just peel
exit_mod + 1 iterations.
*/
if (exit_mod != (unsigned) max_unroll || desc->may_be_zero)
exit_mod + 1 iterations. */
if (exit_mod != max_unroll
|| desc->noloop_assumptions)
{
RESET_BIT (wont_exit, 0);
if (desc->may_be_zero)
if (desc->noloop_assumptions)
RESET_BIT (wont_exit, 1);
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
@ -616,6 +657,10 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
DLTHE_FLAG_UPDATE_FREQ))
abort ();
desc->niter -= exit_mod + 1;
desc->niter_max -= exit_mod + 1;
desc->noloop_assumptions = NULL_RTX;
SET_BIT (wont_exit, 0);
SET_BIT (wont_exit, 1);
}
@ -632,6 +677,27 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
free (wont_exit);
if (exit_at_end)
{
basic_block exit_block = desc->in_edge->src->rbi->copy;
/* Find a new in and out edge; they are in the last copy we have made. */
if (exit_block->succ->dest == desc->out_edge->dest)
{
desc->out_edge = exit_block->succ;
desc->in_edge = exit_block->succ->succ_next;
}
else
{
desc->out_edge = exit_block->succ->succ_next;
desc->in_edge = exit_block->succ;
}
}
desc->niter /= max_unroll + 1;
desc->niter_max /= max_unroll + 1;
desc->niter_expr = GEN_INT (desc->niter);
/* Remove the edges. */
for (i = 0; i < n_remove_edges; i++)
remove_path (loops, remove_edges[i]);
@ -647,6 +713,7 @@ static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
unsigned nunroll, nunroll_by_av, i;
struct niter_desc *desc;
if (!(flags & UAP_UNROLL))
{
@ -655,7 +722,8 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
}
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n");
fprintf (rtl_dump_file,
"\n;; Considering unrolling loop with runtime computable number of iterations\n");
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
@ -675,21 +743,18 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
}
/* Check for simple loops. */
if (!loop->has_desc)
{
loop->simple = simple_loop_p (loop, &loop->desc);
loop->has_desc = 1;
}
desc = get_simple_loop_desc (loop);
/* Check simpleness. */
if (!loop->simple)
if (!desc->simple_p || desc->assumptions)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n");
fprintf (rtl_dump_file,
";; Unable to prove that the number of iterations can be counted in runtime\n");
return;
}
if (loop->desc.const_iter)
if (desc->const_iter)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
@ -706,10 +771,16 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
/* Success; now force nunroll to be power of 2, as we are unable to
cope with overflows in computation of number of iterations. */
for (i = 1; 2 * i <= nunroll; i *= 2);
for (i = 1; 2 * i <= nunroll; i *= 2)
continue;
loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
loop->lpt_decision.times = i - 1;
if (rtl_dump_file)
fprintf (rtl_dump_file,
";; Decided to unroll the runtime computable times rolling loop, %d times.\n",
loop->lpt_decision.times);
}
/* Unroll LOOP for that we are able to count number of iterations in runtime
@ -746,7 +817,7 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
static void
unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
{
rtx niter, init_code, branch_code, jump, label;
rtx old_niter, niter, init_code, branch_code, tmp;
unsigned i, j, p;
basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
unsigned n_dom_bbs;
@ -756,7 +827,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
edge *remove_edges, e;
bool extra_zero_check, last_may_exit;
unsigned max_unroll = loop->lpt_decision.times;
struct loop_desc *desc = &loop->desc;
struct niter_desc *desc = get_simple_loop_desc (loop);
bool exit_at_end = loop_exit_at_end_p (loop);
/* Remember blocks whose dominators will have to be updated. */
dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
@ -777,7 +849,7 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
}
free (body);
if (desc->postincr)
if (!exit_at_end)
{
/* Leave exit in first copy (for explanation why see comment in
unroll_loop_constant_iterations). */
@ -798,15 +870,15 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
/* Get expression for number of iterations. */
start_sequence ();
niter = count_loop_iterations (desc, NULL, NULL);
if (!niter)
abort ();
niter = force_operand (niter, NULL);
old_niter = niter = gen_reg_rtx (desc->mode);
tmp = force_operand (copy_rtx (desc->niter_expr), niter);
if (tmp != niter)
emit_move_insn (niter, tmp);
/* Count modulo by ANDing it with max_unroll; we use the fact that
the number of unrollings is a power of two, and thus this is correct
even if there is overflow in the computation. */
niter = expand_simple_binop (GET_MODE (desc->var), AND,
niter = expand_simple_binop (desc->mode, AND,
niter,
GEN_INT (max_unroll),
NULL_RTX, 0, OPTAB_LIB_WIDEN);
@ -824,10 +896,11 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
/* Peel the first copy of loop body (almost always we must leave exit test
here; the only exception is when we have extra zero check and the number
of iterations is reliable (i.e. comes out of NE condition). Also record
the place of (possible) extra zero check. */
of iterations is reliable. Also record the place of (possible) extra
zero check. */
sbitmap_zero (wont_exit);
if (extra_zero_check && desc->cond == NE)
if (extra_zero_check
&& !desc->noloop_assumptions)
SET_BIT (wont_exit, 1);
ezc_swtch = loop_preheader_edge (loop)->src;
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
@ -857,20 +930,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
p = REG_BR_PROB_BASE / (i + 2);
preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
label = block_label (preheader);
start_sequence ();
do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0,
GET_MODE (desc->var), NULL_RTX, NULL_RTX,
label);
jump = get_last_insn ();
JUMP_LABEL (jump) = label;
REG_NOTES (jump)
= gen_rtx_EXPR_LIST (REG_BR_PROB,
GEN_INT (p), REG_NOTES (jump));
LABEL_NUSES (label)++;
branch_code = get_insns ();
end_sequence ();
branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
block_label (preheader), p, NULL_RTX);
swtch = loop_split_edge_with (swtch->pred, branch_code);
set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
@ -886,20 +947,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
p = REG_BR_PROB_BASE / (max_unroll + 1);
swtch = ezc_swtch;
preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
label = block_label (preheader);
start_sequence ();
do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0,
GET_MODE (desc->var), NULL_RTX, NULL_RTX,
label);
jump = get_last_insn ();
JUMP_LABEL (jump) = label;
REG_NOTES (jump)
= gen_rtx_EXPR_LIST (REG_BR_PROB,
GEN_INT (p), REG_NOTES (jump));
LABEL_NUSES (label)++;
branch_code = get_insns ();
end_sequence ();
branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
block_label (preheader), p, NULL_RTX);
swtch = loop_split_edge_with (swtch->succ, branch_code);
set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
@ -925,11 +974,45 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
free (wont_exit);
if (exit_at_end)
{
basic_block exit_block = desc->in_edge->src->rbi->copy;
/* Find a new in and out edge; they are in the last copy we have made. */
if (exit_block->succ->dest == desc->out_edge->dest)
{
desc->out_edge = exit_block->succ;
desc->in_edge = exit_block->succ->succ_next;
}
else
{
desc->out_edge = exit_block->succ->succ_next;
desc->in_edge = exit_block->succ;
}
}
/* Remove the edges. */
for (i = 0; i < n_remove_edges; i++)
remove_path (loops, remove_edges[i]);
free (remove_edges);
/* We must be careful when updating the number of iterations due to
preconditioning and the fact that the value must be valid at entry
of the loop. After passing through the above code, we see that
the correct new number of iterations is this: */
if (desc->const_iter)
abort ();
desc->niter_expr =
simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
desc->niter_max /= max_unroll + 1;
if (exit_at_end)
{
desc->niter_expr =
simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
desc->noloop_assumptions = NULL_RTX;
desc->niter_max--;
}
if (rtl_dump_file)
fprintf (rtl_dump_file,
";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
@ -941,6 +1024,7 @@ static void
decide_peel_simple (struct loop *loop, int flags)
{
unsigned npeel;
struct niter_desc *desc;
if (!(flags & UAP_PEEL))
{
@ -949,7 +1033,7 @@ decide_peel_simple (struct loop *loop, int flags)
}
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Considering simply peeling loop\n");
fprintf (rtl_dump_file, "\n;; Considering simply peeling loop\n");
/* npeel = number of iterations to peel. */
npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
@ -965,14 +1049,10 @@ decide_peel_simple (struct loop *loop, int flags)
}
/* Check for simple loops. */
if (!loop->has_desc)
{
loop->simple = simple_loop_p (loop, &loop->desc);
loop->has_desc = 1;
}
desc = get_simple_loop_desc (loop);
/* Check number of iterations. */
if (loop->simple && loop->desc.const_iter)
if (desc->simple_p && !desc->assumptions && desc->const_iter)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
@ -981,7 +1061,7 @@ decide_peel_simple (struct loop *loop, int flags)
/* Do not simply peel loops with branches inside -- it increases number
of mispredicts. */
if (loop->desc.n_branches > 1)
if (num_loop_branches (loop) > 1)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
@ -1016,6 +1096,10 @@ decide_peel_simple (struct loop *loop, int flags)
/* Success. */
loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
loop->lpt_decision.times = npeel;
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Decided to simply peel the loop, %d times.\n",
loop->lpt_decision.times);
}
/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
@ -1037,6 +1121,7 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
{
sbitmap wont_exit;
unsigned npeel = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
wont_exit = sbitmap_alloc (npeel + 1);
sbitmap_zero (wont_exit);
@ -1048,6 +1133,23 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
free (wont_exit);
if (desc->simple_p)
{
if (desc->const_iter)
{
desc->niter -= npeel;
desc->niter_expr = GEN_INT (desc->niter);
desc->noloop_assumptions = NULL_RTX;
}
else
{
/* We cannot just update niter_expr, as its value might be clobbered
inside loop. We could handle this by counting the number into
temporary just like we do in runtime unrolling, but it does not
seem worthwhile. */
free_simple_loop_desc (loop);
}
}
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel);
}
@ -1057,6 +1159,7 @@ static void
decide_unroll_stupid (struct loop *loop, int flags)
{
unsigned nunroll, nunroll_by_av, i;
struct niter_desc *desc;
if (!(flags & UAP_UNROLL_ALL))
{
@ -1065,7 +1168,7 @@ decide_unroll_stupid (struct loop *loop, int flags)
}
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n");
fprintf (rtl_dump_file, "\n;; Considering unrolling loop stupidly\n");
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
@ -1085,14 +1188,10 @@ decide_unroll_stupid (struct loop *loop, int flags)
}
/* Check for simple loops. */
if (!loop->has_desc)
{
loop->simple = simple_loop_p (loop, &loop->desc);
loop->has_desc = 1;
}
desc = get_simple_loop_desc (loop);
/* Check simpleness. */
if (loop->simple)
if (desc->simple_p && !desc->assumptions)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; The loop is simple\n");
@ -1101,7 +1200,7 @@ decide_unroll_stupid (struct loop *loop, int flags)
/* Do not unroll loops with branches inside -- it increases number
of mispredicts. */
if (loop->desc.n_branches > 1)
if (num_loop_branches (loop) > 1)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
@ -1109,7 +1208,8 @@ decide_unroll_stupid (struct loop *loop, int flags)
}
/* If we have profile feedback, check whether the loop rolls. */
if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
if (loop->header->count
&& expected_loop_iterations (loop) < 2 * nunroll)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
@ -1119,10 +1219,16 @@ decide_unroll_stupid (struct loop *loop, int flags)
/* Success. Now force nunroll to be power of 2, as it seems that this
improves results (partially because of better alignments, partially
because of some dark magic). */
for (i = 1; 2 * i <= nunroll; i *= 2);
for (i = 1; 2 * i <= nunroll; i *= 2)
continue;
loop->lpt_decision.decision = LPT_UNROLL_STUPID;
loop->lpt_decision.times = i - 1;
if (rtl_dump_file)
fprintf (rtl_dump_file,
";; Decided to unroll the loop stupidly, %d times.\n",
loop->lpt_decision.times);
}
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
@ -1147,6 +1253,7 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
{
sbitmap wont_exit;
unsigned nunroll = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
wont_exit = sbitmap_alloc (nunroll + 1);
sbitmap_zero (wont_exit);
@ -1158,6 +1265,17 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
free (wont_exit);
if (desc->simple_p)
{
/* We indeed may get here provided that there are nontrivial assumptions
for a loop to be really simple. We could update the counts, but the
problem is that we are unable to decide which exit will be taken
(not really true in case the number of iterations is constant,
but no one will do anything with this information, so we do not
worry about it). */
desc->simple_p = false;
}
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n",
nunroll, num_loop_insns (loop));

View File

@ -79,11 +79,63 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
with handling this case. */
static struct loop *unswitch_loop (struct loops *, struct loop *,
basic_block);
basic_block, rtx, rtx);
static void unswitch_single_loop (struct loops *, struct loop *, rtx, int);
static bool may_unswitch_on_p (basic_block, struct loop *,
basic_block *);
static rtx reversed_condition (rtx);
static rtx may_unswitch_on (basic_block, struct loop *, rtx *);
/* Build and return an insn sequence that compares OP0 with OP1 using the
   comparison code COMP and jumps to LABEL when the comparison holds.  The
   jump is annotated with a REG_BR_PROB note carrying PROB.

   The comparison mode is taken from OP0, or from OP1 when OP0 has VOIDmode.
   For a MODE_CC comparison CINSN must be supplied: it is an existing jump
   whose condition is exactly (COMP OP0 OP1), and its pattern is copied and
   redirected to LABEL.  For any other mode CINSN must be NULL and the
   compare-and-jump is expanded from scratch.  Violating either requirement
   aborts.  */
rtx
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
		      rtx cinsn)
{
  enum machine_mode cmp_mode = GET_MODE (op0);
  rtx branch, insns;

  if (cmp_mode == VOIDmode)
    cmp_mode = GET_MODE (op1);

  start_sequence ();

  if (GET_MODE_CLASS (cmp_mode) != MODE_CC)
    {
      /* Ordinary comparison: nothing to copy from, so no template jump
	 may be passed in.  */
      if (cinsn)
	abort ();

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       cmp_mode, NULL_RTX, NULL_RTX, label);

      /* The jump is the last insn emitted; record its target by hand.  */
      branch = get_last_insn ();
      JUMP_LABEL (branch) = label;
      LABEL_NUSES (label)++;
    }
  else
    {
      rtx test;

      /* A hack -- there seems to be no easy generic way to build a
	 conditional jump from a CC-mode comparison, so we clone the
	 jump we were handed.  */
      if (!cinsn)
	abort ();

      /* The template jump must test exactly the requested condition.  */
      test = XEXP (SET_SRC (pc_set (cinsn)), 0);
      if (!(GET_CODE (test) == comp
	    && rtx_equal_p (op0, XEXP (test, 0))
	    && rtx_equal_p (op1, XEXP (test, 1))))
	abort ();

      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      branch = get_last_insn ();

      /* The copy initially targets the same label as CINSN; account for
	 that use and then retarget the copy to LABEL.  */
      JUMP_LABEL (branch) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (branch))++;
      redirect_jump (branch, label, 0);
    }

  /* Attach the branch probability to whichever jump was created.  */
  REG_NOTES (branch) = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
					  REG_NOTES (branch));

  insns = get_insns ();
  end_sequence ();

  return insns;
}
/* Main entry point. Perform loop unswitching on all suitable LOOPS. */
void
@ -111,48 +163,82 @@ unswitch_loops (struct loops *loops)
verify_loop_structure (loops);
#endif
}
iv_analysis_done ();
}
/* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
basic blocks (for what it means see comments below). List of basic blocks
inside LOOP is provided in BODY to save time. */
static bool
may_unswitch_on_p (basic_block bb, struct loop *loop, basic_block *body)
basic blocks (for what it means see comments below). In case condition
compares loop invariant cc mode register, return the jump in CINSN. */
static rtx
may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
{
rtx test;
rtx test, at, insn, op[2];
struct rtx_iv iv;
unsigned i;
enum machine_mode mode;
/* BB must end in a simple conditional jump. */
if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next)
return false;
return NULL_RTX;
if (!any_condjump_p (BB_END (bb)))
return false;
return NULL_RTX;
/* With branches inside loop. */
if (!flow_bb_inside_loop_p (loop, bb->succ->dest)
|| !flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest))
return false;
return NULL_RTX;
/* It must be executed just once each iteration (because otherwise we
are unable to update dominator/irreducible loop information correctly). */
if (!just_once_each_iteration_p (loop, bb))
return false;
return NULL_RTX;
/* Condition must be invariant. We use just a stupid test of invariantness
of the condition: all used regs must not be modified inside loop body. */
test = get_condition (BB_END (bb), NULL, true);
/* Condition must be invariant. */
test = get_condition (BB_END (bb), &at, true);
if (!test)
return false;
return NULL_RTX;
for (i = 0; i < loop->num_nodes; i++)
if (modified_between_p (test, BB_HEAD (body[i]), NEXT_INSN (BB_END (body[i]))))
return false;
for (i = 0; i < 2; i++)
{
op[i] = XEXP (test, i);
return true;
if (CONSTANT_P (op[i]))
continue;
insn = iv_get_reaching_def (at, op[i]);
if (!iv_analyse (insn, op[i], &iv))
return NULL_RTX;
if (iv.step != const0_rtx
|| iv.first_special)
return NULL_RTX;
op[i] = get_iv_value (&iv, const0_rtx);
}
mode = GET_MODE (op[0]);
if (mode == VOIDmode)
mode = GET_MODE (op[1]);
if (GET_MODE_CLASS (mode) == MODE_CC)
{
if (at != BB_END (bb))
return NULL_RTX;
*cinsn = BB_END (bb);
if (!rtx_equal_p (op[0], XEXP (test, 0))
|| !rtx_equal_p (op[1], XEXP (test, 1)))
return NULL_RTX;
return test;
}
return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
op[0], op[1]));
}
/* Reverses CONDition; returns NULL if we cannot. */
static rtx
rtx
reversed_condition (rtx cond)
{
enum rtx_code reversed;
@ -173,13 +259,10 @@ static void
unswitch_single_loop (struct loops *loops, struct loop *loop,
rtx cond_checked, int num)
{
basic_block *bbs, bb;
basic_block *bbs;
struct loop *nloop;
unsigned i;
int true_first;
rtx cond, rcond, conds, rconds, acond, split_before;
int always_true;
int always_false;
rtx cond, rcond, conds, rconds, acond, cinsn = NULL_RTX;
int repeat;
edge e;
@ -237,8 +320,9 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
/* Find a bb to unswitch on. */
bbs = get_loop_body (loop);
iv_analysis_loop_init (loop);
for (i = 0; i < loop->num_nodes; i++)
if (may_unswitch_on_p (bbs[i], loop, bbs))
if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
break;
if (i == loop->num_nodes)
@ -247,39 +331,26 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
return;
}
if (!(cond = get_condition (BB_END (bbs[i]), &split_before, true)))
abort ();
rcond = reversed_condition (cond);
if (rcond)
rcond = canon_condition (rcond);
/* Check whether the result can be predicted. */
always_true = 0;
always_false = 0;
for (acond = cond_checked; acond; acond = XEXP (acond, 1))
{
if (rtx_equal_p (cond, XEXP (acond, 0)))
{
always_true = 1;
break;
}
if (rtx_equal_p (rcond, XEXP (acond, 0)))
{
always_false = 1;
break;
}
}
simplify_using_condition (XEXP (acond, 0), &cond, NULL);
if (always_true)
if (cond == const_true_rtx)
{
/* Remove false path. */
for (e = bbs[i]->succ; !(e->flags & EDGE_FALLTHRU); e = e->succ_next);
e = FALLTHRU_EDGE (bbs[i]);
remove_path (loops, e);
free (bbs);
repeat = 1;
}
else if (always_false)
else if (cond == const0_rtx)
{
/* Remove true path. */
for (e = bbs[i]->succ; e->flags & EDGE_FALLTHRU; e = e->succ_next);
e = BRANCH_EDGE (bbs[i]);
remove_path (loops, e);
free (bbs);
repeat = 1;
@ -293,21 +364,17 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
else
rconds = cond_checked;
/* Separate condition in a single basic block. */
bb = split_loop_bb (bbs[i], PREV_INSN (split_before))->dest;
free (bbs);
true_first = !(bb->succ->flags & EDGE_FALLTHRU);
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unswitching loop\n");
/* Unswitch the loop on this condition. */
nloop = unswitch_loop (loops, loop, bb);
nloop = unswitch_loop (loops, loop, bbs[i], cond, cinsn);
if (!nloop)
abort ();
/* Invoke itself on modified loops. */
unswitch_single_loop (loops, nloop, true_first ? conds : rconds, num + 1);
unswitch_single_loop (loops, loop, true_first ? rconds : conds, num + 1);
unswitch_single_loop (loops, nloop, rconds, num + 1);
unswitch_single_loop (loops, loop, conds, num + 1);
free_EXPR_LIST_node (conds);
if (rcond)
@ -316,17 +383,21 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
/* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON. We only support
unswitching of innermost loops. UNSWITCH_ON must be executed in every
iteration, i.e. it must dominate LOOP latch, and should only contain code
for the condition we unswitch on. Returns NULL if impossible, new
loop otherwise. */
iteration, i.e. it must dominate LOOP latch. COND is the condition
determining which loop is entered. Returns NULL if impossible, new loop
otherwise. The new loop is entered if COND is true. If CINSN is not
NULL, it is the insn in which COND is compared. */
static struct loop *
unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on,
rtx cond, rtx cinsn)
{
edge entry, latch_edge;
edge entry, latch_edge, true_edge, false_edge, e;
basic_block switch_bb, unswitch_on_alt, src;
struct loop *nloop;
sbitmap zero_bitmap;
int irred_flag;
int irred_flag, prob;
rtx seq;
/* Some sanity checking. */
if (!flow_bb_inside_loop_p (loop, unswitch_on))
@ -343,12 +414,6 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->succ_next->dest))
abort ();
/* Will we be able to perform redirection? */
if (!any_condjump_p (BB_END (unswitch_on)))
return NULL;
if (!cfg_layout_can_duplicate_bb_p (unswitch_on))
return NULL;
entry = loop_preheader_edge (loop);
/* Make a copy. */
@ -365,10 +430,24 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
/* Record the block with condition we unswitch on. */
unswitch_on_alt = unswitch_on->rbi->copy;
true_edge = BRANCH_EDGE (unswitch_on_alt);
false_edge = FALLTHRU_EDGE (unswitch_on);
latch_edge = loop->latch->rbi->copy->succ;
/* Create a block with the condition. */
prob = true_edge->probability;
switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
block_label (true_edge->dest),
prob, cinsn);
emit_insn_after (seq, BB_END (switch_bb));
e = make_edge (switch_bb, true_edge->dest, 0);
e->probability = prob;
e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
e->probability = false_edge->probability;
e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;
/* Make a copy of the block containing the condition; we will use
it as switch to decide which loop we want to use. */
switch_bb = cfg_layout_duplicate_bb (unswitch_on, NULL);
if (irred_flag)
{
switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
@ -381,19 +460,14 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
switch_bb->succ->flags &= ~EDGE_IRREDUCIBLE_LOOP;
switch_bb->succ->succ_next->flags &= ~EDGE_IRREDUCIBLE_LOOP;
}
unswitch_on->rbi->copy = unswitch_on_alt;
/* Loopify from the copy of LOOP body, constructing the new loop. */
for (latch_edge = loop->latch->rbi->copy->succ;
latch_edge->dest != loop->header;
latch_edge = latch_edge->succ_next);
nloop = loopify (loops, latch_edge,
loop->header->rbi->copy->pred, switch_bb);
/* Remove branches that are now unreachable in new loops. We rely on the
fact that cfg_layout_duplicate_bb reverses list of edges. */
remove_path (loops, unswitch_on->succ);
remove_path (loops, unswitch_on_alt->succ);
/* Remove branches that are now unreachable in new loops. */
remove_path (loops, true_edge);
remove_path (loops, false_edge);
/* One of the created loops does not have to be a subloop of the outer loop now,
so fix its placement in loop data structure. */

View File

@ -406,13 +406,16 @@ estimate_probability (struct loops *loops_info)
unsigned j;
int exits;
struct loop *loop = loops_info->parray[i];
struct loop_desc desc;
struct niter_desc desc;
unsigned HOST_WIDE_INT niter;
flow_loop_scan (loop, LOOP_EXIT_EDGES);
exits = loop->num_exits;
if (simple_loop_p (loop, &desc) && desc.const_iter)
iv_analysis_loop_init (loop);
find_simple_exit (loop, &desc);
if (desc.simple_p && desc.const_iter)
{
int prob;
niter = desc.niter + 1;
@ -472,6 +475,8 @@ estimate_probability (struct loops *loops_info)
free (bbs);
}
iv_analysis_done ();
/* Attempt to predict conditional jumps using a number of heuristics. */
FOR_EACH_BB (bb)
{

View File

@ -2361,4 +2361,15 @@ extern void tracer (void);
/* In var-tracking.c */
extern void variable_tracking_main (void);
/* In stor-layout.c. */
extern void get_mode_bounds (enum machine_mode, int, rtx *, rtx *);
/* In loop-unswitch.c */
extern rtx reversed_condition (rtx);
extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx);
/* In loop-iv.c */
extern rtx canon_condition (rtx);
extern void simplify_using_condition (rtx, rtx *, struct bitmap_head_def *);
#endif /* ! GCC_RTL_H */

View File

@ -2118,4 +2118,27 @@ get_best_mode (int bitsize, int bitpos, unsigned int align,
return mode;
}
/* Store in *MMIN and *MMAX the smallest and the largest value of MODE,
   interpreted as signed when SIGN is nonzero and as unsigned otherwise.
   Aborts if MODE is wider than HOST_BITS_PER_WIDE_INT bits.  */

void
get_mode_bounds (enum machine_mode mode, int sign, rtx *mmin, rtx *mmax)
{
  int bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT top_bit;

  if (bits > HOST_BITS_PER_WIDE_INT)
    abort ();

  /* Value with only the most significant bit of MODE set.  */
  top_bit = (unsigned HOST_WIDE_INT) 1 << (bits - 1);

  if (!sign)
    {
      *mmin = const0_rtx;
      /* Shift by one more bit separately rather than by BITS at once, so
	 the shift count stays below the width of the host type even when
	 BITS equals HOST_BITS_PER_WIDE_INT.  */
      *mmax = GEN_INT ((top_bit << 1) - 1);
      return;
    }

  *mmin = GEN_INT (-top_bit);
  *mmax = GEN_INT (top_bit - 1);
}
#include "gt-stor-layout.h"

View File

@ -3034,11 +3034,16 @@ static void
rest_of_handle_loop2 (tree decl, rtx insns)
{
struct loops *loops;
basic_block bb;
timevar_push (TV_LOOP);
open_dump_file (DFI_loop2, decl);
if (rtl_dump_file)
dump_flow_info (rtl_dump_file);
/* Initialize structures for layout changes. */
cfg_layout_initialize ();
loops = loop_optimizer_init (rtl_dump_file);
if (loops)
@ -3056,6 +3061,12 @@ rest_of_handle_loop2 (tree decl, rtx insns)
loop_optimizer_finalize (loops, rtl_dump_file);
}
/* Finalize layout changes. */
FOR_EACH_BB (bb)
if (bb->next_bb != EXIT_BLOCK_PTR)
bb->rbi->next = bb->next_bb;
cfg_layout_finalize ();
cleanup_cfg (CLEANUP_EXPENSIVE);
delete_trivially_dead_insns (insns, max_reg_num ());
reg_scan (insns, max_reg_num (), 0);