loop-iv.c: New file.
* loop-iv.c: New file. * Makefile.in (loop-iv.o): New. * basic_block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros. * cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order, num_loop_branches): New functions. * cfgloop.h (get_loop_body_in_dom_order, num_loop_branches, iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value, find_simple_exit, iv_number_of_iterations, iv_analysis_done, get_simple_loop_desc, free_simple_loop_desc): Declare. (simple_loop_desc): New inline function. (struct rtx_iv, struct niter_desc): New. * cfgloopmanip.c (loopify): Specify semantics more precisely. * expr.c (force_operand): Handle subregs of expressions created by loop unroller. * loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move parts of the initialization to toplev.c * loop-unroll.c (loop_exit_at_end_p): New. (unroll_and_peel_loops): Call iv_analysis_done. (decide_peel_once_rolling, decide_peel_completely, decide_unroll_stupid, decide_unroll_constant_iterations, decide_unroll_runtime_iterations, decide_peel_simple, peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations, unroll_loop_runtime_iterations): Use new simple loop analysis. * loop-unswitch.c (compare_and_jump_seq): New. (may_unswitch_on_p): Renamed to ... (may_unswitch_on): Use new iv analysis. (reversed_condition): Export. (unswitch_single_loop, unswitch_loop): Use new iv analysis. * predict.c (estimate_probability): Use new simple loop analysis. * rtl.h (get_mode_bounds, reversed_condition,compare_and_jump_seq, canon_condition, simplify_using_condition): Declare. * stor-layout.c (get_mode_bounds): New. * toplev.c (rest_of_handle_loop2): Some parts of initialization/finalization moved here from loop-init.c. From-SVN: r77951
This commit is contained in:
parent
cc7ce44e4c
commit
50654f6c03
|
@ -1,3 +1,40 @@
|
|||
2004-02-17 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
|
||||
|
||||
* loop-iv.c: New file.
|
||||
* Makefile.in (loop-iv.o): New.
|
||||
* basic_block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros.
|
||||
* cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order,
|
||||
num_loop_branches): New functions.
|
||||
* cfgloop.h (get_loop_body_in_dom_order, num_loop_branches,
|
||||
iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value,
|
||||
find_simple_exit, iv_number_of_iterations, iv_analysis_done,
|
||||
get_simple_loop_desc, free_simple_loop_desc): Declare.
|
||||
(simple_loop_desc): New inline function.
|
||||
(struct rtx_iv, struct niter_desc): New.
|
||||
* cfgloopmanip.c (loopify): Specify semantics more precisely.
|
||||
* expr.c (force_operand): Handle subregs of expressions created by
|
||||
loop unroller.
|
||||
* loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move
|
||||
parts of the initialization to toplev.c.
|
||||
* loop-unroll.c (loop_exit_at_end_p): New.
|
||||
(unroll_and_peel_loops): Call iv_analysis_done.
|
||||
(decide_peel_once_rolling, decide_peel_completely,
|
||||
decide_unroll_stupid, decide_unroll_constant_iterations,
|
||||
decide_unroll_runtime_iterations, decide_peel_simple,
|
||||
peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations,
|
||||
unroll_loop_runtime_iterations): Use new simple loop analysis.
|
||||
* loop-unswitch.c (compare_and_jump_seq): New.
|
||||
(may_unswitch_on_p): Renamed to ...
|
||||
(may_unswitch_on): Use new iv analysis.
|
||||
(reversed_condition): Export.
|
||||
(unswitch_single_loop, unswitch_loop): Use new iv analysis.
|
||||
* predict.c (estimate_probability): Use new simple loop analysis.
|
||||
* rtl.h (get_mode_bounds, reversed_condition, compare_and_jump_seq,
|
||||
canon_condition, simplify_using_condition): Declare.
|
||||
* stor-layout.c (get_mode_bounds): New.
|
||||
* toplev.c (rest_of_handle_loop2): Some parts of
|
||||
initialization/finalization moved here from loop-init.c.
|
||||
|
||||
2004-02-17 Kazu Hirata <kazu@cs.umass.edu>
|
||||
|
||||
* config/h8300/h8300.h (FIXED_REGISTERS): Add the soft frame
|
||||
|
|
|
@ -848,7 +848,7 @@ OBJS-common = \
|
|||
cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \
|
||||
cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o \
|
||||
dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o \
|
||||
dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o \
|
||||
dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \
|
||||
expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \
|
||||
genrtl.o ggc-common.o global.o graph.o gtype-desc.o \
|
||||
haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o \
|
||||
|
@ -1719,6 +1719,8 @@ cfgloop.o : cfgloop.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) coretypes.h $(TM_H) \
|
|||
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h flags.h
|
||||
cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
|
||||
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
|
||||
loop-iv.o : loop-iv.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \
|
||||
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
|
||||
cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
|
||||
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h output.h coretypes.h $(TM_H)
|
||||
loop-init.o : loop-init.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
|
||||
|
|
|
@ -288,6 +288,17 @@ extern varray_type basic_block_info;
|
|||
#define FOR_EACH_BB_REVERSE(BB) \
|
||||
FOR_BB_BETWEEN (BB, EXIT_BLOCK_PTR->prev_bb, ENTRY_BLOCK_PTR, prev_bb)
|
||||
|
||||
/* Iterate INSN over the insns of basic block BB in forward order, from BB_HEAD (BB) through BB_END (BB) inclusive. BB is expanded more than once and the end bound is recomputed on every iteration. */
|
||||
#define FOR_BB_INSNS(BB, INSN) \
|
||||
for ((INSN) = BB_HEAD (BB); \
|
||||
(INSN) != NEXT_INSN (BB_END (BB)); \
|
||||
(INSN) = NEXT_INSN (INSN))
|
||||
|
||||
/* Same as FOR_BB_INSNS, but walks backwards from BB_END (BB) through BB_HEAD (BB) inclusive using PREV_INSN. */
#define FOR_BB_INSNS_REVERSE(BB, INSN) \
|
||||
for ((INSN) = BB_END (BB); \
|
||||
(INSN) != PREV_INSN (BB_HEAD (BB)); \
|
||||
(INSN) = PREV_INSN (INSN))
|
||||
|
||||
/* Cycles through _all_ basic blocks, even the fake ones (entry and
|
||||
exit block). */
|
||||
|
||||
|
|
|
@ -959,6 +959,62 @@ get_loop_body (const struct loop *loop)
|
|||
return tovisit;
|
||||
}
|
||||
|
||||
/* Stores the basic block BB and then, recursively, its dominance sons
|
||||
that lie inside LOOP into array TOVISIT, starting at index *TV. *TV is
|
||||
advanced by one for every block stored. The son that dominates the latch
|
||||
of LOOP (if there is one) is processed after all the other sons. */
|
||||
|
||||
static void
|
||||
fill_sons_in_loop (const struct loop *loop, basic_block bb,
|
||||
basic_block *tovisit, int *tv)
|
||||
{
|
||||
basic_block son, postpone = NULL;
|
||||
|
||||
/* BB itself is recorded before any of its sons. */
tovisit[(*tv)++] = bb;
|
||||
for (son = first_dom_son (CDI_DOMINATORS, bb);
|
||||
son;
|
||||
son = next_dom_son (CDI_DOMINATORS, son))
|
||||
{
|
||||
/* Sons outside of LOOP are not recorded and not descended into. */
if (!flow_bb_inside_loop_p (loop, son))
|
||||
continue;
|
||||
|
||||
/* Defer the son that dominates the latch until the remaining sons are done. */
if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
|
||||
{
|
||||
postpone = son;
|
||||
continue;
|
||||
}
|
||||
fill_sons_in_loop (loop, son, tovisit, tv);
|
||||
}
|
||||
|
||||
/* Now handle the deferred son, so that its subtree is stored last. */
if (postpone)
|
||||
fill_sons_in_loop (loop, postpone, tovisit, tv);
|
||||
}
|
||||
|
||||
/* Gets body of a LOOP (that must be different from the outermost loop)
|
||||
sorted by dominance relation. Additionally, if a basic block s dominates
|
||||
the latch, then only blocks dominated by s come after it. The result is an
|
||||
array of LOOP->num_nodes blocks allocated with xcalloc; it is returned
|
||||
without being freed here. */
|
||||
|
||||
basic_block *
|
||||
get_loop_body_in_dom_order (const struct loop *loop)
|
||||
{
|
||||
basic_block *tovisit;
|
||||
int tv;
|
||||
|
||||
/* A loop without any nodes is treated as an internal error. */
if (!loop->num_nodes)
|
||||
abort ();
|
||||
|
||||
tovisit = xcalloc (loop->num_nodes, sizeof (basic_block));
|
||||
|
||||
/* A loop whose latch is the exit block is rejected. */
if (loop->latch == EXIT_BLOCK_PTR)
|
||||
abort ();
|
||||
|
||||
/* Fill the array by walking the dominance sons, starting at the header. */
tv = 0;
|
||||
fill_sons_in_loop (loop, loop->header, tovisit, &tv);
|
||||
|
||||
/* Every block of the loop must have been stored exactly once. */
if (tv != (int) loop->num_nodes)
|
||||
abort ();
|
||||
|
||||
return tovisit;
|
||||
}
|
||||
|
||||
/* Gets exit edges of a LOOP, returning their number in N_EDGES. */
|
||||
edge *
|
||||
get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges)
|
||||
|
@ -988,6 +1044,27 @@ get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges)
|
|||
return edges;
|
||||
}
|
||||
|
||||
/* Counts the number of conditional branches inside LOOP. More precisely, returns the number of blocks of LOOP whose successor list holds at least two edges. Aborts if the latch of LOOP is the exit block. */
|
||||
|
||||
unsigned
|
||||
num_loop_branches (const struct loop *loop)
|
||||
{
|
||||
unsigned i, n;
|
||||
basic_block * body;
|
||||
|
||||
if (loop->latch == EXIT_BLOCK_PTR)
|
||||
abort ();
|
||||
|
||||
body = get_loop_body (loop);
|
||||
n = 0;
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
/* A block counts when it has a first and a second successor edge. */
if (body[i]->succ && body[i]->succ->succ_next)
|
||||
n++;
|
||||
/* The array obtained from get_loop_body is released here. */
free (body);
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Adds basic block BB to LOOP. */
|
||||
void
|
||||
add_bb_to_loop (basic_block bb, struct loop *loop)
|
||||
|
|
110
gcc/cfgloop.h
110
gcc/cfgloop.h
|
@ -278,7 +278,9 @@ extern int average_num_loop_insns (struct loop *);
|
|||
|
||||
/* Loops & cfg manipulation. */
|
||||
extern basic_block *get_loop_body (const struct loop *);
|
||||
extern basic_block *get_loop_body_in_dom_order (const struct loop *);
|
||||
extern edge *get_loop_exit_edges (const struct loop *, unsigned *);
|
||||
extern unsigned num_loop_branches (const struct loop *);
|
||||
|
||||
extern edge loop_preheader_edge (const struct loop *);
|
||||
extern edge loop_latch_edge (const struct loop *);
|
||||
|
@ -322,6 +324,114 @@ extern void unloop (struct loops *, struct loop *);
|
|||
extern bool remove_path (struct loops *, edge);
|
||||
extern edge split_loop_bb (basic_block, rtx);
|
||||
|
||||
/* Induction variable analysis. */
|
||||
|
||||
/* The description of an induction variable. Things are a bit complicated
|
||||
due to the need to handle subregs and extends. The value of the object described
|
||||
by it can be obtained as follows (all computations are done in extend_mode):
|
||||
|
||||
Value in i-th iteration is
|
||||
delta + mult * extend_{extend_mode} (subreg_{mode} (base + i * step)).
|
||||
|
||||
If first_special is true, the value in the first iteration is
|
||||
delta + mult * base
|
||||
|
||||
If extend = NIL, first_special must be false, delta 0, mult 1 and value is
|
||||
subreg_{mode} (base + i * step)
|
||||
|
||||
The get_iv_value function can be used to obtain these expressions.
|
||||
|
||||
??? Add a third mode field that would specify the mode in which the inner
|
||||
computation is done, which would enable it to be different from the
|
||||
outer one? */
|
||||
|
||||
struct rtx_iv
|
||||
{
|
||||
/* Its base and step (mode of base and step is supposed to be extend_mode,
|
||||
see the description above). */
|
||||
rtx base, step;
|
||||
|
||||
/* The type of extend applied to it (SIGN_EXTEND, ZERO_EXTEND or NIL). */
|
||||
enum rtx_code extend;
|
||||
|
||||
/* Operations applied in the extended mode: the addend (delta) and the multiplier (mult) of the formula above. */
|
||||
rtx delta, mult;
|
||||
|
||||
/* The mode it is extended to. */
|
||||
enum machine_mode extend_mode;
|
||||
|
||||
/* The mode the variable iterates in. */
|
||||
enum machine_mode mode;
|
||||
|
||||
/* Whether we have already filled the remaining fields. */
|
||||
unsigned analysed : 1;
|
||||
|
||||
/* Whether the first iteration needs to be handled specially (see the first_special case in the description above). */
|
||||
unsigned first_special : 1;
|
||||
};
|
||||
|
||||
/* This should replace struct loop_desc. We keep this just so that we are
|
||||
able to compare the results. */
|
||||
|
||||
struct niter_desc
|
||||
{
|
||||
/* The edge out of the loop. */
|
||||
edge out_edge;
|
||||
|
||||
/* The other edge leading from the condition. */
|
||||
edge in_edge;
|
||||
|
||||
/* True if we are able to say anything about the number of iterations of the
|
||||
loop. */
|
||||
bool simple_p;
|
||||
|
||||
/* True if the loop iterates a constant number of times. */
|
||||
bool const_iter;
|
||||
|
||||
/* Number of iterations if constant. */
|
||||
unsigned HOST_WIDEST_INT niter;
|
||||
|
||||
/* Upper bound on the number of iterations. */
|
||||
unsigned HOST_WIDEST_INT niter_max;
|
||||
|
||||
/* Assumptions under which the rest of the information is valid. */
|
||||
rtx assumptions;
|
||||
|
||||
/* Assumptions under which the loop ends before reaching the latch,
|
||||
even if the value of niter_expr says otherwise. */
|
||||
rtx noloop_assumptions;
|
||||
|
||||
/* Condition under which the loop is infinite. */
|
||||
rtx infinite;
|
||||
|
||||
/* Whether the comparison is signed. */
|
||||
bool signed_p;
|
||||
|
||||
/* The mode in which niter_expr should be computed. */
|
||||
enum machine_mode mode;
|
||||
|
||||
/* The number of iterations of the loop. */
|
||||
rtx niter_expr;
|
||||
};
|
||||
|
||||
extern void iv_analysis_loop_init (struct loop *);
|
||||
extern rtx iv_get_reaching_def (rtx, rtx);
|
||||
extern bool iv_analyse (rtx, rtx, struct rtx_iv *);
|
||||
extern rtx get_iv_value (struct rtx_iv *, rtx);
|
||||
extern void find_simple_exit (struct loop *, struct niter_desc *);
|
||||
extern void iv_number_of_iterations (struct loop *, rtx, rtx,
|
||||
struct niter_desc *);
|
||||
extern void iv_analysis_done (void);
|
||||
|
||||
extern struct niter_desc *get_simple_loop_desc (struct loop *loop);
|
||||
extern void free_simple_loop_desc (struct loop *loop);
|
||||
|
||||
/* Returns the pointer stored in LOOP->aux as the struct niter_desc of LOOP. Nothing is computed here; the field is only read. */
static inline struct niter_desc *
|
||||
simple_loop_desc (struct loop *loop)
|
||||
{
|
||||
return loop->aux;
|
||||
}
|
||||
|
||||
/* Loop optimizer initialization. */
|
||||
extern struct loops *loop_optimizer_init (FILE *);
|
||||
extern void loop_optimizer_finalize (struct loops *, FILE *);
|
||||
|
|
|
@ -480,11 +480,13 @@ scale_loop_frequencies (struct loop *loop, int num, int den)
|
|||
accordingly. Everything between them plus LATCH_EDGE destination must
|
||||
be dominated by HEADER_EDGE destination, and back-reachable from
|
||||
LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
|
||||
SWITCH_BB->succ to original destination of LATCH_EDGE and
|
||||
SWITCH_BB->succ->succ_next to original destination of HEADER_EDGE.
|
||||
FALLTHRU_EDGE (SWITCH_BB) to original destination of HEADER_EDGE and
|
||||
BRANCH_EDGE (SWITCH_BB) to original destination of LATCH_EDGE.
|
||||
Returns newly created loop. */
|
||||
|
||||
struct loop *
|
||||
loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block switch_bb)
|
||||
loopify (struct loops *loops, edge latch_edge, edge header_edge,
|
||||
basic_block switch_bb)
|
||||
{
|
||||
basic_block succ_bb = latch_edge->dest;
|
||||
basic_block pred_bb = header_edge->src;
|
||||
|
@ -509,13 +511,15 @@ loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block swi
|
|||
|
||||
/* Redirect edges. */
|
||||
loop_redirect_edge (latch_edge, loop->header);
|
||||
loop_redirect_edge (BRANCH_EDGE (switch_bb), succ_bb);
|
||||
|
||||
loop_redirect_edge (header_edge, switch_bb);
|
||||
loop_redirect_edge (switch_bb->succ->succ_next, loop->header);
|
||||
loop_redirect_edge (switch_bb->succ, succ_bb);
|
||||
loop_redirect_edge (FALLTHRU_EDGE (switch_bb), loop->header);
|
||||
|
||||
/* Update dominators. */
|
||||
set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
|
||||
set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
|
||||
|
||||
set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
|
||||
|
||||
/* Compute new loop. */
|
||||
|
|
14
gcc/expr.c
14
gcc/expr.c
|
@ -5588,6 +5588,20 @@ force_operand (rtx value, rtx target)
|
|||
rtx subtarget = get_subtarget (target);
|
||||
enum rtx_code code = GET_CODE (value);
|
||||
|
||||
/* Check for subreg applied to an expression produced by loop optimizer. */
|
||||
if (code == SUBREG
|
||||
&& GET_CODE (SUBREG_REG (value)) != REG
|
||||
&& GET_CODE (SUBREG_REG (value)) != MEM)
|
||||
{
|
||||
value = simplify_gen_subreg (GET_MODE (value),
|
||||
force_reg (GET_MODE (SUBREG_REG (value)),
|
||||
force_operand (SUBREG_REG (value),
|
||||
NULL_RTX)),
|
||||
GET_MODE (SUBREG_REG (value)),
|
||||
SUBREG_BYTE (value));
|
||||
code = GET_CODE (value);
|
||||
}
|
||||
|
||||
/* Check for a PIC address load. */
|
||||
if ((code == PLUS || code == MINUS)
|
||||
&& XEXP (value, 0) == pic_offset_table_rtx
|
||||
|
|
|
@ -36,9 +36,6 @@ loop_optimizer_init (FILE *dumpfile)
|
|||
struct loops *loops = xcalloc (1, sizeof (struct loops));
|
||||
edge e;
|
||||
|
||||
/* Initialize structures for layout changes. */
|
||||
cfg_layout_initialize ();
|
||||
|
||||
/* Avoid annoying special cases of edges going to exit
|
||||
block. */
|
||||
for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
|
||||
|
@ -49,18 +46,11 @@ loop_optimizer_init (FILE *dumpfile)
|
|||
|
||||
if (flow_loops_find (loops, LOOP_TREE) <= 1)
|
||||
{
|
||||
basic_block bb;
|
||||
|
||||
/* No loops. */
|
||||
flow_loops_free (loops);
|
||||
free_dominance_info (CDI_DOMINATORS);
|
||||
free (loops);
|
||||
|
||||
/* Make chain. */
|
||||
FOR_EACH_BB (bb)
|
||||
if (bb->next_bb != EXIT_BLOCK_PTR)
|
||||
bb->rbi->next = bb->next_bb;
|
||||
cfg_layout_finalize ();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -94,13 +84,14 @@ loop_optimizer_init (FILE *dumpfile)
|
|||
void
|
||||
loop_optimizer_finalize (struct loops *loops, FILE *dumpfile)
|
||||
{
|
||||
basic_block bb;
|
||||
unsigned i;
|
||||
|
||||
/* Finalize layout changes. */
|
||||
/* Make chain. */
|
||||
FOR_EACH_BB (bb)
|
||||
if (bb->next_bb != EXIT_BLOCK_PTR)
|
||||
bb->rbi->next = bb->next_bb;
|
||||
if (!loops)
|
||||
return;
|
||||
|
||||
for (i = 1; i < loops->num; i++)
|
||||
if (loops->parray[i])
|
||||
free_simple_loop_desc (loops->parray[i]);
|
||||
|
||||
/* Another dump. */
|
||||
flow_loops_dump (loops, dumpfile, NULL, 1);
|
||||
|
@ -110,9 +101,6 @@ loop_optimizer_finalize (struct loops *loops, FILE *dumpfile)
|
|||
free_dominance_info (CDI_DOMINATORS);
|
||||
free (loops);
|
||||
|
||||
/* Finalize changes. */
|
||||
cfg_layout_finalize ();
|
||||
|
||||
/* Checking. */
|
||||
#ifdef ENABLE_CHECKING
|
||||
verify_flow_info ();
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -85,7 +85,7 @@ void
|
|||
unroll_and_peel_loops (struct loops *loops, int flags)
|
||||
{
|
||||
struct loop *loop, *next;
|
||||
int check;
|
||||
bool check;
|
||||
|
||||
/* First perform complete loop peeling (it is almost surely a win,
|
||||
and affects parameters for further decision a lot). */
|
||||
|
@ -110,7 +110,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
|
|||
else
|
||||
next = loop->outer;
|
||||
|
||||
check = 1;
|
||||
check = true;
|
||||
/* And perform the appropriate transformations. */
|
||||
switch (loop->lpt_decision.decision)
|
||||
{
|
||||
|
@ -130,7 +130,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
|
|||
unroll_loop_stupid (loops, loop);
|
||||
break;
|
||||
case LPT_NONE:
|
||||
check = 0;
|
||||
check = false;
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
|
@ -144,6 +144,29 @@ unroll_and_peel_loops (struct loops *loops, int flags)
|
|||
}
|
||||
loop = next;
|
||||
}
|
||||
|
||||
iv_analysis_done ();
|
||||
}
|
||||
|
||||
/* Check whether exit of the LOOP is at the end of loop body. This holds when the in_edge of the loop's simple description leads to the latch and the latch contains no insn satisfying INSN_P. */
|
||||
|
||||
static bool
|
||||
loop_exit_at_end_p (struct loop *loop)
|
||||
{
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
rtx insn;
|
||||
|
||||
/* The edge that stays in the loop must go straight to the latch. */
if (desc->in_edge->dest != loop->latch)
|
||||
return false;
|
||||
|
||||
/* Check that the latch is empty. */
|
||||
FOR_BB_INSNS (loop->latch, insn)
|
||||
{
|
||||
if (INSN_P (insn))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
|
||||
|
@ -168,10 +191,9 @@ peel_loops_completely (struct loops *loops, int flags)
|
|||
next = loop->outer;
|
||||
|
||||
loop->lpt_decision.decision = LPT_NONE;
|
||||
loop->has_desc = 0;
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n",
|
||||
fprintf (rtl_dump_file, "\n;; *** Considering loop %d for complete peeling ***\n",
|
||||
loop->num);
|
||||
|
||||
loop->ninsns = num_loop_insns (loop);
|
||||
|
@ -216,7 +238,7 @@ decide_unrolling_and_peeling (struct loops *loops, int flags)
|
|||
loop->lpt_decision.decision = LPT_NONE;
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num);
|
||||
fprintf (rtl_dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
|
||||
|
||||
/* Do not peel cold areas. */
|
||||
if (!maybe_hot_bb_p (loop->header))
|
||||
|
@ -269,8 +291,10 @@ decide_unrolling_and_peeling (struct loops *loops, int flags)
|
|||
static void
|
||||
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
|
||||
{
|
||||
struct niter_desc *desc;
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n");
|
||||
fprintf (rtl_dump_file, "\n;; Considering peeling once rolling loop\n");
|
||||
|
||||
/* Is the loop small enough? */
|
||||
if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
|
||||
|
@ -281,11 +305,13 @@ decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
|
|||
}
|
||||
|
||||
/* Check for simple loops. */
|
||||
loop->simple = simple_loop_p (loop, &loop->desc);
|
||||
loop->has_desc = 1;
|
||||
desc = get_simple_loop_desc (loop);
|
||||
|
||||
/* Check number of iterations. */
|
||||
if (!loop->simple || !loop->desc.const_iter || loop->desc.niter != 0)
|
||||
if (!desc->simple_p
|
||||
|| desc->assumptions
|
||||
|| !desc->const_iter
|
||||
|| desc->niter != 0)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
|
||||
|
@ -303,9 +329,10 @@ static void
|
|||
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
|
||||
{
|
||||
unsigned npeel;
|
||||
struct niter_desc *desc;
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Considering peeling completely\n");
|
||||
fprintf (rtl_dump_file, "\n;; Considering peeling completely\n");
|
||||
|
||||
/* Skip non-innermost loops. */
|
||||
if (loop->inner)
|
||||
|
@ -346,26 +373,24 @@ decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
|
|||
}
|
||||
|
||||
/* Check for simple loops. */
|
||||
if (!loop->has_desc)
|
||||
{
|
||||
loop->simple = simple_loop_p (loop, &loop->desc);
|
||||
loop->has_desc = 1;
|
||||
}
|
||||
desc = get_simple_loop_desc (loop);
|
||||
|
||||
/* Check number of iterations. */
|
||||
if (!loop->simple || !loop->desc.const_iter)
|
||||
if (!desc->simple_p
|
||||
|| desc->assumptions
|
||||
|| !desc->const_iter)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (loop->desc.niter > npeel - 1)
|
||||
if (desc->niter > npeel - 1)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
{
|
||||
fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
|
||||
fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter);
|
||||
fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
|
||||
fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
|
||||
}
|
||||
return;
|
||||
|
@ -397,8 +422,8 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
|
|||
sbitmap wont_exit;
|
||||
unsigned HOST_WIDE_INT npeel;
|
||||
unsigned n_remove_edges, i;
|
||||
edge *remove_edges;
|
||||
struct loop_desc *desc = &loop->desc;
|
||||
edge *remove_edges, ei;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
|
||||
npeel = desc->niter;
|
||||
|
||||
|
@ -407,7 +432,7 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
|
|||
wont_exit = sbitmap_alloc (npeel + 1);
|
||||
sbitmap_ones (wont_exit);
|
||||
RESET_BIT (wont_exit, 0);
|
||||
if (desc->may_be_zero)
|
||||
if (desc->noloop_assumptions)
|
||||
RESET_BIT (wont_exit, 1);
|
||||
|
||||
remove_edges = xcalloc (npeel, sizeof (edge));
|
||||
|
@ -427,19 +452,24 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
|
|||
free (remove_edges);
|
||||
}
|
||||
|
||||
ei = desc->in_edge;
|
||||
free_simple_loop_desc (loop);
|
||||
|
||||
/* Now remove the unreachable part of the last iteration and cancel
|
||||
the loop. */
|
||||
remove_path (loops, desc->in_edge);
|
||||
remove_path (loops, ei);
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
|
||||
}
|
||||
|
||||
/* Decide whether to unroll LOOP iterating constant number of times and how much. */
|
||||
|
||||
static void
|
||||
decide_unroll_constant_iterations (struct loop *loop, int flags)
|
||||
{
|
||||
unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i;
|
||||
unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
|
||||
struct niter_desc *desc;
|
||||
|
||||
if (!(flags & UAP_UNROLL))
|
||||
{
|
||||
|
@ -448,7 +478,8 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
|
|||
}
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n");
|
||||
fprintf (rtl_dump_file,
|
||||
"\n;; Considering unrolling loop with constant number of iterations\n");
|
||||
|
||||
/* nunroll = total number of copies of the original loop body in
|
||||
unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
|
||||
|
@ -468,14 +499,10 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
|
|||
}
|
||||
|
||||
/* Check for simple loops. */
|
||||
if (!loop->has_desc)
|
||||
{
|
||||
loop->simple = simple_loop_p (loop, &loop->desc);
|
||||
loop->has_desc = 1;
|
||||
}
|
||||
desc = get_simple_loop_desc (loop);
|
||||
|
||||
/* Check number of iterations. */
|
||||
if (!loop->simple || !loop->desc.const_iter)
|
||||
if (!desc->simple_p || !desc->const_iter || desc->assumptions)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
|
||||
|
@ -483,7 +510,7 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
|
|||
}
|
||||
|
||||
/* Check whether the loop rolls enough to consider. */
|
||||
if (loop->desc.niter < 2 * nunroll)
|
||||
if (desc->niter < 2 * nunroll)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
|
||||
|
@ -497,16 +524,17 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
|
|||
best_copies = 2 * nunroll + 10;
|
||||
|
||||
i = 2 * nunroll + 2;
|
||||
if ((unsigned) i - 1 >= loop->desc.niter)
|
||||
i = loop->desc.niter - 2;
|
||||
if (i - 1 >= desc->niter)
|
||||
i = desc->niter - 2;
|
||||
|
||||
for (; i >= nunroll - 1; i--)
|
||||
{
|
||||
unsigned exit_mod = loop->desc.niter % (i + 1);
|
||||
unsigned exit_mod = desc->niter % (i + 1);
|
||||
|
||||
if (loop->desc.postincr)
|
||||
if (!loop_exit_at_end_p (loop))
|
||||
n_copies = exit_mod + i + 1;
|
||||
else if (exit_mod != (unsigned) i || loop->desc.may_be_zero)
|
||||
else if (exit_mod != (unsigned) i
|
||||
|| desc->noloop_assumptions != NULL_RTX)
|
||||
n_copies = exit_mod + i + 2;
|
||||
else
|
||||
n_copies = i + 1;
|
||||
|
@ -524,6 +552,11 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
|
|||
|
||||
loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
|
||||
loop->lpt_decision.times = best_unroll;
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file,
|
||||
";; Decided to unroll the constant times rolling loop, %d times.\n",
|
||||
loop->lpt_decision.times);
|
||||
}
|
||||
|
||||
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
|
||||
|
@ -554,11 +587,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
unsigned n_remove_edges, i;
|
||||
edge *remove_edges;
|
||||
unsigned max_unroll = loop->lpt_decision.times;
|
||||
struct loop_desc *desc = &loop->desc;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
bool exit_at_end = loop_exit_at_end_p (loop);
|
||||
|
||||
niter = desc->niter;
|
||||
|
||||
if (niter <= (unsigned) max_unroll + 1)
|
||||
if (niter <= max_unroll + 1)
|
||||
abort (); /* Should not get here (such loop should be peeled instead). */
|
||||
|
||||
exit_mod = niter % (max_unroll + 1);
|
||||
|
@ -569,9 +603,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
|
||||
n_remove_edges = 0;
|
||||
|
||||
if (desc->postincr)
|
||||
if (!exit_at_end)
|
||||
{
|
||||
/* Counter is incremented after the exit test; leave exit test
|
||||
/* The exit is not at the end of the loop; leave exit test
|
||||
in the first copy, so that the loops that start with test
|
||||
of exit condition have continuous body after unrolling. */
|
||||
|
||||
|
@ -580,15 +614,22 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
|
||||
/* Peel exit_mod iterations. */
|
||||
RESET_BIT (wont_exit, 0);
|
||||
if (desc->may_be_zero)
|
||||
if (desc->noloop_assumptions)
|
||||
RESET_BIT (wont_exit, 1);
|
||||
|
||||
if (exit_mod
|
||||
&& !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
loops, exit_mod,
|
||||
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
|
||||
DLTHE_FLAG_UPDATE_FREQ))
|
||||
abort ();
|
||||
if (exit_mod)
|
||||
{
|
||||
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
loops, exit_mod,
|
||||
wont_exit, desc->out_edge,
|
||||
remove_edges, &n_remove_edges,
|
||||
DLTHE_FLAG_UPDATE_FREQ))
|
||||
abort ();
|
||||
|
||||
desc->noloop_assumptions = NULL_RTX;
|
||||
desc->niter -= exit_mod;
|
||||
desc->niter_max -= exit_mod;
|
||||
}
|
||||
|
||||
SET_BIT (wont_exit, 1);
|
||||
}
|
||||
|
@ -602,12 +643,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
|
||||
/* We know that niter >= max_unroll + 2; so we do not need to take care of
|
||||
the case when we would exit before reaching the loop. So just peel
|
||||
exit_mod + 1 iterations.
|
||||
*/
|
||||
if (exit_mod != (unsigned) max_unroll || desc->may_be_zero)
|
||||
exit_mod + 1 iterations. */
|
||||
if (exit_mod != max_unroll
|
||||
|| desc->noloop_assumptions)
|
||||
{
|
||||
RESET_BIT (wont_exit, 0);
|
||||
if (desc->may_be_zero)
|
||||
if (desc->noloop_assumptions)
|
||||
RESET_BIT (wont_exit, 1);
|
||||
|
||||
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
|
@ -616,6 +657,10 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
DLTHE_FLAG_UPDATE_FREQ))
|
||||
abort ();
|
||||
|
||||
desc->niter -= exit_mod + 1;
|
||||
desc->niter_max -= exit_mod + 1;
|
||||
desc->noloop_assumptions = NULL_RTX;
|
||||
|
||||
SET_BIT (wont_exit, 0);
|
||||
SET_BIT (wont_exit, 1);
|
||||
}
|
||||
|
@ -632,6 +677,27 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
|
||||
free (wont_exit);
|
||||
|
||||
if (exit_at_end)
|
||||
{
|
||||
basic_block exit_block = desc->in_edge->src->rbi->copy;
|
||||
/* Find a new in and out edge; they are in the last copy we have made. */
|
||||
|
||||
if (exit_block->succ->dest == desc->out_edge->dest)
|
||||
{
|
||||
desc->out_edge = exit_block->succ;
|
||||
desc->in_edge = exit_block->succ->succ_next;
|
||||
}
|
||||
else
|
||||
{
|
||||
desc->out_edge = exit_block->succ->succ_next;
|
||||
desc->in_edge = exit_block->succ;
|
||||
}
|
||||
}
|
||||
|
||||
desc->niter /= max_unroll + 1;
|
||||
desc->niter_max /= max_unroll + 1;
|
||||
desc->niter_expr = GEN_INT (desc->niter);
|
||||
|
||||
/* Remove the edges. */
|
||||
for (i = 0; i < n_remove_edges; i++)
|
||||
remove_path (loops, remove_edges[i]);
|
||||
|
@ -647,6 +713,7 @@ static void
|
|||
decide_unroll_runtime_iterations (struct loop *loop, int flags)
|
||||
{
|
||||
unsigned nunroll, nunroll_by_av, i;
|
||||
struct niter_desc *desc;
|
||||
|
||||
if (!(flags & UAP_UNROLL))
|
||||
{
|
||||
|
@ -655,7 +722,8 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
|
|||
}
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n");
|
||||
fprintf (rtl_dump_file,
|
||||
"\n;; Considering unrolling loop with runtime computable number of iterations\n");
|
||||
|
||||
/* nunroll = total number of copies of the original loop body in
|
||||
unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
|
||||
|
@ -675,21 +743,18 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
|
|||
}
|
||||
|
||||
/* Check for simple loops. */
|
||||
if (!loop->has_desc)
|
||||
{
|
||||
loop->simple = simple_loop_p (loop, &loop->desc);
|
||||
loop->has_desc = 1;
|
||||
}
|
||||
desc = get_simple_loop_desc (loop);
|
||||
|
||||
/* Check simpleness. */
|
||||
if (!loop->simple)
|
||||
if (!desc->simple_p || desc->assumptions)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n");
|
||||
fprintf (rtl_dump_file,
|
||||
";; Unable to prove that the number of iterations can be counted in runtime\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (loop->desc.const_iter)
|
||||
if (desc->const_iter)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
|
||||
|
@ -706,10 +771,16 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
|
|||
|
||||
/* Success; now force nunroll to be power of 2, as we are unable to
|
||||
cope with overflows in computation of number of iterations. */
|
||||
for (i = 1; 2 * i <= nunroll; i *= 2);
|
||||
for (i = 1; 2 * i <= nunroll; i *= 2)
|
||||
continue;
|
||||
|
||||
loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
|
||||
loop->lpt_decision.times = i - 1;
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file,
|
||||
";; Decided to unroll the runtime computable times rolling loop, %d times.\n",
|
||||
loop->lpt_decision.times);
|
||||
}
|
||||
|
||||
/* Unroll LOOP for which we are able to count number of iterations in runtime
|
||||
|
@ -746,7 +817,7 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
|
|||
static void
|
||||
unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
||||
{
|
||||
rtx niter, init_code, branch_code, jump, label;
|
||||
rtx old_niter, niter, init_code, branch_code, tmp;
|
||||
unsigned i, j, p;
|
||||
basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
|
||||
unsigned n_dom_bbs;
|
||||
|
@ -756,7 +827,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
|||
edge *remove_edges, e;
|
||||
bool extra_zero_check, last_may_exit;
|
||||
unsigned max_unroll = loop->lpt_decision.times;
|
||||
struct loop_desc *desc = &loop->desc;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
bool exit_at_end = loop_exit_at_end_p (loop);
|
||||
|
||||
/* Remember blocks whose dominators will have to be updated. */
|
||||
dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
|
||||
|
@ -777,7 +849,7 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
|||
}
|
||||
free (body);
|
||||
|
||||
if (desc->postincr)
|
||||
if (!exit_at_end)
|
||||
{
|
||||
/* Leave exit in first copy (for explanation why see comment in
|
||||
unroll_loop_constant_iterations). */
|
||||
|
@ -798,15 +870,15 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
|||
|
||||
/* Get expression for number of iterations. */
|
||||
start_sequence ();
|
||||
niter = count_loop_iterations (desc, NULL, NULL);
|
||||
if (!niter)
|
||||
abort ();
|
||||
niter = force_operand (niter, NULL);
|
||||
old_niter = niter = gen_reg_rtx (desc->mode);
|
||||
tmp = force_operand (copy_rtx (desc->niter_expr), niter);
|
||||
if (tmp != niter)
|
||||
emit_move_insn (niter, tmp);
|
||||
|
||||
/* Count modulo by ANDing it with max_unroll; we use the fact that
|
||||
the number of unrollings is a power of two, and thus this is correct
|
||||
even if there is overflow in the computation. */
|
||||
niter = expand_simple_binop (GET_MODE (desc->var), AND,
|
||||
niter = expand_simple_binop (desc->mode, AND,
|
||||
niter,
|
||||
GEN_INT (max_unroll),
|
||||
NULL_RTX, 0, OPTAB_LIB_WIDEN);
|
||||
|
@ -824,10 +896,11 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
|||
|
||||
/* Peel the first copy of loop body (almost always we must leave exit test
|
||||
here; the only exception is when we have extra zero check and the number
|
||||
of iterations is reliable (i.e. comes out of NE condition). Also record
|
||||
the place of (possible) extra zero check. */
|
||||
of iterations is reliable). Also record the place of (possible) extra
|
||||
zero check. */
|
||||
sbitmap_zero (wont_exit);
|
||||
if (extra_zero_check && desc->cond == NE)
|
||||
if (extra_zero_check
|
||||
&& !desc->noloop_assumptions)
|
||||
SET_BIT (wont_exit, 1);
|
||||
ezc_swtch = loop_preheader_edge (loop)->src;
|
||||
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
|
@ -857,20 +930,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
|||
p = REG_BR_PROB_BASE / (i + 2);
|
||||
|
||||
preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
|
||||
label = block_label (preheader);
|
||||
start_sequence ();
|
||||
do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0,
|
||||
GET_MODE (desc->var), NULL_RTX, NULL_RTX,
|
||||
label);
|
||||
jump = get_last_insn ();
|
||||
JUMP_LABEL (jump) = label;
|
||||
REG_NOTES (jump)
|
||||
= gen_rtx_EXPR_LIST (REG_BR_PROB,
|
||||
GEN_INT (p), REG_NOTES (jump));
|
||||
|
||||
LABEL_NUSES (label)++;
|
||||
branch_code = get_insns ();
|
||||
end_sequence ();
|
||||
branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
|
||||
block_label (preheader), p, NULL_RTX);
|
||||
|
||||
swtch = loop_split_edge_with (swtch->pred, branch_code);
|
||||
set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
|
||||
|
@ -886,20 +947,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
|||
p = REG_BR_PROB_BASE / (max_unroll + 1);
|
||||
swtch = ezc_swtch;
|
||||
preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
|
||||
label = block_label (preheader);
|
||||
start_sequence ();
|
||||
do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0,
|
||||
GET_MODE (desc->var), NULL_RTX, NULL_RTX,
|
||||
label);
|
||||
jump = get_last_insn ();
|
||||
JUMP_LABEL (jump) = label;
|
||||
REG_NOTES (jump)
|
||||
= gen_rtx_EXPR_LIST (REG_BR_PROB,
|
||||
GEN_INT (p), REG_NOTES (jump));
|
||||
|
||||
LABEL_NUSES (label)++;
|
||||
branch_code = get_insns ();
|
||||
end_sequence ();
|
||||
branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
|
||||
block_label (preheader), p, NULL_RTX);
|
||||
|
||||
swtch = loop_split_edge_with (swtch->succ, branch_code);
|
||||
set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
|
||||
|
@ -925,11 +974,45 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
|||
|
||||
free (wont_exit);
|
||||
|
||||
if (exit_at_end)
|
||||
{
|
||||
basic_block exit_block = desc->in_edge->src->rbi->copy;
|
||||
/* Find a new in and out edge; they are in the last copy we have made. */
|
||||
|
||||
if (exit_block->succ->dest == desc->out_edge->dest)
|
||||
{
|
||||
desc->out_edge = exit_block->succ;
|
||||
desc->in_edge = exit_block->succ->succ_next;
|
||||
}
|
||||
else
|
||||
{
|
||||
desc->out_edge = exit_block->succ->succ_next;
|
||||
desc->in_edge = exit_block->succ;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove the edges. */
|
||||
for (i = 0; i < n_remove_edges; i++)
|
||||
remove_path (loops, remove_edges[i]);
|
||||
free (remove_edges);
|
||||
|
||||
/* We must be careful when updating the number of iterations due to
|
||||
preconditioning and the fact that the value must be valid at entry
|
||||
of the loop. After passing through the above code, we see that
|
||||
the correct new number of iterations is this: */
|
||||
if (desc->const_iter)
|
||||
abort ();
|
||||
desc->niter_expr =
|
||||
simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
|
||||
desc->niter_max /= max_unroll + 1;
|
||||
if (exit_at_end)
|
||||
{
|
||||
desc->niter_expr =
|
||||
simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
|
||||
desc->noloop_assumptions = NULL_RTX;
|
||||
desc->niter_max--;
|
||||
}
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file,
|
||||
";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
|
||||
|
@ -941,6 +1024,7 @@ static void
|
|||
decide_peel_simple (struct loop *loop, int flags)
|
||||
{
|
||||
unsigned npeel;
|
||||
struct niter_desc *desc;
|
||||
|
||||
if (!(flags & UAP_PEEL))
|
||||
{
|
||||
|
@ -949,7 +1033,7 @@ decide_peel_simple (struct loop *loop, int flags)
|
|||
}
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Considering simply peeling loop\n");
|
||||
fprintf (rtl_dump_file, "\n;; Considering simply peeling loop\n");
|
||||
|
||||
/* npeel = number of iterations to peel. */
|
||||
npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
|
||||
|
@ -965,14 +1049,10 @@ decide_peel_simple (struct loop *loop, int flags)
|
|||
}
|
||||
|
||||
/* Check for simple loops. */
|
||||
if (!loop->has_desc)
|
||||
{
|
||||
loop->simple = simple_loop_p (loop, &loop->desc);
|
||||
loop->has_desc = 1;
|
||||
}
|
||||
desc = get_simple_loop_desc (loop);
|
||||
|
||||
/* Check number of iterations. */
|
||||
if (loop->simple && loop->desc.const_iter)
|
||||
if (desc->simple_p && !desc->assumptions && desc->const_iter)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
|
||||
|
@ -981,7 +1061,7 @@ decide_peel_simple (struct loop *loop, int flags)
|
|||
|
||||
/* Do not simply peel loops with branches inside -- it increases number
|
||||
of mispredicts. */
|
||||
if (loop->desc.n_branches > 1)
|
||||
if (num_loop_branches (loop) > 1)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
|
||||
|
@ -1016,6 +1096,10 @@ decide_peel_simple (struct loop *loop, int flags)
|
|||
/* Success. */
|
||||
loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
|
||||
loop->lpt_decision.times = npeel;
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Decided to simply peel the loop, %d times.\n",
|
||||
loop->lpt_decision.times);
|
||||
}
|
||||
|
||||
/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
|
||||
|
@ -1037,6 +1121,7 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
|
|||
{
|
||||
sbitmap wont_exit;
|
||||
unsigned npeel = loop->lpt_decision.times;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
|
||||
wont_exit = sbitmap_alloc (npeel + 1);
|
||||
sbitmap_zero (wont_exit);
|
||||
|
@ -1048,6 +1133,23 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
|
|||
|
||||
free (wont_exit);
|
||||
|
||||
if (desc->simple_p)
|
||||
{
|
||||
if (desc->const_iter)
|
||||
{
|
||||
desc->niter -= npeel;
|
||||
desc->niter_expr = GEN_INT (desc->niter);
|
||||
desc->noloop_assumptions = NULL_RTX;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We cannot just update niter_expr, as its value might be clobbered
|
||||
inside loop. We could handle this by counting the number into
|
||||
temporary just like we do in runtime unrolling, but it does not
|
||||
seem worthwhile. */
|
||||
free_simple_loop_desc (loop);
|
||||
}
|
||||
}
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel);
|
||||
}
|
||||
|
@ -1057,6 +1159,7 @@ static void
|
|||
decide_unroll_stupid (struct loop *loop, int flags)
|
||||
{
|
||||
unsigned nunroll, nunroll_by_av, i;
|
||||
struct niter_desc *desc;
|
||||
|
||||
if (!(flags & UAP_UNROLL_ALL))
|
||||
{
|
||||
|
@ -1065,7 +1168,7 @@ decide_unroll_stupid (struct loop *loop, int flags)
|
|||
}
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n");
|
||||
fprintf (rtl_dump_file, "\n;; Considering unrolling loop stupidly\n");
|
||||
|
||||
/* nunroll = total number of copies of the original loop body in
|
||||
unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
|
||||
|
@ -1085,14 +1188,10 @@ decide_unroll_stupid (struct loop *loop, int flags)
|
|||
}
|
||||
|
||||
/* Check for simple loops. */
|
||||
if (!loop->has_desc)
|
||||
{
|
||||
loop->simple = simple_loop_p (loop, &loop->desc);
|
||||
loop->has_desc = 1;
|
||||
}
|
||||
desc = get_simple_loop_desc (loop);
|
||||
|
||||
/* Check simpleness. */
|
||||
if (loop->simple)
|
||||
if (desc->simple_p && !desc->assumptions)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; The loop is simple\n");
|
||||
|
@ -1101,7 +1200,7 @@ decide_unroll_stupid (struct loop *loop, int flags)
|
|||
|
||||
/* Do not unroll loops with branches inside -- it increases number
|
||||
of mispredicts. */
|
||||
if (loop->desc.n_branches > 1)
|
||||
if (num_loop_branches (loop) > 1)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
|
||||
|
@ -1109,7 +1208,8 @@ decide_unroll_stupid (struct loop *loop, int flags)
|
|||
}
|
||||
|
||||
/* If we have profile feedback, check whether the loop rolls. */
|
||||
if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
|
||||
if (loop->header->count
|
||||
&& expected_loop_iterations (loop) < 2 * nunroll)
|
||||
{
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
|
||||
|
@ -1119,10 +1219,16 @@ decide_unroll_stupid (struct loop *loop, int flags)
|
|||
/* Success. Now force nunroll to be power of 2, as it seems that this
|
||||
improves results (partially because of better alignments, partially
|
||||
because of some dark magic). */
|
||||
for (i = 1; 2 * i <= nunroll; i *= 2);
|
||||
for (i = 1; 2 * i <= nunroll; i *= 2)
|
||||
continue;
|
||||
|
||||
loop->lpt_decision.decision = LPT_UNROLL_STUPID;
|
||||
loop->lpt_decision.times = i - 1;
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file,
|
||||
";; Decided to unroll the loop stupidly, %d times.\n",
|
||||
loop->lpt_decision.times);
|
||||
}
|
||||
|
||||
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
|
||||
|
@ -1147,6 +1253,7 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
|
|||
{
|
||||
sbitmap wont_exit;
|
||||
unsigned nunroll = loop->lpt_decision.times;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
|
||||
wont_exit = sbitmap_alloc (nunroll + 1);
|
||||
sbitmap_zero (wont_exit);
|
||||
|
@ -1158,6 +1265,17 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
|
|||
|
||||
free (wont_exit);
|
||||
|
||||
if (desc->simple_p)
|
||||
{
|
||||
/* We indeed may get here provided that there are nontrivial assumptions
|
||||
for a loop to be really simple. We could update the counts, but the
|
||||
problem is that we are unable to decide which exit will be taken
|
||||
(not really true in case the number of iterations is constant,
|
||||
but no one will do anything with this information, so we do not
|
||||
worry about it). */
|
||||
desc->simple_p = false;
|
||||
}
|
||||
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n",
|
||||
nunroll, num_loop_insns (loop));
|
||||
|
|
|
@ -79,11 +79,63 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
with handling this case. */
|
||||
|
||||
static struct loop *unswitch_loop (struct loops *, struct loop *,
|
||||
basic_block);
|
||||
basic_block, rtx, rtx);
|
||||
static void unswitch_single_loop (struct loops *, struct loop *, rtx, int);
|
||||
static bool may_unswitch_on_p (basic_block, struct loop *,
|
||||
basic_block *);
|
||||
static rtx reversed_condition (rtx);
|
||||
static rtx may_unswitch_on (basic_block, struct loop *, rtx *);
|
||||
|
||||
/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
|
||||
true, with probability PROB. If CINSN is not NULL, it is the insn to copy
|
||||
in order to create a jump. */
|
||||
|
||||
rtx
|
||||
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
|
||||
rtx cinsn)
|
||||
{
|
||||
rtx seq, jump, cond;
|
||||
enum machine_mode mode;
|
||||
|
||||
mode = GET_MODE (op0);
|
||||
if (mode == VOIDmode)
|
||||
mode = GET_MODE (op1);
|
||||
|
||||
start_sequence ();
|
||||
if (GET_MODE_CLASS (mode) == MODE_CC)
|
||||
{
|
||||
/* A hack -- there seems to be no easy generic way to make a
|
||||
conditional jump from a ccmode comparison. */
|
||||
if (!cinsn)
|
||||
abort ();
|
||||
cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
|
||||
if (GET_CODE (cond) != comp
|
||||
|| !rtx_equal_p (op0, XEXP (cond, 0))
|
||||
|| !rtx_equal_p (op1, XEXP (cond, 1)))
|
||||
abort ();
|
||||
emit_jump_insn (copy_insn (PATTERN (cinsn)));
|
||||
jump = get_last_insn ();
|
||||
JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
|
||||
LABEL_NUSES (JUMP_LABEL (jump))++;
|
||||
redirect_jump (jump, label, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cinsn)
|
||||
abort ();
|
||||
|
||||
op0 = force_operand (op0, NULL_RTX);
|
||||
op1 = force_operand (op1, NULL_RTX);
|
||||
do_compare_rtx_and_jump (op0, op1, comp, 0,
|
||||
mode, NULL_RTX, NULL_RTX, label);
|
||||
jump = get_last_insn ();
|
||||
JUMP_LABEL (jump) = label;
|
||||
LABEL_NUSES (label)++;
|
||||
}
|
||||
REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
|
||||
REG_NOTES (jump));
|
||||
seq = get_insns ();
|
||||
end_sequence ();
|
||||
|
||||
return seq;
|
||||
}
|
||||
|
||||
/* Main entry point. Perform loop unswitching on all suitable LOOPS. */
|
||||
void
|
||||
|
@ -111,48 +163,82 @@ unswitch_loops (struct loops *loops)
|
|||
verify_loop_structure (loops);
|
||||
#endif
|
||||
}
|
||||
|
||||
iv_analysis_done ();
|
||||
}
|
||||
|
||||
/* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
|
||||
basic blocks (for what it means see comments below). List of basic blocks
|
||||
inside LOOP is provided in BODY to save time. */
|
||||
static bool
|
||||
may_unswitch_on_p (basic_block bb, struct loop *loop, basic_block *body)
|
||||
basic blocks (for what it means see comments below). In case condition
|
||||
compares loop invariant cc mode register, return the jump in CINSN. */
|
||||
|
||||
static rtx
|
||||
may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
|
||||
{
|
||||
rtx test;
|
||||
rtx test, at, insn, op[2];
|
||||
struct rtx_iv iv;
|
||||
unsigned i;
|
||||
enum machine_mode mode;
|
||||
|
||||
/* BB must end in a simple conditional jump. */
|
||||
if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next)
|
||||
return false;
|
||||
return NULL_RTX;
|
||||
if (!any_condjump_p (BB_END (bb)))
|
||||
return false;
|
||||
return NULL_RTX;
|
||||
|
||||
/* With branches inside loop. */
|
||||
if (!flow_bb_inside_loop_p (loop, bb->succ->dest)
|
||||
|| !flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest))
|
||||
return false;
|
||||
return NULL_RTX;
|
||||
|
||||
/* It must be executed just once each iteration (because otherwise we
|
||||
are unable to update dominator/irreducible loop information correctly). */
|
||||
if (!just_once_each_iteration_p (loop, bb))
|
||||
return false;
|
||||
return NULL_RTX;
|
||||
|
||||
/* Condition must be invariant. We use just a stupid test of invariantness
|
||||
of the condition: all used regs must not be modified inside loop body. */
|
||||
test = get_condition (BB_END (bb), NULL, true);
|
||||
/* Condition must be invariant. */
|
||||
test = get_condition (BB_END (bb), &at, true);
|
||||
if (!test)
|
||||
return false;
|
||||
return NULL_RTX;
|
||||
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
if (modified_between_p (test, BB_HEAD (body[i]), NEXT_INSN (BB_END (body[i]))))
|
||||
return false;
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
op[i] = XEXP (test, i);
|
||||
|
||||
return true;
|
||||
if (CONSTANT_P (op[i]))
|
||||
continue;
|
||||
|
||||
insn = iv_get_reaching_def (at, op[i]);
|
||||
if (!iv_analyse (insn, op[i], &iv))
|
||||
return NULL_RTX;
|
||||
if (iv.step != const0_rtx
|
||||
|| iv.first_special)
|
||||
return NULL_RTX;
|
||||
|
||||
op[i] = get_iv_value (&iv, const0_rtx);
|
||||
}
|
||||
|
||||
mode = GET_MODE (op[0]);
|
||||
if (mode == VOIDmode)
|
||||
mode = GET_MODE (op[1]);
|
||||
if (GET_MODE_CLASS (mode) == MODE_CC)
|
||||
{
|
||||
if (at != BB_END (bb))
|
||||
return NULL_RTX;
|
||||
|
||||
*cinsn = BB_END (bb);
|
||||
if (!rtx_equal_p (op[0], XEXP (test, 0))
|
||||
|| !rtx_equal_p (op[1], XEXP (test, 1)))
|
||||
return NULL_RTX;
|
||||
|
||||
return test;
|
||||
}
|
||||
|
||||
return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
|
||||
op[0], op[1]));
|
||||
}
|
||||
|
||||
/* Reverses CONDition; returns NULL if we cannot. */
|
||||
static rtx
|
||||
rtx
|
||||
reversed_condition (rtx cond)
|
||||
{
|
||||
enum rtx_code reversed;
|
||||
|
@ -173,13 +259,10 @@ static void
|
|||
unswitch_single_loop (struct loops *loops, struct loop *loop,
|
||||
rtx cond_checked, int num)
|
||||
{
|
||||
basic_block *bbs, bb;
|
||||
basic_block *bbs;
|
||||
struct loop *nloop;
|
||||
unsigned i;
|
||||
int true_first;
|
||||
rtx cond, rcond, conds, rconds, acond, split_before;
|
||||
int always_true;
|
||||
int always_false;
|
||||
rtx cond, rcond, conds, rconds, acond, cinsn = NULL_RTX;
|
||||
int repeat;
|
||||
edge e;
|
||||
|
||||
|
@ -237,8 +320,9 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
|
|||
|
||||
/* Find a bb to unswitch on. */
|
||||
bbs = get_loop_body (loop);
|
||||
iv_analysis_loop_init (loop);
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
if (may_unswitch_on_p (bbs[i], loop, bbs))
|
||||
if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
|
||||
break;
|
||||
|
||||
if (i == loop->num_nodes)
|
||||
|
@ -247,39 +331,26 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
|
|||
return;
|
||||
}
|
||||
|
||||
if (!(cond = get_condition (BB_END (bbs[i]), &split_before, true)))
|
||||
abort ();
|
||||
rcond = reversed_condition (cond);
|
||||
if (rcond)
|
||||
rcond = canon_condition (rcond);
|
||||
|
||||
/* Check whether the result can be predicted. */
|
||||
always_true = 0;
|
||||
always_false = 0;
|
||||
for (acond = cond_checked; acond; acond = XEXP (acond, 1))
|
||||
{
|
||||
if (rtx_equal_p (cond, XEXP (acond, 0)))
|
||||
{
|
||||
always_true = 1;
|
||||
break;
|
||||
}
|
||||
if (rtx_equal_p (rcond, XEXP (acond, 0)))
|
||||
{
|
||||
always_false = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
simplify_using_condition (XEXP (acond, 0), &cond, NULL);
|
||||
|
||||
if (always_true)
|
||||
if (cond == const_true_rtx)
|
||||
{
|
||||
/* Remove false path. */
|
||||
for (e = bbs[i]->succ; !(e->flags & EDGE_FALLTHRU); e = e->succ_next);
|
||||
e = FALLTHRU_EDGE (bbs[i]);
|
||||
remove_path (loops, e);
|
||||
free (bbs);
|
||||
repeat = 1;
|
||||
}
|
||||
else if (always_false)
|
||||
else if (cond == const0_rtx)
|
||||
{
|
||||
/* Remove true path. */
|
||||
for (e = bbs[i]->succ; e->flags & EDGE_FALLTHRU; e = e->succ_next);
|
||||
e = BRANCH_EDGE (bbs[i]);
|
||||
remove_path (loops, e);
|
||||
free (bbs);
|
||||
repeat = 1;
|
||||
|
@ -293,21 +364,17 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
|
|||
else
|
||||
rconds = cond_checked;
|
||||
|
||||
/* Separate condition in a single basic block. */
|
||||
bb = split_loop_bb (bbs[i], PREV_INSN (split_before))->dest;
|
||||
free (bbs);
|
||||
true_first = !(bb->succ->flags & EDGE_FALLTHRU);
|
||||
if (rtl_dump_file)
|
||||
fprintf (rtl_dump_file, ";; Unswitching loop\n");
|
||||
|
||||
/* Unswitch the loop on this condition. */
|
||||
nloop = unswitch_loop (loops, loop, bb);
|
||||
nloop = unswitch_loop (loops, loop, bbs[i], cond, cinsn);
|
||||
if (!nloop)
|
||||
abort ();
|
||||
|
||||
/* Invoke itself on modified loops. */
|
||||
unswitch_single_loop (loops, nloop, true_first ? conds : rconds, num + 1);
|
||||
unswitch_single_loop (loops, loop, true_first ? rconds : conds, num + 1);
|
||||
unswitch_single_loop (loops, nloop, rconds, num + 1);
|
||||
unswitch_single_loop (loops, loop, conds, num + 1);
|
||||
|
||||
free_EXPR_LIST_node (conds);
|
||||
if (rcond)
|
||||
|
@ -316,17 +383,21 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
|
|||
|
||||
/* Unswitch a LOOP w.r.t. the given basic block UNSWITCH_ON. We only support
|
||||
unswitching of innermost loops. UNSWITCH_ON must be executed in every
|
||||
iteration, i.e. it must dominate LOOP latch, and should only contain code
|
||||
for the condition we unswitch on. Returns NULL if impossible, new
|
||||
loop otherwise. */
|
||||
iteration, i.e. it must dominate LOOP latch. COND is the condition
|
||||
determining which loop is entered. Returns NULL if impossible, new loop
|
||||
otherwise. The new loop is entered if COND is true. If CINSN is not
|
||||
NULL, it is the insn in which COND is compared. */
|
||||
|
||||
static struct loop *
|
||||
unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
|
||||
unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on,
|
||||
rtx cond, rtx cinsn)
|
||||
{
|
||||
edge entry, latch_edge;
|
||||
edge entry, latch_edge, true_edge, false_edge, e;
|
||||
basic_block switch_bb, unswitch_on_alt, src;
|
||||
struct loop *nloop;
|
||||
sbitmap zero_bitmap;
|
||||
int irred_flag;
|
||||
int irred_flag, prob;
|
||||
rtx seq;
|
||||
|
||||
/* Some sanity checking. */
|
||||
if (!flow_bb_inside_loop_p (loop, unswitch_on))
|
||||
|
@ -343,12 +414,6 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
|
|||
if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->succ_next->dest))
|
||||
abort ();
|
||||
|
||||
/* Will we be able to perform redirection? */
|
||||
if (!any_condjump_p (BB_END (unswitch_on)))
|
||||
return NULL;
|
||||
if (!cfg_layout_can_duplicate_bb_p (unswitch_on))
|
||||
return NULL;
|
||||
|
||||
entry = loop_preheader_edge (loop);
|
||||
|
||||
/* Make a copy. */
|
||||
|
@ -365,10 +430,24 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
|
|||
|
||||
/* Record the block with condition we unswitch on. */
|
||||
unswitch_on_alt = unswitch_on->rbi->copy;
|
||||
true_edge = BRANCH_EDGE (unswitch_on_alt);
|
||||
false_edge = FALLTHRU_EDGE (unswitch_on);
|
||||
latch_edge = loop->latch->rbi->copy->succ;
|
||||
|
||||
/* Create a block with the condition. */
|
||||
prob = true_edge->probability;
|
||||
switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
|
||||
seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
|
||||
block_label (true_edge->dest),
|
||||
prob, cinsn);
|
||||
emit_insn_after (seq, BB_END (switch_bb));
|
||||
e = make_edge (switch_bb, true_edge->dest, 0);
|
||||
e->probability = prob;
|
||||
e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
|
||||
e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
|
||||
e->probability = false_edge->probability;
|
||||
e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;
|
||||
|
||||
/* Make a copy of the block containing the condition; we will use
|
||||
it as switch to decide which loop we want to use. */
|
||||
switch_bb = cfg_layout_duplicate_bb (unswitch_on, NULL);
|
||||
if (irred_flag)
|
||||
{
|
||||
switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
|
||||
|
@ -381,19 +460,14 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
|
|||
switch_bb->succ->flags &= ~EDGE_IRREDUCIBLE_LOOP;
|
||||
switch_bb->succ->succ_next->flags &= ~EDGE_IRREDUCIBLE_LOOP;
|
||||
}
|
||||
unswitch_on->rbi->copy = unswitch_on_alt;
|
||||
|
||||
/* Loopify from the copy of LOOP body, constructing the new loop. */
|
||||
for (latch_edge = loop->latch->rbi->copy->succ;
|
||||
latch_edge->dest != loop->header;
|
||||
latch_edge = latch_edge->succ_next);
|
||||
nloop = loopify (loops, latch_edge,
|
||||
loop->header->rbi->copy->pred, switch_bb);
|
||||
|
||||
/* Remove branches that are now unreachable in new loops. We rely on the
|
||||
fact that cfg_layout_duplicate_bb reverses list of edges. */
|
||||
remove_path (loops, unswitch_on->succ);
|
||||
remove_path (loops, unswitch_on_alt->succ);
|
||||
/* Remove branches that are now unreachable in new loops. */
|
||||
remove_path (loops, true_edge);
|
||||
remove_path (loops, false_edge);
|
||||
|
||||
/* One of the created loops does not have to be a subloop of the outer loop now,
|
||||
so fix its placement in loop data structure. */
|
||||
|
|
|
@ -406,13 +406,16 @@ estimate_probability (struct loops *loops_info)
|
|||
unsigned j;
|
||||
int exits;
|
||||
struct loop *loop = loops_info->parray[i];
|
||||
struct loop_desc desc;
|
||||
struct niter_desc desc;
|
||||
unsigned HOST_WIDE_INT niter;
|
||||
|
||||
flow_loop_scan (loop, LOOP_EXIT_EDGES);
|
||||
exits = loop->num_exits;
|
||||
|
||||
if (simple_loop_p (loop, &desc) && desc.const_iter)
|
||||
iv_analysis_loop_init (loop);
|
||||
find_simple_exit (loop, &desc);
|
||||
|
||||
if (desc.simple_p && desc.const_iter)
|
||||
{
|
||||
int prob;
|
||||
niter = desc.niter + 1;
|
||||
|
@ -472,6 +475,8 @@ estimate_probability (struct loops *loops_info)
|
|||
free (bbs);
|
||||
}
|
||||
|
||||
iv_analysis_done ();
|
||||
|
||||
/* Attempt to predict conditional jumps using a number of heuristics. */
|
||||
FOR_EACH_BB (bb)
|
||||
{
|
||||
|
|
11
gcc/rtl.h
11
gcc/rtl.h
|
@ -2361,4 +2361,15 @@ extern void tracer (void);
|
|||
/* In var-tracking.c */
|
||||
extern void variable_tracking_main (void);
|
||||
|
||||
/* In stor-layout.c. */
|
||||
extern void get_mode_bounds (enum machine_mode, int, rtx *, rtx *);
|
||||
|
||||
/* In loop-unswitch.c */
|
||||
extern rtx reversed_condition (rtx);
|
||||
extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx);
|
||||
|
||||
/* In loop-iv.c */
|
||||
extern rtx canon_condition (rtx);
|
||||
extern void simplify_using_condition (rtx, rtx *, struct bitmap_head_def *);
|
||||
|
||||
#endif /* ! GCC_RTL_H */
|
||||
|
|
|
@ -2118,4 +2118,27 @@ get_best_mode (int bitsize, int bitpos, unsigned int align,
|
|||
return mode;
|
||||
}
|
||||
|
||||
/* Gets minimal and maximal values for MODE (signed or unsigned depending on
|
||||
SIGN). */
|
||||
|
||||
void
|
||||
get_mode_bounds (enum machine_mode mode, int sign, rtx *mmin, rtx *mmax)
|
||||
{
|
||||
int size = GET_MODE_BITSIZE (mode);
|
||||
|
||||
if (size > HOST_BITS_PER_WIDE_INT)
|
||||
abort ();
|
||||
|
||||
if (sign)
|
||||
{
|
||||
*mmin = GEN_INT (-((unsigned HOST_WIDE_INT) 1 << (size - 1)));
|
||||
*mmax = GEN_INT (((unsigned HOST_WIDE_INT) 1 << (size - 1)) - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
*mmin = const0_rtx;
|
||||
*mmax = GEN_INT (((unsigned HOST_WIDE_INT) 1 << (size - 1) << 1) - 1);
|
||||
}
|
||||
}
|
||||
|
||||
#include "gt-stor-layout.h"
|
||||
|
|
11
gcc/toplev.c
11
gcc/toplev.c
|
@ -3034,11 +3034,16 @@ static void
|
|||
rest_of_handle_loop2 (tree decl, rtx insns)
|
||||
{
|
||||
struct loops *loops;
|
||||
basic_block bb;
|
||||
|
||||
timevar_push (TV_LOOP);
|
||||
open_dump_file (DFI_loop2, decl);
|
||||
if (rtl_dump_file)
|
||||
dump_flow_info (rtl_dump_file);
|
||||
|
||||
/* Initialize structures for layout changes. */
|
||||
cfg_layout_initialize ();
|
||||
|
||||
loops = loop_optimizer_init (rtl_dump_file);
|
||||
|
||||
if (loops)
|
||||
|
@ -3056,6 +3061,12 @@ rest_of_handle_loop2 (tree decl, rtx insns)
|
|||
loop_optimizer_finalize (loops, rtl_dump_file);
|
||||
}
|
||||
|
||||
/* Finalize layout changes. */
|
||||
FOR_EACH_BB (bb)
|
||||
if (bb->next_bb != EXIT_BLOCK_PTR)
|
||||
bb->rbi->next = bb->next_bb;
|
||||
cfg_layout_finalize ();
|
||||
|
||||
cleanup_cfg (CLEANUP_EXPENSIVE);
|
||||
delete_trivially_dead_insns (insns, max_reg_num ());
|
||||
reg_scan (insns, max_reg_num (), 0);
|
||||
|
|
Loading…
Reference in New Issue