Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency.

* Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency.
	* basic-block.h (struct reorder_block_def): Add copy_number field.
	* cfgloop.h (biv_p): Declare.
	* cfgloopmanip.c (duplicate_loop_to_header_edge): Set copy_number.
	* common.opt (fsplit-ivs-in-unroller): New flag.
	* loop-iv.c (biv_p): New function.
	* loop-unroll.c: Include hashtab.h and recog.h.
	(struct iv_to_split, struct split_ivs_info): New types.
	(analyze_ivs_to_split, si_info_start_duplication, split_ivs_in_copies,
	free_si_info, si_info_hash, si_info_eq, analyze_iv_to_split_insn,
	determine_split_iv_delta, get_ivts_expr, allocate_basic_variable,
	insert_base_initialization, split_iv): New functions.
	(peel_loop_completely, unroll_loop_constant_iterations,
	unroll_loop_runtime_iterations, peel_loop_simple, unroll_loop_stupid):
	Use them.
	* doc/invoke.texi (-fsplit-ivs-in-unroller): Document.

Co-Authored-By: Steven Bosscher <stevenb@suse.de>

From-SVN: r87487
This commit is contained in:
Zdenek Dvorak 2004-09-14 10:05:46 +02:00 committed by Zdenek Dvorak
parent d840495b15
commit 113d659afd
9 changed files with 533 additions and 8 deletions

View File

@ -1,3 +1,23 @@
2004-09-14 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
Steven Bosscher <stevenb@suse.de>
* Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency.
* basic-block.h (struct reorder_block_def): Add copy_number field.
* cfgloop.h (biv_p): Declare.
* cfgloopmanip.c (duplicate_loop_to_header_edge): Set copy_number.
* common.opt (fsplit-ivs-in-unroller): New flag.
* loop-iv.c (biv_p): New function.
* loop-unroll.c: Include hashtab.h and recog.h.
(struct iv_to_split, struct split_ivs_info): New types.
(analyze_ivs_to_split, si_info_start_duplication, split_ivs_in_copies,
free_si_info, si_info_hash, si_info_eq, analyze_iv_to_split_insn,
determine_split_iv_delta, get_ivts_expr, allocate_basic_variable,
insert_base_initialization, split_iv): New functions.
(peel_loop_completely, unroll_loop_constant_iterations,
unroll_loop_runtime_iterations, peel_loop_simple, unroll_loop_stupid):
Use them.
* doc/invoke.texi (-fsplit-ivs-in-unroller): Document.
2004-09-14 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
* tree-cfg.c (thread_jumps): Update dominators correctly in

View File

@ -2028,7 +2028,7 @@ loop-unswitch.o : loop-unswitch.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
output.h $(EXPR_H) coretypes.h $(TM_H)
loop-unroll.o: loop-unroll.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
$(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(CFGLAYOUT_H) $(PARAMS_H) \
output.h $(EXPR_H) coretypes.h $(TM_H)
output.h $(EXPR_H) coretypes.h $(TM_H) $(HASHTAB_H) $(RECOG_H)
dominance.o : dominance.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
hard-reg-set.h $(BASIC_BLOCK_H) et-forest.h
et-forest.o : et-forest.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) et-forest.h alloc-pool.h

View File

@ -295,6 +295,7 @@ typedef struct reorder_block_def
/* Used by loop copying. */
basic_block copy;
int duplicated;
int copy_number;
/* These fields are used by bb-reorder pass. */
int visited;

View File

@ -410,6 +410,7 @@ extern void iv_analysis_loop_init (struct loop *);
extern rtx iv_get_reaching_def (rtx, rtx);
extern bool iv_analyze (rtx, rtx, struct rtx_iv *);
extern rtx get_iv_value (struct rtx_iv *, rtx);
extern bool biv_p (rtx, rtx);
extern void find_simple_exit (struct loop *, struct niter_desc *);
extern void iv_number_of_iterations (struct loop *, rtx, rtx,
struct niter_desc *);

View File

@ -991,6 +991,9 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, struct loops *loops,
/* Copy bbs. */
copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop);
for (i = 0; i < n; i++)
new_bbs[i]->rbi->copy_number = j + 1;
/* Note whether the blocks and edges belong to an irreducible loop. */
if (add_irreducible_flag)
{
@ -1069,6 +1072,8 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, struct loops *loops,
int n_dom_bbs,j;
bb = bbs[i];
bb->rbi->copy_number = 0;
n_dom_bbs = get_dominated_by (CDI_DOMINATORS, bb, &dom_bbs);
for (j = 0; j < n_dom_bbs; j++)
{

View File

@ -744,6 +744,10 @@ fspeculative-prefetching
Common Report Var(flag_speculative_prefetching)
Use value profiling for speculative prefetching
fsplit-ivs-in-unroller
Common Report Var(flag_split_ivs_in_unroller) Init(1)
Split lifetimes of induction variables when loops are unrolled.
; Emit code to probe the stack, to help detect stack overflow; also
; may cause large objects to be allocated dynamically.
fstack-check

View File

@ -315,7 +315,7 @@ Objective-C and Objective-C++ Dialects}.
-fsignaling-nans -fsingle-precision-constant -fspeculative-prefetching @gol
-fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol
-funroll-all-loops -funroll-loops -fpeel-loops @gol
-funswitch-loops @gol
-fsplit-ivs-in-unroller -funswitch-loops @gol
-ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol
-ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
-ftree-dominator-opts -ftree-dse -ftree-copyrename @gol
@ -4696,6 +4696,20 @@ the loop is entered. This usually makes programs run more slowly.
@option{-funroll-all-loops} implies the same options as
@option{-funroll-loops},
@item -fsplit-ivs-in-unroller
@opindex -fsplit-ivs-in-unroller
Enables expressing of values of induction variables in later iterations
of the unrolled loop using the value in the first iteration. This breaks
long dependency chains, thus improving efficiency of the scheduling passes
(for best results, @option{-fweb} should be used as well).
Combination of @option{-fweb} and CSE is often sufficient to obtain the
same effect. However in cases the loop body is more complicated than
a single basic block, this is not reliable. It also does not work at all
on some of the architectures due to restrictions in the CSE pass.
This optimization is enabled by default.
@item -fprefetch-loop-arrays
@opindex fprefetch-loop-arrays
If supported by the target machine, generate instructions to prefetch

View File

@ -1183,6 +1183,24 @@ iv_analyze (rtx insn, rtx def, struct rtx_iv *iv)
return iv->base != NULL_RTX;
}
/* Checks whether definition of register REG in INSN a basic induction
variable. IV analysis must have been initialized (via a call to
iv_analysis_loop_init) for this function to produce a result. */
bool
biv_p (rtx insn, rtx reg)
{
struct rtx_iv iv;
if (!REG_P (reg))
return false;
if (last_def[REGNO (reg)] != insn)
return false;
return iv_analyze_biv (reg, &iv);
}
/* Calculates value of IV at ITERATION-th iteration. */
rtx

View File

@ -30,6 +30,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "params.h"
#include "output.h"
#include "expr.h"
#include "hashtab.h"
#include "recog.h"
/* This pass performs loop unrolling and peeling. We only perform these
optimizations on innermost loops (with single exception) because
@ -66,6 +68,28 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
showed that this choice may affect performance in order of several %.
*/
/* Information about induction variables to split. */
struct iv_to_split
{
rtx insn; /* The insn in that the induction variable occurs. */
rtx base_var; /* The variable on that the values in the further
iterations are based. */
rtx step; /* Step of the induction variable. */
unsigned n_loc;
unsigned loc[3]; /* Location where the definition of the induction
variable occurs in the insn. For example if
N_LOC is 2, the expression is located at
XEXP (XEXP (single_set, loc[0]), loc[1]). */
};
struct split_ivs_info
{
htab_t insns_to_split; /* A hashtable of insns to split. */
unsigned first_new_block; /* The first basic block that was
duplicated. */
};
static void decide_unrolling_and_peeling (struct loops *, int);
static void peel_loops_completely (struct loops *, int);
static void decide_peel_simple (struct loop *, int);
@ -79,6 +103,10 @@ static void peel_loop_completely (struct loops *, struct loop *);
static void unroll_loop_stupid (struct loops *, struct loop *);
static void unroll_loop_constant_iterations (struct loops *, struct loop *);
static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
static struct split_ivs_info *analyze_ivs_to_split (struct loop *);
static void si_info_start_duplication (struct split_ivs_info *);
static void split_ivs_in_copies (struct split_ivs_info *, unsigned, bool, bool);
static void free_si_info (struct split_ivs_info *);
/* Unroll and/or peel (depending on FLAGS) LOOPS. */
void
@ -116,7 +144,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
{
case LPT_PEEL_COMPLETELY:
/* Already done. */
abort ();
gcc_unreachable ();
case LPT_PEEL_SIMPLE:
peel_loop_simple (loops, loop);
break;
@ -133,7 +161,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
check = false;
break;
default:
abort ();
gcc_unreachable ();
}
if (check)
{
@ -428,6 +456,7 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
unsigned n_remove_edges, i;
edge *remove_edges, ei;
struct niter_desc *desc = get_simple_loop_desc (loop);
struct split_ivs_info *si_info = NULL;
npeel = desc->niter;
@ -442,6 +471,10 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
remove_edges = xcalloc (npeel, sizeof (edge));
n_remove_edges = 0;
if (flag_split_ivs_in_unroller)
si_info = analyze_ivs_to_split (loop);
si_info_start_duplication (si_info);
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
loops, npeel,
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
@ -450,6 +483,12 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
free (wont_exit);
if (si_info)
{
split_ivs_in_copies (si_info, npeel, false, true);
free_si_info (si_info);
}
/* Remove the exit edges. */
for (i = 0; i < n_remove_edges; i++)
remove_path (loops, remove_edges[i]);
@ -597,11 +636,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
unsigned max_unroll = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
bool exit_at_end = loop_exit_at_end_p (loop);
struct split_ivs_info *si_info = NULL;
niter = desc->niter;
if (niter <= max_unroll + 1)
abort (); /* Should not get here (such loop should be peeled instead). */
/* Should not get here (such loop should be peeled instead). */
gcc_assert (niter > max_unroll + 1);
exit_mod = niter % (max_unroll + 1);
@ -611,6 +651,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
n_remove_edges = 0;
if (flag_split_ivs_in_unroller)
si_info = analyze_ivs_to_split (loop);
if (!exit_at_end)
{
/* The exit is not at the end of the loop; leave exit test
@ -627,6 +670,7 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
if (exit_mod)
{
si_info_start_duplication (si_info);
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
loops, exit_mod,
wont_exit, desc->out_edge,
@ -634,6 +678,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
DLTHE_FLAG_UPDATE_FREQ))
abort ();
if (si_info && exit_mod > 1)
split_ivs_in_copies (si_info, exit_mod, false, false);
desc->noloop_assumptions = NULL_RTX;
desc->niter -= exit_mod;
desc->niter_max -= exit_mod;
@ -659,12 +706,16 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
if (desc->noloop_assumptions)
RESET_BIT (wont_exit, 1);
si_info_start_duplication (si_info);
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
loops, exit_mod + 1,
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
if (si_info && exit_mod > 0)
split_ivs_in_copies (si_info, exit_mod + 1, false, false);
desc->niter -= exit_mod + 1;
desc->niter_max -= exit_mod + 1;
desc->noloop_assumptions = NULL_RTX;
@ -677,12 +728,19 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
}
/* Now unroll the loop. */
si_info_start_duplication (si_info);
if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
loops, max_unroll,
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
if (si_info)
{
split_ivs_in_copies (si_info, max_unroll, true, true);
free_si_info (si_info);
}
free (wont_exit);
if (exit_at_end)
@ -842,6 +900,10 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
unsigned max_unroll = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
bool exit_at_end = loop_exit_at_end_p (loop);
struct split_ivs_info *si_info = NULL;
if (flag_split_ivs_in_unroller)
si_info = analyze_ivs_to_split (loop);
/* Remember blocks whose dominators will have to be updated. */
dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
@ -979,12 +1041,19 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
sbitmap_ones (wont_exit);
RESET_BIT (wont_exit, may_exit_copy);
si_info_start_duplication (si_info);
if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
loops, max_unroll,
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
if (si_info)
{
split_ivs_in_copies (si_info, max_unroll, true, true);
free_si_info (si_info);
}
free (wont_exit);
if (exit_at_end)
@ -1013,8 +1082,7 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
preconditioning and the fact that the value must be valid at entry
of the loop. After passing through the above code, we see that
the correct new number of iterations is this: */
if (desc->const_iter)
abort ();
gcc_assert (!desc->const_iter);
desc->niter_expr =
simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
desc->niter_max /= max_unroll + 1;
@ -1138,10 +1206,15 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
sbitmap wont_exit;
unsigned npeel = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
struct split_ivs_info *si_info = NULL;
if (flag_split_ivs_in_unroller && npeel > 1)
si_info = analyze_ivs_to_split (loop);
wont_exit = sbitmap_alloc (npeel + 1);
sbitmap_zero (wont_exit);
si_info_start_duplication (si_info);
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
loops, npeel, wont_exit, NULL, NULL, NULL,
DLTHE_FLAG_UPDATE_FREQ))
@ -1149,6 +1222,12 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
free (wont_exit);
if (si_info)
{
split_ivs_in_copies (si_info, npeel, false, false);
free_si_info (si_info);
}
if (desc->simple_p)
{
if (desc->const_iter)
@ -1271,15 +1350,26 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
sbitmap wont_exit;
unsigned nunroll = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
struct split_ivs_info *si_info = NULL;
if (flag_split_ivs_in_unroller)
si_info = analyze_ivs_to_split (loop);
wont_exit = sbitmap_alloc (nunroll + 1);
sbitmap_zero (wont_exit);
si_info_start_duplication (si_info);
if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
loops, nunroll, wont_exit, NULL, NULL, NULL,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
if (si_info)
{
split_ivs_in_copies (si_info, nunroll, true, true);
free_si_info (si_info);
}
free (wont_exit);
if (desc->simple_p)
@ -1297,3 +1387,375 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
nunroll, num_loop_insns (loop));
}
/* A hash function for information about insns to split. */
static hashval_t
si_info_hash (const void *ivts)
{
return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
}
/* An equality functions for information about insns to split. */
static int
si_info_eq (const void *ivts1, const void *ivts2)
{
const struct iv_to_split *i1 = ivts1;
const struct iv_to_split *i2 = ivts2;
return i1->insn == i2->insn;
}
/* Determine whether there is an induction variable in INSN that
we would like to split during unrolling. Return NULL if INSN
contains no interesting IVs. Otherwise, allocate an IV_TO_SPLIT
structure, fill it with the relevant information and return a
pointer to it. */
static struct iv_to_split *
analyze_iv_to_split_insn (rtx insn)
{
rtx set, dest;
struct rtx_iv iv;
struct iv_to_split *ivts;
/* For now we just split the basic induction variables. Later this may be
extended for example by selecting also addresses of memory references. */
set = single_set (insn);
if (!set)
return NULL;
dest = SET_DEST (set);
if (!REG_P (dest))
return NULL;
if (!biv_p (insn, dest))
return NULL;
if (!iv_analyze (insn, dest, &iv))
abort ();
if (iv.step == const0_rtx
|| iv.mode != iv.extend_mode)
return NULL;
/* Record the insn to split. */
ivts = xmalloc (sizeof (struct iv_to_split));
ivts->insn = insn;
ivts->base_var = NULL_RTX;
ivts->step = iv.step;
ivts->n_loc = 1;
ivts->loc[0] = 1;
return ivts;
}
/* Determines which of induction variables in LOOP to split.
Return a SPLIT_IVS_INFO struct with the hash table filled
with all insns to split IVs in. The FIRST_NEW_BLOCK field
is undefined for the return value. */
static struct split_ivs_info *
analyze_ivs_to_split (struct loop *loop)
{
basic_block *body, bb;
unsigned i;
struct split_ivs_info *si_info = xcalloc (1, sizeof (struct split_ivs_info));
rtx insn;
struct iv_to_split *ivts;
PTR *slot;
si_info->insns_to_split = htab_create (5 * loop->num_nodes,
si_info_hash, si_info_eq, free);
iv_analysis_loop_init (loop);
body = get_loop_body (loop);
for (i = 0; i < loop->num_nodes; i++)
{
bb = body[i];
if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
continue;
FOR_BB_INSNS (bb, insn)
{
if (!INSN_P (insn))
continue;
ivts = analyze_iv_to_split_insn (insn);
if (!ivts)
continue;
slot = htab_find_slot (si_info->insns_to_split, ivts, INSERT);
*slot = ivts;
}
}
free (body);
return si_info;
}
/* Called just before loop duplication. Records start of duplicated area
to SI_INFO. */
static void
si_info_start_duplication (struct split_ivs_info *si_info)
{
if (si_info)
si_info->first_new_block = last_basic_block;
}
/* Determine the number of iterations between initialization of the base
variable and the current copy (N_COPY). N_COPIES is the total number
of newly created copies. UNROLLING is true if we are unrolling
(not peeling) the loop. */
static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
if (unrolling)
{
/* If we are unrolling, initialization is done in the original loop
body (number 0). */
return n_copy;
}
else
{
/* If we are peeling, the copy in that the initialization occurs has
number 1. The original loop (number 0) is the last. */
if (n_copy)
return n_copy - 1;
else
return n_copies;
}
}
/* Locate in EXPR the expression corresponding to the location recorded
in IVTS, and return a pointer to the RTX for this location. */
static rtx *
get_ivts_expr (rtx expr, struct iv_to_split *ivts)
{
unsigned i;
rtx *ret = &expr;
for (i = 0; i < ivts->n_loc; i++)
ret = &XEXP (*ret, ivts->loc[i]);
return ret;
}
/* Allocate basic variable for the induction variable chain. Callback for
htab_traverse. */
static int
allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
{
struct iv_to_split *ivts = *slot;
rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
ivts->base_var = gen_reg_rtx (GET_MODE (expr));
return 1;
}
/* Insert initialization of basic variable of IVTS before INSN, taking
the initial value from INSN. */
static void
insert_base_initialization (struct iv_to_split *ivts, rtx insn)
{
rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
rtx seq;
start_sequence ();
expr = force_operand (expr, ivts->base_var);
if (expr != ivts->base_var)
emit_move_insn (ivts->base_var, expr);
seq = get_insns ();
end_sequence ();
emit_insn_before (seq, insn);
}
/* Replace the use of induction variable described in IVTS in INSN
by base variable + DELTA * step. */
static void
split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
{
rtx expr, *loc, seq, incr, var;
enum machine_mode mode = GET_MODE (ivts->base_var);
rtx src, dest, set;
/* Construct base + DELTA * step. */
if (!delta)
expr = ivts->base_var;
else
{
incr = simplify_gen_binary (MULT, mode,
ivts->step, gen_int_mode (delta, mode));
expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
ivts->base_var, incr);
}
/* Figure out where to do the replacement. */
loc = get_ivts_expr (single_set (insn), ivts);
/* If we can make the replacement right away, we're done. */
if (validate_change (insn, loc, expr, 0))
return;
/* Otherwise, force EXPR into a register and try again. */
start_sequence ();
var = gen_reg_rtx (mode);
expr = force_operand (expr, var);
if (expr != var)
emit_move_insn (var, expr);
seq = get_insns ();
end_sequence ();
emit_insn_before (seq, insn);
if (validate_change (insn, loc, var, 0))
return;
/* The last chance. Try recreating the assignment in insn
completely from scratch. */
set = single_set (insn);
gcc_assert (set);
start_sequence ();
*loc = var;
src = copy_rtx (SET_SRC (set));
dest = copy_rtx (SET_DEST (set));
src = force_operand (src, dest);
if (src != dest)
emit_move_insn (dest, src);
seq = get_insns ();
end_sequence ();
emit_insn_before (seq, insn);
delete_insn (insn);
}
/* Splits induction variables (that are marked in SI_INFO) in copies of loop.
I.e. replace
i = i + 1;
...
i = i + 1;
...
i = i + 1;
...
type chains by
i0 = i + 1
...
i = i0 + 1
...
i = i0 + 2
...
UNROLLING is true if we unrolled (not peeled) the loop.
REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
the loop (as it should happen in complete unrolling, but not in ordinary
peeling of the loop). */
static void
split_ivs_in_copies (struct split_ivs_info *si_info, unsigned n_copies,
bool unrolling, bool rewrite_original_loop)
{
unsigned i, delta;
basic_block bb, orig_bb;
rtx insn, orig_insn, next;
struct iv_to_split ivts_templ, *ivts;
/* Sanity check -- we need to put initialization in the original loop
body. */
gcc_assert (!unrolling || rewrite_original_loop);
/* Allocate the basic variables (i0). */
htab_traverse (si_info->insns_to_split, allocate_basic_variable, NULL);
for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
{
bb = BASIC_BLOCK (i);
orig_bb = bb->rbi->original;
delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
unrolling);
orig_insn = BB_HEAD (orig_bb);
for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
{
next = NEXT_INSN (insn);
if (!INSN_P (insn))
continue;
while (!INSN_P (orig_insn))
orig_insn = NEXT_INSN (orig_insn);
ivts_templ.insn = orig_insn;
ivts = htab_find (si_info->insns_to_split, &ivts_templ);
if (ivts)
{
#ifdef ENABLE_CHECKING
if (!rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)))
abort ();
#endif
if (!delta)
insert_base_initialization (ivts, insn);
split_iv (ivts, insn, delta);
}
orig_insn = NEXT_INSN (orig_insn);
}
}
if (!rewrite_original_loop)
return;
/* Rewrite also the original loop body. Find them as originals of the blocks
in the last copied iteration, i.e. those that have
bb->rbi->original->copy == bb. */
for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
{
bb = BASIC_BLOCK (i);
orig_bb = bb->rbi->original;
if (orig_bb->rbi->copy != bb)
continue;
delta = determine_split_iv_delta (0, n_copies, unrolling);
for (orig_insn = BB_HEAD (orig_bb);
orig_insn != NEXT_INSN (BB_END (bb));
orig_insn = next)
{
next = NEXT_INSN (orig_insn);
if (!INSN_P (orig_insn))
continue;
ivts_templ.insn = orig_insn;
ivts = htab_find (si_info->insns_to_split, &ivts_templ);
if (!ivts)
continue;
if (!delta)
insert_base_initialization (ivts, orig_insn);
split_iv (ivts, orig_insn, delta);
}
}
}
/* Release SI_INFO. */
static void
free_si_info (struct split_ivs_info *si_info)
{
htab_delete (si_info->insns_to_split);
free (si_info);
}