Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency.
* Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency. * basic-block.h (struct reorder_block_def): Add copy_number field. * cfgloop.h (biv_p): Declare. * cfgloopmanip.c (duplicate_loop_to_header_edge): Set copy_number. * common.opt (fsplit-ivs-in-unroller): New flag. * loop-iv.c (biv_p): New function. * loop-unroll.c: Include hashtab.h and recog.h. (struct iv_to_split, struct split_ivs_info): New types. (analyze_ivs_to_split, si_info_start_duplication, split_ivs_in_copies, free_si_info, si_info_hash, si_info_eq, analyze_iv_to_split_insn, determine_split_iv_delta, get_ivts_expr, allocate_basic_variable, insert_base_initialization, split_iv): New functions. (peel_loop_completely, unroll_loop_constant_iterations, unroll_loop_runtime_iterations, peel_loop_simple, unroll_loop_stupid): Use them. * doc/invoke.texi (-fsplit-ivs-in-unroller): Document. Co-Authored-By: Steven Bosscher <stevenb@suse.de> From-SVN: r87487
This commit is contained in:
parent
d840495b15
commit
113d659afd
|
@ -1,3 +1,23 @@
|
|||
2004-09-14 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
|
||||
Steven Bosscher <stevenb@suse.de>
|
||||
|
||||
* Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency.
|
||||
* basic-block.h (struct reorder_block_def): Add copy_number field.
|
||||
* cfgloop.h (biv_p): Declare.
|
||||
* cfgloopmanip.c (duplicate_loop_to_header_edge): Set copy_number.
|
||||
* common.opt (fsplit-ivs-in-unroller): New flag.
|
||||
* loop-iv.c (biv_p): New function.
|
||||
* loop-unroll.c: Include hashtab.h and recog.h.
|
||||
(struct iv_to_split, struct split_ivs_info): New types.
|
||||
(analyze_ivs_to_split, si_info_start_duplication, split_ivs_in_copies,
|
||||
free_si_info, si_info_hash, si_info_eq, analyze_iv_to_split_insn,
|
||||
determine_split_iv_delta, get_ivts_expr, allocate_basic_variable,
|
||||
insert_base_initialization, split_iv): New functions.
|
||||
(peel_loop_completely, unroll_loop_constant_iterations,
|
||||
unroll_loop_runtime_iterations, peel_loop_simple, unroll_loop_stupid):
|
||||
Use them.
|
||||
* doc/invoke.texi (-fsplit-ivs-in-unroller): Document.
|
||||
|
||||
2004-09-14 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
|
||||
|
||||
* tree-cfg.c (thread_jumps): Update dominators correctly in
|
||||
|
|
|
@ -2028,7 +2028,7 @@ loop-unswitch.o : loop-unswitch.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
|
|||
output.h $(EXPR_H) coretypes.h $(TM_H)
|
||||
loop-unroll.o: loop-unroll.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
|
||||
$(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(CFGLAYOUT_H) $(PARAMS_H) \
|
||||
output.h $(EXPR_H) coretypes.h $(TM_H)
|
||||
output.h $(EXPR_H) coretypes.h $(TM_H) $(HASHTAB_H) $(RECOG_H)
|
||||
dominance.o : dominance.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
|
||||
hard-reg-set.h $(BASIC_BLOCK_H) et-forest.h
|
||||
et-forest.o : et-forest.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) et-forest.h alloc-pool.h
|
||||
|
|
|
@ -295,6 +295,7 @@ typedef struct reorder_block_def
|
|||
/* Used by loop copying. */
|
||||
basic_block copy;
|
||||
int duplicated;
|
||||
int copy_number;
|
||||
|
||||
/* These fields are used by bb-reorder pass. */
|
||||
int visited;
|
||||
|
|
|
@ -410,6 +410,7 @@ extern void iv_analysis_loop_init (struct loop *);
|
|||
extern rtx iv_get_reaching_def (rtx, rtx);
|
||||
extern bool iv_analyze (rtx, rtx, struct rtx_iv *);
|
||||
extern rtx get_iv_value (struct rtx_iv *, rtx);
|
||||
extern bool biv_p (rtx, rtx);
|
||||
extern void find_simple_exit (struct loop *, struct niter_desc *);
|
||||
extern void iv_number_of_iterations (struct loop *, rtx, rtx,
|
||||
struct niter_desc *);
|
||||
|
|
|
@ -991,6 +991,9 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, struct loops *loops,
|
|||
/* Copy bbs. */
|
||||
copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop);
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
new_bbs[i]->rbi->copy_number = j + 1;
|
||||
|
||||
/* Note whether the blocks and edges belong to an irreducible loop. */
|
||||
if (add_irreducible_flag)
|
||||
{
|
||||
|
@ -1069,6 +1072,8 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, struct loops *loops,
|
|||
int n_dom_bbs,j;
|
||||
|
||||
bb = bbs[i];
|
||||
bb->rbi->copy_number = 0;
|
||||
|
||||
n_dom_bbs = get_dominated_by (CDI_DOMINATORS, bb, &dom_bbs);
|
||||
for (j = 0; j < n_dom_bbs; j++)
|
||||
{
|
||||
|
|
|
@ -744,6 +744,10 @@ fspeculative-prefetching
|
|||
Common Report Var(flag_speculative_prefetching)
|
||||
Use value profiling for speculative prefetching
|
||||
|
||||
fsplit-ivs-in-unroller
|
||||
Common Report Var(flag_split_ivs_in_unroller) Init(1)
|
||||
Split lifetimes of induction variables when loops are unrolled.
|
||||
|
||||
; Emit code to probe the stack, to help detect stack overflow; also
|
||||
; may cause large objects to be allocated dynamically.
|
||||
fstack-check
|
||||
|
|
|
@ -315,7 +315,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-fsignaling-nans -fsingle-precision-constant -fspeculative-prefetching @gol
|
||||
-fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol
|
||||
-funroll-all-loops -funroll-loops -fpeel-loops @gol
|
||||
-funswitch-loops @gol
|
||||
-fsplit-ivs-in-unroller -funswitch-loops @gol
|
||||
-ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol
|
||||
-ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
|
||||
-ftree-dominator-opts -ftree-dse -ftree-copyrename @gol
|
||||
|
@ -4696,6 +4696,20 @@ the loop is entered. This usually makes programs run more slowly.
|
|||
@option{-funroll-all-loops} implies the same options as
|
||||
@option{-funroll-loops},
|
||||
|
||||
@item -fsplit-ivs-in-unroller
|
||||
@opindex -fsplit-ivs-in-unroller
|
||||
Enables expressing of values of induction variables in later iterations
|
||||
of the unrolled loop using the value in the first iteration. This breaks
|
||||
long dependency chains, thus improving efficiency of the scheduling passes
|
||||
(for best results, @option{-fweb} should be used as well).
|
||||
|
||||
Combination of @option{-fweb} and CSE is often sufficient to obtain the
|
||||
same effect. However in cases the loop body is more complicated than
|
||||
a single basic block, this is not reliable. It also does not work at all
|
||||
on some of the architectures due to restrictions in the CSE pass.
|
||||
|
||||
This optimization is enabled by default.
|
||||
|
||||
@item -fprefetch-loop-arrays
|
||||
@opindex fprefetch-loop-arrays
|
||||
If supported by the target machine, generate instructions to prefetch
|
||||
|
|
|
@ -1183,6 +1183,24 @@ iv_analyze (rtx insn, rtx def, struct rtx_iv *iv)
|
|||
return iv->base != NULL_RTX;
|
||||
}
|
||||
|
||||
/* Checks whether definition of register REG in INSN a basic induction
|
||||
variable. IV analysis must have been initialized (via a call to
|
||||
iv_analysis_loop_init) for this function to produce a result. */
|
||||
|
||||
bool
|
||||
biv_p (rtx insn, rtx reg)
|
||||
{
|
||||
struct rtx_iv iv;
|
||||
|
||||
if (!REG_P (reg))
|
||||
return false;
|
||||
|
||||
if (last_def[REGNO (reg)] != insn)
|
||||
return false;
|
||||
|
||||
return iv_analyze_biv (reg, &iv);
|
||||
}
|
||||
|
||||
/* Calculates value of IV at ITERATION-th iteration. */
|
||||
|
||||
rtx
|
||||
|
|
|
@ -30,6 +30,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
#include "params.h"
|
||||
#include "output.h"
|
||||
#include "expr.h"
|
||||
#include "hashtab.h"
|
||||
#include "recog.h"
|
||||
|
||||
/* This pass performs loop unrolling and peeling. We only perform these
|
||||
optimizations on innermost loops (with single exception) because
|
||||
|
@ -66,6 +68,28 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
showed that this choice may affect performance in order of several %.
|
||||
*/
|
||||
|
||||
/* Information about induction variables to split. */
|
||||
|
||||
struct iv_to_split
|
||||
{
|
||||
rtx insn; /* The insn in that the induction variable occurs. */
|
||||
rtx base_var; /* The variable on that the values in the further
|
||||
iterations are based. */
|
||||
rtx step; /* Step of the induction variable. */
|
||||
unsigned n_loc;
|
||||
unsigned loc[3]; /* Location where the definition of the induction
|
||||
variable occurs in the insn. For example if
|
||||
N_LOC is 2, the expression is located at
|
||||
XEXP (XEXP (single_set, loc[0]), loc[1]). */
|
||||
};
|
||||
|
||||
struct split_ivs_info
|
||||
{
|
||||
htab_t insns_to_split; /* A hashtable of insns to split. */
|
||||
unsigned first_new_block; /* The first basic block that was
|
||||
duplicated. */
|
||||
};
|
||||
|
||||
static void decide_unrolling_and_peeling (struct loops *, int);
|
||||
static void peel_loops_completely (struct loops *, int);
|
||||
static void decide_peel_simple (struct loop *, int);
|
||||
|
@ -79,6 +103,10 @@ static void peel_loop_completely (struct loops *, struct loop *);
|
|||
static void unroll_loop_stupid (struct loops *, struct loop *);
|
||||
static void unroll_loop_constant_iterations (struct loops *, struct loop *);
|
||||
static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
|
||||
static struct split_ivs_info *analyze_ivs_to_split (struct loop *);
|
||||
static void si_info_start_duplication (struct split_ivs_info *);
|
||||
static void split_ivs_in_copies (struct split_ivs_info *, unsigned, bool, bool);
|
||||
static void free_si_info (struct split_ivs_info *);
|
||||
|
||||
/* Unroll and/or peel (depending on FLAGS) LOOPS. */
|
||||
void
|
||||
|
@ -116,7 +144,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
|
|||
{
|
||||
case LPT_PEEL_COMPLETELY:
|
||||
/* Already done. */
|
||||
abort ();
|
||||
gcc_unreachable ();
|
||||
case LPT_PEEL_SIMPLE:
|
||||
peel_loop_simple (loops, loop);
|
||||
break;
|
||||
|
@ -133,7 +161,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
|
|||
check = false;
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
gcc_unreachable ();
|
||||
}
|
||||
if (check)
|
||||
{
|
||||
|
@ -428,6 +456,7 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
|
|||
unsigned n_remove_edges, i;
|
||||
edge *remove_edges, ei;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
struct split_ivs_info *si_info = NULL;
|
||||
|
||||
npeel = desc->niter;
|
||||
|
||||
|
@ -442,6 +471,10 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
|
|||
remove_edges = xcalloc (npeel, sizeof (edge));
|
||||
n_remove_edges = 0;
|
||||
|
||||
if (flag_split_ivs_in_unroller)
|
||||
si_info = analyze_ivs_to_split (loop);
|
||||
|
||||
si_info_start_duplication (si_info);
|
||||
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
loops, npeel,
|
||||
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
|
||||
|
@ -450,6 +483,12 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
|
|||
|
||||
free (wont_exit);
|
||||
|
||||
if (si_info)
|
||||
{
|
||||
split_ivs_in_copies (si_info, npeel, false, true);
|
||||
free_si_info (si_info);
|
||||
}
|
||||
|
||||
/* Remove the exit edges. */
|
||||
for (i = 0; i < n_remove_edges; i++)
|
||||
remove_path (loops, remove_edges[i]);
|
||||
|
@ -597,11 +636,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
unsigned max_unroll = loop->lpt_decision.times;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
bool exit_at_end = loop_exit_at_end_p (loop);
|
||||
struct split_ivs_info *si_info = NULL;
|
||||
|
||||
niter = desc->niter;
|
||||
|
||||
if (niter <= max_unroll + 1)
|
||||
abort (); /* Should not get here (such loop should be peeled instead). */
|
||||
/* Should not get here (such loop should be peeled instead). */
|
||||
gcc_assert (niter > max_unroll + 1);
|
||||
|
||||
exit_mod = niter % (max_unroll + 1);
|
||||
|
||||
|
@ -611,6 +651,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
|
||||
n_remove_edges = 0;
|
||||
|
||||
if (flag_split_ivs_in_unroller)
|
||||
si_info = analyze_ivs_to_split (loop);
|
||||
|
||||
if (!exit_at_end)
|
||||
{
|
||||
/* The exit is not at the end of the loop; leave exit test
|
||||
|
@ -627,6 +670,7 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
|
||||
if (exit_mod)
|
||||
{
|
||||
si_info_start_duplication (si_info);
|
||||
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
loops, exit_mod,
|
||||
wont_exit, desc->out_edge,
|
||||
|
@ -634,6 +678,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
DLTHE_FLAG_UPDATE_FREQ))
|
||||
abort ();
|
||||
|
||||
if (si_info && exit_mod > 1)
|
||||
split_ivs_in_copies (si_info, exit_mod, false, false);
|
||||
|
||||
desc->noloop_assumptions = NULL_RTX;
|
||||
desc->niter -= exit_mod;
|
||||
desc->niter_max -= exit_mod;
|
||||
|
@ -659,12 +706,16 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
if (desc->noloop_assumptions)
|
||||
RESET_BIT (wont_exit, 1);
|
||||
|
||||
si_info_start_duplication (si_info);
|
||||
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
loops, exit_mod + 1,
|
||||
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
|
||||
DLTHE_FLAG_UPDATE_FREQ))
|
||||
abort ();
|
||||
|
||||
if (si_info && exit_mod > 0)
|
||||
split_ivs_in_copies (si_info, exit_mod + 1, false, false);
|
||||
|
||||
desc->niter -= exit_mod + 1;
|
||||
desc->niter_max -= exit_mod + 1;
|
||||
desc->noloop_assumptions = NULL_RTX;
|
||||
|
@ -677,12 +728,19 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
|
|||
}
|
||||
|
||||
/* Now unroll the loop. */
|
||||
si_info_start_duplication (si_info);
|
||||
if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
|
||||
loops, max_unroll,
|
||||
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
|
||||
DLTHE_FLAG_UPDATE_FREQ))
|
||||
abort ();
|
||||
|
||||
if (si_info)
|
||||
{
|
||||
split_ivs_in_copies (si_info, max_unroll, true, true);
|
||||
free_si_info (si_info);
|
||||
}
|
||||
|
||||
free (wont_exit);
|
||||
|
||||
if (exit_at_end)
|
||||
|
@ -842,6 +900,10 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
|||
unsigned max_unroll = loop->lpt_decision.times;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
bool exit_at_end = loop_exit_at_end_p (loop);
|
||||
struct split_ivs_info *si_info = NULL;
|
||||
|
||||
if (flag_split_ivs_in_unroller)
|
||||
si_info = analyze_ivs_to_split (loop);
|
||||
|
||||
/* Remember blocks whose dominators will have to be updated. */
|
||||
dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
|
||||
|
@ -979,12 +1041,19 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
|||
sbitmap_ones (wont_exit);
|
||||
RESET_BIT (wont_exit, may_exit_copy);
|
||||
|
||||
si_info_start_duplication (si_info);
|
||||
if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
|
||||
loops, max_unroll,
|
||||
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
|
||||
DLTHE_FLAG_UPDATE_FREQ))
|
||||
abort ();
|
||||
|
||||
if (si_info)
|
||||
{
|
||||
split_ivs_in_copies (si_info, max_unroll, true, true);
|
||||
free_si_info (si_info);
|
||||
}
|
||||
|
||||
free (wont_exit);
|
||||
|
||||
if (exit_at_end)
|
||||
|
@ -1013,8 +1082,7 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
|
|||
preconditioning and the fact that the value must be valid at entry
|
||||
of the loop. After passing through the above code, we see that
|
||||
the correct new number of iterations is this: */
|
||||
if (desc->const_iter)
|
||||
abort ();
|
||||
gcc_assert (!desc->const_iter);
|
||||
desc->niter_expr =
|
||||
simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
|
||||
desc->niter_max /= max_unroll + 1;
|
||||
|
@ -1138,10 +1206,15 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
|
|||
sbitmap wont_exit;
|
||||
unsigned npeel = loop->lpt_decision.times;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
struct split_ivs_info *si_info = NULL;
|
||||
|
||||
if (flag_split_ivs_in_unroller && npeel > 1)
|
||||
si_info = analyze_ivs_to_split (loop);
|
||||
|
||||
wont_exit = sbitmap_alloc (npeel + 1);
|
||||
sbitmap_zero (wont_exit);
|
||||
|
||||
si_info_start_duplication (si_info);
|
||||
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
|
||||
loops, npeel, wont_exit, NULL, NULL, NULL,
|
||||
DLTHE_FLAG_UPDATE_FREQ))
|
||||
|
@ -1149,6 +1222,12 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
|
|||
|
||||
free (wont_exit);
|
||||
|
||||
if (si_info)
|
||||
{
|
||||
split_ivs_in_copies (si_info, npeel, false, false);
|
||||
free_si_info (si_info);
|
||||
}
|
||||
|
||||
if (desc->simple_p)
|
||||
{
|
||||
if (desc->const_iter)
|
||||
|
@ -1271,15 +1350,26 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
|
|||
sbitmap wont_exit;
|
||||
unsigned nunroll = loop->lpt_decision.times;
|
||||
struct niter_desc *desc = get_simple_loop_desc (loop);
|
||||
struct split_ivs_info *si_info = NULL;
|
||||
|
||||
if (flag_split_ivs_in_unroller)
|
||||
si_info = analyze_ivs_to_split (loop);
|
||||
|
||||
wont_exit = sbitmap_alloc (nunroll + 1);
|
||||
sbitmap_zero (wont_exit);
|
||||
|
||||
si_info_start_duplication (si_info);
|
||||
if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
|
||||
loops, nunroll, wont_exit, NULL, NULL, NULL,
|
||||
DLTHE_FLAG_UPDATE_FREQ))
|
||||
abort ();
|
||||
|
||||
if (si_info)
|
||||
{
|
||||
split_ivs_in_copies (si_info, nunroll, true, true);
|
||||
free_si_info (si_info);
|
||||
}
|
||||
|
||||
free (wont_exit);
|
||||
|
||||
if (desc->simple_p)
|
||||
|
@ -1297,3 +1387,375 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
|
|||
fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
|
||||
nunroll, num_loop_insns (loop));
|
||||
}
|
||||
|
||||
/* A hash function for information about insns to split. */
|
||||
|
||||
static hashval_t
|
||||
si_info_hash (const void *ivts)
|
||||
{
|
||||
return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
|
||||
}
|
||||
|
||||
/* An equality functions for information about insns to split. */
|
||||
|
||||
static int
|
||||
si_info_eq (const void *ivts1, const void *ivts2)
|
||||
{
|
||||
const struct iv_to_split *i1 = ivts1;
|
||||
const struct iv_to_split *i2 = ivts2;
|
||||
|
||||
return i1->insn == i2->insn;
|
||||
}
|
||||
|
||||
/* Determine whether there is an induction variable in INSN that
|
||||
we would like to split during unrolling. Return NULL if INSN
|
||||
contains no interesting IVs. Otherwise, allocate an IV_TO_SPLIT
|
||||
structure, fill it with the relevant information and return a
|
||||
pointer to it. */
|
||||
|
||||
static struct iv_to_split *
|
||||
analyze_iv_to_split_insn (rtx insn)
|
||||
{
|
||||
rtx set, dest;
|
||||
struct rtx_iv iv;
|
||||
struct iv_to_split *ivts;
|
||||
|
||||
/* For now we just split the basic induction variables. Later this may be
|
||||
extended for example by selecting also addresses of memory references. */
|
||||
set = single_set (insn);
|
||||
if (!set)
|
||||
return NULL;
|
||||
|
||||
dest = SET_DEST (set);
|
||||
if (!REG_P (dest))
|
||||
return NULL;
|
||||
|
||||
if (!biv_p (insn, dest))
|
||||
return NULL;
|
||||
|
||||
if (!iv_analyze (insn, dest, &iv))
|
||||
abort ();
|
||||
|
||||
if (iv.step == const0_rtx
|
||||
|| iv.mode != iv.extend_mode)
|
||||
return NULL;
|
||||
|
||||
/* Record the insn to split. */
|
||||
ivts = xmalloc (sizeof (struct iv_to_split));
|
||||
ivts->insn = insn;
|
||||
ivts->base_var = NULL_RTX;
|
||||
ivts->step = iv.step;
|
||||
ivts->n_loc = 1;
|
||||
ivts->loc[0] = 1;
|
||||
|
||||
return ivts;
|
||||
}
|
||||
|
||||
/* Determines which of induction variables in LOOP to split.
|
||||
Return a SPLIT_IVS_INFO struct with the hash table filled
|
||||
with all insns to split IVs in. The FIRST_NEW_BLOCK field
|
||||
is undefined for the return value. */
|
||||
|
||||
static struct split_ivs_info *
|
||||
analyze_ivs_to_split (struct loop *loop)
|
||||
{
|
||||
basic_block *body, bb;
|
||||
unsigned i;
|
||||
struct split_ivs_info *si_info = xcalloc (1, sizeof (struct split_ivs_info));
|
||||
rtx insn;
|
||||
struct iv_to_split *ivts;
|
||||
PTR *slot;
|
||||
|
||||
si_info->insns_to_split = htab_create (5 * loop->num_nodes,
|
||||
si_info_hash, si_info_eq, free);
|
||||
|
||||
iv_analysis_loop_init (loop);
|
||||
|
||||
body = get_loop_body (loop);
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
{
|
||||
bb = body[i];
|
||||
if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
|
||||
continue;
|
||||
|
||||
FOR_BB_INSNS (bb, insn)
|
||||
{
|
||||
if (!INSN_P (insn))
|
||||
continue;
|
||||
|
||||
ivts = analyze_iv_to_split_insn (insn);
|
||||
|
||||
if (!ivts)
|
||||
continue;
|
||||
|
||||
slot = htab_find_slot (si_info->insns_to_split, ivts, INSERT);
|
||||
*slot = ivts;
|
||||
}
|
||||
}
|
||||
|
||||
free (body);
|
||||
|
||||
return si_info;
|
||||
}
|
||||
|
||||
/* Called just before loop duplication. Records start of duplicated area
|
||||
to SI_INFO. */
|
||||
|
||||
static void
|
||||
si_info_start_duplication (struct split_ivs_info *si_info)
|
||||
{
|
||||
if (si_info)
|
||||
si_info->first_new_block = last_basic_block;
|
||||
}
|
||||
|
||||
/* Determine the number of iterations between initialization of the base
|
||||
variable and the current copy (N_COPY). N_COPIES is the total number
|
||||
of newly created copies. UNROLLING is true if we are unrolling
|
||||
(not peeling) the loop. */
|
||||
|
||||
static unsigned
|
||||
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
|
||||
{
|
||||
if (unrolling)
|
||||
{
|
||||
/* If we are unrolling, initialization is done in the original loop
|
||||
body (number 0). */
|
||||
return n_copy;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If we are peeling, the copy in that the initialization occurs has
|
||||
number 1. The original loop (number 0) is the last. */
|
||||
if (n_copy)
|
||||
return n_copy - 1;
|
||||
else
|
||||
return n_copies;
|
||||
}
|
||||
}
|
||||
|
||||
/* Locate in EXPR the expression corresponding to the location recorded
|
||||
in IVTS, and return a pointer to the RTX for this location. */
|
||||
|
||||
static rtx *
|
||||
get_ivts_expr (rtx expr, struct iv_to_split *ivts)
|
||||
{
|
||||
unsigned i;
|
||||
rtx *ret = &expr;
|
||||
|
||||
for (i = 0; i < ivts->n_loc; i++)
|
||||
ret = &XEXP (*ret, ivts->loc[i]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Allocate basic variable for the induction variable chain. Callback for
|
||||
htab_traverse. */
|
||||
|
||||
static int
|
||||
allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
|
||||
{
|
||||
struct iv_to_split *ivts = *slot;
|
||||
rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
|
||||
|
||||
ivts->base_var = gen_reg_rtx (GET_MODE (expr));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Insert initialization of basic variable of IVTS before INSN, taking
|
||||
the initial value from INSN. */
|
||||
|
||||
static void
|
||||
insert_base_initialization (struct iv_to_split *ivts, rtx insn)
|
||||
{
|
||||
rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
|
||||
rtx seq;
|
||||
|
||||
start_sequence ();
|
||||
expr = force_operand (expr, ivts->base_var);
|
||||
if (expr != ivts->base_var)
|
||||
emit_move_insn (ivts->base_var, expr);
|
||||
seq = get_insns ();
|
||||
end_sequence ();
|
||||
|
||||
emit_insn_before (seq, insn);
|
||||
}
|
||||
|
||||
/* Replace the use of induction variable described in IVTS in INSN
|
||||
by base variable + DELTA * step. */
|
||||
|
||||
static void
|
||||
split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
|
||||
{
|
||||
rtx expr, *loc, seq, incr, var;
|
||||
enum machine_mode mode = GET_MODE (ivts->base_var);
|
||||
rtx src, dest, set;
|
||||
|
||||
/* Construct base + DELTA * step. */
|
||||
if (!delta)
|
||||
expr = ivts->base_var;
|
||||
else
|
||||
{
|
||||
incr = simplify_gen_binary (MULT, mode,
|
||||
ivts->step, gen_int_mode (delta, mode));
|
||||
expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
|
||||
ivts->base_var, incr);
|
||||
}
|
||||
|
||||
/* Figure out where to do the replacement. */
|
||||
loc = get_ivts_expr (single_set (insn), ivts);
|
||||
|
||||
/* If we can make the replacement right away, we're done. */
|
||||
if (validate_change (insn, loc, expr, 0))
|
||||
return;
|
||||
|
||||
/* Otherwise, force EXPR into a register and try again. */
|
||||
start_sequence ();
|
||||
var = gen_reg_rtx (mode);
|
||||
expr = force_operand (expr, var);
|
||||
if (expr != var)
|
||||
emit_move_insn (var, expr);
|
||||
seq = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn_before (seq, insn);
|
||||
|
||||
if (validate_change (insn, loc, var, 0))
|
||||
return;
|
||||
|
||||
/* The last chance. Try recreating the assignment in insn
|
||||
completely from scratch. */
|
||||
set = single_set (insn);
|
||||
gcc_assert (set);
|
||||
|
||||
start_sequence ();
|
||||
*loc = var;
|
||||
src = copy_rtx (SET_SRC (set));
|
||||
dest = copy_rtx (SET_DEST (set));
|
||||
src = force_operand (src, dest);
|
||||
if (src != dest)
|
||||
emit_move_insn (dest, src);
|
||||
seq = get_insns ();
|
||||
end_sequence ();
|
||||
|
||||
emit_insn_before (seq, insn);
|
||||
delete_insn (insn);
|
||||
}
|
||||
|
||||
/* Splits induction variables (that are marked in SI_INFO) in copies of loop.
|
||||
I.e. replace
|
||||
|
||||
i = i + 1;
|
||||
...
|
||||
i = i + 1;
|
||||
...
|
||||
i = i + 1;
|
||||
...
|
||||
|
||||
type chains by
|
||||
|
||||
i0 = i + 1
|
||||
...
|
||||
i = i0 + 1
|
||||
...
|
||||
i = i0 + 2
|
||||
...
|
||||
|
||||
UNROLLING is true if we unrolled (not peeled) the loop.
|
||||
REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
|
||||
the loop (as it should happen in complete unrolling, but not in ordinary
|
||||
peeling of the loop). */
|
||||
|
||||
static void
|
||||
split_ivs_in_copies (struct split_ivs_info *si_info, unsigned n_copies,
|
||||
bool unrolling, bool rewrite_original_loop)
|
||||
{
|
||||
unsigned i, delta;
|
||||
basic_block bb, orig_bb;
|
||||
rtx insn, orig_insn, next;
|
||||
struct iv_to_split ivts_templ, *ivts;
|
||||
|
||||
/* Sanity check -- we need to put initialization in the original loop
|
||||
body. */
|
||||
gcc_assert (!unrolling || rewrite_original_loop);
|
||||
|
||||
/* Allocate the basic variables (i0). */
|
||||
htab_traverse (si_info->insns_to_split, allocate_basic_variable, NULL);
|
||||
|
||||
for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
|
||||
{
|
||||
bb = BASIC_BLOCK (i);
|
||||
orig_bb = bb->rbi->original;
|
||||
|
||||
delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
|
||||
unrolling);
|
||||
orig_insn = BB_HEAD (orig_bb);
|
||||
for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
|
||||
{
|
||||
next = NEXT_INSN (insn);
|
||||
if (!INSN_P (insn))
|
||||
continue;
|
||||
|
||||
while (!INSN_P (orig_insn))
|
||||
orig_insn = NEXT_INSN (orig_insn);
|
||||
|
||||
ivts_templ.insn = orig_insn;
|
||||
ivts = htab_find (si_info->insns_to_split, &ivts_templ);
|
||||
if (ivts)
|
||||
{
|
||||
|
||||
#ifdef ENABLE_CHECKING
|
||||
if (!rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)))
|
||||
abort ();
|
||||
#endif
|
||||
|
||||
if (!delta)
|
||||
insert_base_initialization (ivts, insn);
|
||||
split_iv (ivts, insn, delta);
|
||||
}
|
||||
orig_insn = NEXT_INSN (orig_insn);
|
||||
}
|
||||
}
|
||||
|
||||
if (!rewrite_original_loop)
|
||||
return;
|
||||
|
||||
/* Rewrite also the original loop body. Find them as originals of the blocks
|
||||
in the last copied iteration, i.e. those that have
|
||||
bb->rbi->original->copy == bb. */
|
||||
for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
|
||||
{
|
||||
bb = BASIC_BLOCK (i);
|
||||
orig_bb = bb->rbi->original;
|
||||
if (orig_bb->rbi->copy != bb)
|
||||
continue;
|
||||
|
||||
delta = determine_split_iv_delta (0, n_copies, unrolling);
|
||||
for (orig_insn = BB_HEAD (orig_bb);
|
||||
orig_insn != NEXT_INSN (BB_END (bb));
|
||||
orig_insn = next)
|
||||
{
|
||||
next = NEXT_INSN (orig_insn);
|
||||
|
||||
if (!INSN_P (orig_insn))
|
||||
continue;
|
||||
|
||||
ivts_templ.insn = orig_insn;
|
||||
ivts = htab_find (si_info->insns_to_split, &ivts_templ);
|
||||
if (!ivts)
|
||||
continue;
|
||||
|
||||
if (!delta)
|
||||
insert_base_initialization (ivts, orig_insn);
|
||||
split_iv (ivts, orig_insn, delta);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Release SI_INFO. */
|
||||
|
||||
static void
|
||||
free_si_info (struct split_ivs_info *si_info)
|
||||
{
|
||||
htab_delete (si_info->insns_to_split);
|
||||
free (si_info);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue