Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency.

* Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency. * basic-block.h (struct reorder_block_def): Add copy_number field. * cfgloop.h (biv_p): Declare. * cfgloopmanip.c (duplicate_loop_to_header_edge): Set copy_number. * common.opt (fsplit-ivs-in-unroller): New flag. * loop-iv.c (biv_p): New function. * loop-unroll.c: Include hashtab.h and recog.h. (struct iv_to_split, struct split_ivs_info): New types. (analyze_ivs_to_split, si_info_start_duplication, split_ivs_in_copies, free_si_info, si_info_hash, si_info_eq, analyze_iv_to_split_insn, determine_split_iv_delta, get_ivts_expr, allocate_basic_variable, insert_base_initialization, split_iv): New functions. (peel_loop_completely, unroll_loop_constant_iterations, unroll_loop_runtime_iterations, peel_loop_simple, unroll_loop_stupid): Use them. * doc/invoke.texi (-fsplit-ivs-in-unroller): Document. Co-Authored-By: Steven Bosscher <stevenb@suse.de> From-SVN: r87487
2004-09-14 10:05:46 +02:00 · 2004-09-14 10:05:46 +02:00 · 113d659afd
parent d840495b15
commit 113d659afd
9 changed files with 533 additions and 8 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,23 @@
+2004-09-14  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>
+	    Steven Bosscher  <stevenb@suse.de>
+
+	* Makefile.in (loop-unroll.o): Add HASHTAB_H and RECOG_H dependency.
+	* basic-block.h (struct reorder_block_def): Add copy_number field.
+	* cfgloop.h (biv_p): Declare.
+	* cfgloopmanip.c (duplicate_loop_to_header_edge): Set copy_number.
+	* common.opt (fsplit-ivs-in-unroller): New flag.
+	* loop-iv.c (biv_p): New function.
+	* loop-unroll.c: Include hashtab.h and recog.h.
+	(struct iv_to_split, struct split_ivs_info): New types.
+	(analyze_ivs_to_split, si_info_start_duplication, split_ivs_in_copies,
+	free_si_info, si_info_hash, si_info_eq, analyze_iv_to_split_insn,
+	determine_split_iv_delta, get_ivts_expr, allocate_basic_variable,
+	insert_base_initialization, split_iv): New functions.
+	(peel_loop_completely, unroll_loop_constant_iterations,
+	unroll_loop_runtime_iterations, peel_loop_simple, unroll_loop_stupid):
+	Use them.
+	* doc/invoke.texi (-fsplit-ivs-in-unroller): Document.
+
 2004-09-14  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>

 	* tree-cfg.c (thread_jumps): Update dominators correctly in
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@ -2028,7 +2028,7 @@ loop-unswitch.o : loop-unswitch.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
   output.h $(EXPR_H) coretypes.h $(TM_H)
 loop-unroll.o: loop-unroll.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
   $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(CFGLAYOUT_H) $(PARAMS_H) \
-   output.h $(EXPR_H) coretypes.h $(TM_H)
+   output.h $(EXPR_H) coretypes.h $(TM_H) $(HASHTAB_H) $(RECOG_H)
 dominance.o : dominance.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
   hard-reg-set.h $(BASIC_BLOCK_H) et-forest.h
 et-forest.o : et-forest.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) et-forest.h alloc-pool.h
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@ -295,6 +295,7 @@ typedef struct reorder_block_def
  /* Used by loop copying.  */
  basic_block copy;
  int duplicated;
+  int copy_number;

  /* These fields are used by bb-reorder pass.  */
  int visited;
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@ -410,6 +410,7 @@ extern void iv_analysis_loop_init (struct loop *);
 extern rtx iv_get_reaching_def (rtx, rtx);
 extern bool iv_analyze (rtx, rtx, struct rtx_iv *);
 extern rtx get_iv_value (struct rtx_iv *, rtx);
+extern bool biv_p (rtx, rtx);
 extern void find_simple_exit (struct loop *, struct niter_desc *);
 extern void iv_number_of_iterations (struct loop *, rtx, rtx,
 				     struct niter_desc *);
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@ -991,6 +991,9 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, struct loops *loops,
      /* Copy bbs.  */
      copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop);

+      for (i = 0; i < n; i++)
+	new_bbs[i]->rbi->copy_number = j + 1;
+
      /* Note whether the blocks and edges belong to an irreducible loop.  */
      if (add_irreducible_flag)
 	{
@ -1069,6 +1072,8 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, struct loops *loops,
      int n_dom_bbs,j;

      bb = bbs[i];
+      bb->rbi->copy_number = 0;
+
      n_dom_bbs = get_dominated_by (CDI_DOMINATORS, bb, &dom_bbs);
      for (j = 0; j < n_dom_bbs; j++)
 	{
--- a/gcc/common.opt
+++ b/gcc/common.opt
@ -744,6 +744,10 @@ fspeculative-prefetching
 Common Report Var(flag_speculative_prefetching)
 Use value profiling for speculative prefetching

+fsplit-ivs-in-unroller
+Common Report Var(flag_split_ivs_in_unroller) Init(1)
+Split lifetimes of induction variables when loops are unrolled.
+
 ; Emit code to probe the stack, to help detect stack overflow; also
 ; may cause large objects to be allocated dynamically.
 fstack-check
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@ -315,7 +315,7 @@ Objective-C and Objective-C++ Dialects}.
 -fsignaling-nans -fsingle-precision-constant  -fspeculative-prefetching @gol
 -fstrength-reduce  -fstrict-aliasing  -ftracer  -fthread-jumps @gol
 -funroll-all-loops  -funroll-loops  -fpeel-loops @gol
-funswitch-loops @gol
+-fsplit-ivs-in-unroller -funswitch-loops @gol
 -ftree-pre  -ftree-ccp  -ftree-dce -ftree-loop-optimize @gol
 -ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
 -ftree-dominator-opts -ftree-dse -ftree-copyrename @gol
@ -4696,6 +4696,20 @@ the loop is entered.  This usually makes programs run more slowly.
@option{-funroll-all-loops} implies the same options as
@option{-funroll-loops},

+@item -fsplit-ivs-in-unroller
+@opindex -fsplit-ivs-in-unroller
+Enables expressing of values of induction variables in later iterations
+of the unrolled loop using the value in the first iteration.  This breaks
+long dependency chains, thus improving efficiency of the scheduling passes
+(for best results, @option{-fweb} should be used as well).
+
+Combination of @option{-fweb} and CSE is often sufficient to obtain the
+same effect. However in cases the loop body is more complicated than
+a single basic block, this is not reliable.  It also does not work at all
+on some of the architectures due to restrictions in the CSE pass.
+
+This optimization is enabled by default.
+
@item -fprefetch-loop-arrays
@opindex fprefetch-loop-arrays
 If supported by the target machine, generate instructions to prefetch
--- a/gcc/loop-iv.c
+++ b/gcc/loop-iv.c
@ -1183,6 +1183,24 @@ iv_analyze (rtx insn, rtx def, struct rtx_iv *iv)
  return iv->base != NULL_RTX;
 }

+/* Checks whether definition of register REG in INSN a basic induction
+   variable.  IV analysis must have been initialized (via a call to
+   iv_analysis_loop_init) for this function to produce a result.  */
+
+bool
+biv_p (rtx insn, rtx reg)
+{
+  struct rtx_iv iv;
+
+  if (!REG_P (reg))
+    return false;
+
+  if (last_def[REGNO (reg)] != insn)
+    return false;
+
+  return iv_analyze_biv (reg, &iv);
+}
+
 /* Calculates value of IV at ITERATION-th iteration.  */

 rtx
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@ -30,6 +30,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include "params.h"
 #include "output.h"
 #include "expr.h"
+#include "hashtab.h"
+#include "recog.h"

 /* This pass performs loop unrolling and peeling.  We only perform these
   optimizations on innermost loops (with single exception) because
@ -66,6 +68,28 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
   showed that this choice may affect performance in order of several %.
   */

+/* Information about induction variables to split.  */
+
+struct iv_to_split
+{
+  rtx insn;		/* The insn in that the induction variable occurs.  */
+  rtx base_var;		/* The variable on that the values in the further
+			   iterations are based.  */
+  rtx step;		/* Step of the induction variable.  */
+  unsigned n_loc;
+  unsigned loc[3];	/* Location where the definition of the induction
+			   variable occurs in the insn.  For example if
+			   N_LOC is 2, the expression is located at
+			   XEXP (XEXP (single_set, loc[0]), loc[1]).  */ 
+};
+
+struct split_ivs_info
+{
+  htab_t insns_to_split;	/* A hashtable of insns to split.  */
+  unsigned first_new_block;	/* The first basic block that was
+				   duplicated.  */
+};
+
 static void decide_unrolling_and_peeling (struct loops *, int);
 static void peel_loops_completely (struct loops *, int);
 static void decide_peel_simple (struct loop *, int);
@ -79,6 +103,10 @@ static void peel_loop_completely (struct loops *, struct loop *);
 static void unroll_loop_stupid (struct loops *, struct loop *);
 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
+static struct split_ivs_info *analyze_ivs_to_split (struct loop *);
+static void si_info_start_duplication (struct split_ivs_info *);
+static void split_ivs_in_copies (struct split_ivs_info *, unsigned, bool, bool);
+static void free_si_info (struct split_ivs_info *);

 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 void
@ -116,7 +144,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
 	{
 	case LPT_PEEL_COMPLETELY:
 	  /* Already done.  */
-	  abort ();
+	  gcc_unreachable ();
 	case LPT_PEEL_SIMPLE:
 	  peel_loop_simple (loops, loop);
 	  break;
@ -133,7 +161,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
 	  check = false;
 	  break;
 	default:
-	  abort ();
+	  gcc_unreachable ();
 	}
      if (check)
 	{
@ -428,6 +456,7 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
  unsigned n_remove_edges, i;
  edge *remove_edges, ei;
  struct niter_desc *desc = get_simple_loop_desc (loop);
+  struct split_ivs_info *si_info = NULL;

  npeel = desc->niter;

@ -442,6 +471,10 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
      remove_edges = xcalloc (npeel, sizeof (edge));
      n_remove_edges = 0;

+      if (flag_split_ivs_in_unroller)
+	si_info = analyze_ivs_to_split (loop);
+
+      si_info_start_duplication (si_info);
      if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 		loops, npeel,
 		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
@ -450,6 +483,12 @@ peel_loop_completely (struct loops *loops, struct loop *loop)

      free (wont_exit);

+      if (si_info)
+	{
+	  split_ivs_in_copies (si_info, npeel, false, true);
+	  free_si_info (si_info);
+	}
+
      /* Remove the exit edges.  */
      for (i = 0; i < n_remove_edges; i++)
 	remove_path (loops, remove_edges[i]);
@ -597,11 +636,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
+  struct split_ivs_info *si_info = NULL;

  niter = desc->niter;

-  if (niter <= max_unroll + 1)
-    abort ();  /* Should not get here (such loop should be peeled instead).  */
+  /* Should not get here (such loop should be peeled instead).  */
+  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

@ -611,6 +651,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
  remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
  n_remove_edges = 0;

+  if (flag_split_ivs_in_unroller)
+    si_info = analyze_ivs_to_split (loop);
+
  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave exit test
@ -627,6 +670,7 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)

      if (exit_mod)
 	{
+	  si_info_start_duplication (si_info);
 	  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 					      loops, exit_mod,
 					      wont_exit, desc->out_edge,
@ -634,6 +678,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 					      DLTHE_FLAG_UPDATE_FREQ))
 	    abort ();

+	  if (si_info && exit_mod > 1)
+	    split_ivs_in_copies (si_info, exit_mod, false, false);
+
 	  desc->noloop_assumptions = NULL_RTX;
 	  desc->niter -= exit_mod;
 	  desc->niter_max -= exit_mod;
@ -659,12 +706,16 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 	  if (desc->noloop_assumptions)
 	    RESET_BIT (wont_exit, 1);

+	  si_info_start_duplication (si_info);
 	  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 		loops, exit_mod + 1,
 		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 		DLTHE_FLAG_UPDATE_FREQ))
 	    abort ();

+	  if (si_info && exit_mod > 0)
+	    split_ivs_in_copies (si_info, exit_mod + 1, false, false);
+
 	  desc->niter -= exit_mod + 1;
 	  desc->niter_max -= exit_mod + 1;
 	  desc->noloop_assumptions = NULL_RTX;
@ -677,12 +728,19 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
    }

  /* Now unroll the loop.  */
+  si_info_start_duplication (si_info);
  if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 		loops, max_unroll,
 		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 		DLTHE_FLAG_UPDATE_FREQ))
    abort ();

+  if (si_info)
+    {
+      split_ivs_in_copies (si_info, max_unroll, true, true);
+      free_si_info (si_info);
+    }
+
  free (wont_exit);

  if (exit_at_end)
@ -842,6 +900,10 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
+  struct split_ivs_info *si_info = NULL;
+
+  if (flag_split_ivs_in_unroller)
+    si_info = analyze_ivs_to_split (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
@ -979,12 +1041,19 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
  sbitmap_ones (wont_exit);
  RESET_BIT (wont_exit, may_exit_copy);

+  si_info_start_duplication (si_info);
  if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 		loops, max_unroll,
 		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 		DLTHE_FLAG_UPDATE_FREQ))
    abort ();

+  if (si_info)
+    {
+      split_ivs_in_copies (si_info, max_unroll, true, true);
+      free_si_info (si_info);
+    }
+
  free (wont_exit);

  if (exit_at_end)
@ -1013,8 +1082,7 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
-  if (desc->const_iter)
-    abort ();
+  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
  desc->niter_max /= max_unroll + 1;
@ -1138,10 +1206,15 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
  sbitmap wont_exit;
  unsigned npeel = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
+  struct split_ivs_info *si_info = NULL;
+
+  if (flag_split_ivs_in_unroller && npeel > 1)
+    si_info = analyze_ivs_to_split (loop);

  wont_exit = sbitmap_alloc (npeel + 1);
  sbitmap_zero (wont_exit);

+  si_info_start_duplication (si_info);
  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 		loops, npeel, wont_exit, NULL, NULL, NULL,
 		DLTHE_FLAG_UPDATE_FREQ))
@ -1149,6 +1222,12 @@ peel_loop_simple (struct loops *loops, struct loop *loop)

  free (wont_exit);

+  if (si_info)
+    {
+      split_ivs_in_copies (si_info, npeel, false, false);
+      free_si_info (si_info);
+    }
+
  if (desc->simple_p)
    {
      if (desc->const_iter)
@ -1271,15 +1350,26 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
+  struct split_ivs_info *si_info = NULL;
+
+  if (flag_split_ivs_in_unroller)
+    si_info = analyze_ivs_to_split (loop);

  wont_exit = sbitmap_alloc (nunroll + 1);
  sbitmap_zero (wont_exit);

+  si_info_start_duplication (si_info);
  if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 		loops, nunroll, wont_exit, NULL, NULL, NULL,
 		DLTHE_FLAG_UPDATE_FREQ))
    abort ();

+  if (si_info)
+    {
+      split_ivs_in_copies (si_info, nunroll, true, true);
+      free_si_info (si_info);
+    }
+
  free (wont_exit);

  if (desc->simple_p)
@ -1297,3 +1387,375 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
 	     nunroll, num_loop_insns (loop));
 }
+
+/* A hash function for information about insns to split.  */
+
+static hashval_t
+si_info_hash (const void *ivts)
+{
+  return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
+}
+
+/* An equality functions for information about insns to split.  */
+
+static int
+si_info_eq (const void *ivts1, const void *ivts2)
+{
+  const struct iv_to_split *i1 = ivts1;
+  const struct iv_to_split *i2 = ivts2;
+
+  return i1->insn == i2->insn;
+}
+
+/* Determine whether there is an induction variable in INSN that
+   we would like to split during unrolling.  Return NULL if INSN
+   contains no interesting IVs.  Otherwise, allocate an IV_TO_SPLIT
+   structure, fill it with the relevant information and return a
+   pointer to it.  */
+
+static struct iv_to_split *
+analyze_iv_to_split_insn (rtx insn)
+{
+  rtx set, dest;
+  struct rtx_iv iv;
+  struct iv_to_split *ivts;
+
+  /* For now we just split the basic induction variables.  Later this may be
+     extended for example by selecting also addresses of memory references.  */
+  set = single_set (insn);
+  if (!set)
+    return NULL;
+
+  dest = SET_DEST (set);
+  if (!REG_P (dest))
+    return NULL;
+
+  if (!biv_p (insn, dest))
+    return NULL;
+
+  if (!iv_analyze (insn, dest, &iv))
+    abort ();
+
+  if (iv.step == const0_rtx
+      || iv.mode != iv.extend_mode)
+    return NULL;
+
+  /* Record the insn to split.  */
+  ivts = xmalloc (sizeof (struct iv_to_split));
+  ivts->insn = insn;
+  ivts->base_var = NULL_RTX;
+  ivts->step = iv.step;
+  ivts->n_loc = 1;
+  ivts->loc[0] = 1;
+  
+  return ivts;
+}
+
+/* Determines which of induction variables in LOOP to split.
+   Return a SPLIT_IVS_INFO struct with the hash table filled
+   with all insns to split IVs in.  The FIRST_NEW_BLOCK field
+   is undefined for the return value.  */
+
+static struct split_ivs_info *
+analyze_ivs_to_split (struct loop *loop)
+{
+  basic_block *body, bb;
+  unsigned i;
+  struct split_ivs_info *si_info = xcalloc (1, sizeof (struct split_ivs_info));
+  rtx insn;
+  struct iv_to_split *ivts;
+  PTR *slot;
+
+  si_info->insns_to_split = htab_create (5 * loop->num_nodes,
+					 si_info_hash, si_info_eq, free);
+
+  iv_analysis_loop_init (loop);
+
+  body = get_loop_body (loop);
+  for (i = 0; i < loop->num_nodes; i++)
+    {
+      bb = body[i];
+      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
+	continue;
+
+      FOR_BB_INSNS (bb, insn)
+	{
+	  if (!INSN_P (insn))
+	    continue;
+
+	  ivts = analyze_iv_to_split_insn (insn);
+
+	  if (!ivts)
+	    continue;
+
+	  slot = htab_find_slot (si_info->insns_to_split, ivts, INSERT);
+	  *slot = ivts;
+	}
+    }
+
+  free (body);
+
+  return si_info;
+}
+
+/* Called just before loop duplication.  Records start of duplicated area
+   to SI_INFO.  */
+
+static void 
+si_info_start_duplication (struct split_ivs_info *si_info)
+{
+  if (si_info)
+    si_info->first_new_block = last_basic_block;
+}
+
+/* Determine the number of iterations between initialization of the base
+   variable and the current copy (N_COPY).  N_COPIES is the total number
+   of newly created copies.  UNROLLING is true if we are unrolling
+   (not peeling) the loop.  */
+
+static unsigned
+determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
+{
+  if (unrolling)
+    {
+      /* If we are unrolling, initialization is done in the original loop
+	 body (number 0).  */
+      return n_copy;
+    }
+  else
+    {
+      /* If we are peeling, the copy in that the initialization occurs has
+	 number 1.  The original loop (number 0) is the last.  */
+      if (n_copy)
+	return n_copy - 1;
+      else
+	return n_copies;
+    }
+}
+
+/* Locate in EXPR the expression corresponding to the location recorded
+   in IVTS, and return a pointer to the RTX for this location.  */
+
+static rtx *
+get_ivts_expr (rtx expr, struct iv_to_split *ivts)
+{
+  unsigned i;
+  rtx *ret = &expr;
+
+  for (i = 0; i < ivts->n_loc; i++)
+    ret = &XEXP (*ret, ivts->loc[i]);
+
+  return ret;
+}
+
+/* Allocate basic variable for the induction variable chain.  Callback for
+   htab_traverse.  */
+
+static int
+allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
+{
+  struct iv_to_split *ivts = *slot;
+  rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
+
+  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
+
+  return 1;
+}
+
+/* Insert initialization of basic variable of IVTS before INSN, taking
+   the initial value from INSN.  */
+
+static void
+insert_base_initialization (struct iv_to_split *ivts, rtx insn)
+{
+  rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
+  rtx seq;
+
+  start_sequence ();
+  expr = force_operand (expr, ivts->base_var);
+  if (expr != ivts->base_var)
+    emit_move_insn (ivts->base_var, expr);
+  seq = get_insns ();
+  end_sequence ();
+
+  emit_insn_before (seq, insn);
+}
+
+/* Replace the use of induction variable described in IVTS in INSN
+   by base variable + DELTA * step.  */
+
+static void
+split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
+{
+  rtx expr, *loc, seq, incr, var;
+  enum machine_mode mode = GET_MODE (ivts->base_var);
+  rtx src, dest, set;
+
+  /* Construct base + DELTA * step.  */
+  if (!delta)
+    expr = ivts->base_var;
+  else
+    {
+      incr = simplify_gen_binary (MULT, mode,
+				  ivts->step, gen_int_mode (delta, mode));
+      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
+				  ivts->base_var, incr);
+    }
+
+  /* Figure out where to do the replacement.  */
+  loc = get_ivts_expr (single_set (insn), ivts);
+
+  /* If we can make the replacement right away, we're done.  */
+  if (validate_change (insn, loc, expr, 0))
+    return;
+
+  /* Otherwise, force EXPR into a register and try again.  */
+  start_sequence ();
+  var = gen_reg_rtx (mode);
+  expr = force_operand (expr, var);
+  if (expr != var)
+    emit_move_insn (var, expr);
+  seq = get_insns ();
+  end_sequence ();
+  emit_insn_before (seq, insn);
+      
+  if (validate_change (insn, loc, var, 0))
+    return;
+
+  /* The last chance.  Try recreating the assignment in insn
+     completely from scratch.  */
+  set = single_set (insn);
+  gcc_assert (set);
+
+  start_sequence ();
+  *loc = var;
+  src = copy_rtx (SET_SRC (set));
+  dest = copy_rtx (SET_DEST (set));
+  src = force_operand (src, dest);
+  if (src != dest)
+    emit_move_insn (dest, src);
+  seq = get_insns ();
+  end_sequence ();
+     
+  emit_insn_before (seq, insn);
+  delete_insn (insn);
+}
+
+/* Splits induction variables (that are marked in SI_INFO) in copies of loop.
+   I.e. replace
+
+   i = i + 1;
+   ...
+   i = i + 1;
+   ...
+   i = i + 1;
+   ...
+
+   type chains by
+
+   i0 = i + 1
+   ...
+   i = i0 + 1
+   ...
+   i = i0 + 2
+   ...
+
+   UNROLLING is true if we unrolled (not peeled) the loop.
+   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
+   the loop (as it should happen in complete unrolling, but not in ordinary
+   peeling of the loop).  */
+
+static void
+split_ivs_in_copies (struct split_ivs_info *si_info, unsigned n_copies,
+		     bool unrolling, bool rewrite_original_loop)
+{
+  unsigned i, delta;
+  basic_block bb, orig_bb;
+  rtx insn, orig_insn, next;
+  struct iv_to_split ivts_templ, *ivts;
+
+  /* Sanity check -- we need to put initialization in the original loop
+     body.  */
+  gcc_assert (!unrolling || rewrite_original_loop);
+
+  /* Allocate the basic variables (i0).  */
+  htab_traverse (si_info->insns_to_split, allocate_basic_variable, NULL);
+
+  for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
+    {
+      bb = BASIC_BLOCK (i);
+      orig_bb = bb->rbi->original;
+
+      delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
+					unrolling);
+      orig_insn = BB_HEAD (orig_bb);
+      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
+	{
+	  next = NEXT_INSN (insn);
+	  if (!INSN_P (insn))
+	    continue;
+
+	  while (!INSN_P (orig_insn))
+	    orig_insn = NEXT_INSN (orig_insn);
+
+	  ivts_templ.insn = orig_insn;
+	  ivts = htab_find (si_info->insns_to_split, &ivts_templ);
+	  if (ivts)
+	    {
+
+#ifdef ENABLE_CHECKING
+	      if (!rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)))
+		abort ();
+#endif
+
+	      if (!delta)
+		insert_base_initialization (ivts, insn);
+	      split_iv (ivts, insn, delta);
+	    }
+	  orig_insn = NEXT_INSN (orig_insn);
+	}
+    }
+
+  if (!rewrite_original_loop)
+    return;
+
+  /* Rewrite also the original loop body.  Find them as originals of the blocks
+     in the last copied iteration, i.e. those that have
+     bb->rbi->original->copy == bb.  */
+  for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
+    {
+      bb = BASIC_BLOCK (i);
+      orig_bb = bb->rbi->original;
+      if (orig_bb->rbi->copy != bb)
+	continue;
+
+      delta = determine_split_iv_delta (0, n_copies, unrolling);
+      for (orig_insn = BB_HEAD (orig_bb);
+	   orig_insn != NEXT_INSN (BB_END (bb));
+	   orig_insn = next)
+	{
+	  next = NEXT_INSN (orig_insn);
+
+	  if (!INSN_P (orig_insn))
+	    continue;
+
+	  ivts_templ.insn = orig_insn;
+	  ivts = htab_find (si_info->insns_to_split, &ivts_templ);
+	  if (!ivts)
+	    continue;
+
+	  if (!delta)
+	    insert_base_initialization (ivts, orig_insn);
+	  split_iv (ivts, orig_insn, delta);
+	}
+    }
+}
+
+/* Release SI_INFO.  */
+
+static void
+free_si_info (struct split_ivs_info *si_info)
+{
+  htab_delete (si_info->insns_to_split);
+  free (si_info);
+}