loop-iv.c: New file.

* loop-iv.c: New file. * Makefile.in (loop-iv.o): New. * basic_block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros. * cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order, num_loop_branches): New functions. * cfgloop.h (get_loop_body_in_dom_order, num_loop_branches, iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value, find_simple_exit, iv_number_of_iterations, iv_analysis_done, get_simple_loop_desc, free_simple_loop_desc): Declare. (simple_loop_desc): New inline function. (struct rtx_iv, struct niter_desc): New. * cfgloopmanip.c (loopify): Specify semantics more precisely. * expr.c (force_operand): Handle subregs of expressions created by loop unroller. * loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move parts of the initialization to toplev.c * loop-unroll.c (loop_exit_at_end_p): New. (unroll_and_peel_loops): Call iv_analysis_done. (decide_peel_once_rolling, decide_peel_completely, decide_unroll_stupid, decide_unroll_constant_iterations, decide_unroll_runtime_iterations, decide_peel_simple, peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations, unroll_loop_runtime_iterations): Use new simple loop analysis. * loop-unswitch.c (compare_and_jump_seq): New. (may_unswitch_on_p): Renamed to ... (may_unswitch_on): Use new iv analysis. (reversed_condition): Export. (unswitch_single_loop, unswitch_loop): Use new iv analysis. * predict.c (estimate_probability): Use new simple loop analysis. * rtl.h (get_mode_bounds, reversed_condition,compare_and_jump_seq, canon_condition, simplify_using_condition): Declare. * stor-layout.c (get_mode_bounds): New. * toplev.c (rest_of_handle_loop2): Some parts of initialization/finalization moved here from loop-init.c. From-SVN: r77951
2004-02-17 17:41:44 +01:00 · 2004-02-17 17:41:44 +01:00 · 50654f6c03
parent cc7ce44e4c
commit 50654f6c03
15 changed files with 3174 additions and 224 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,40 @@
+2004-02-17  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>
+
+	* loop-iv.c: New file.
+	* Makefile.in (loop-iv.o): New.
+	* basic_block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros.
+	* cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order,
+	num_loop_branches): New functions.
+	* cfgloop.h (get_loop_body_in_dom_order, num_loop_branches,
+	iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value,
+	find_simple_exit, iv_number_of_iterations, iv_analysis_done,
+	get_simple_loop_desc, free_simple_loop_desc): Declare.
+	(simple_loop_desc): New inline function.
+	(struct rtx_iv, struct niter_desc): New.
+	* cfgloopmanip.c (loopify): Specify semantics more precisely.
+	* expr.c (force_operand): Handle subregs of expressions created by
+	loop unroller.
+	* loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move
+	parts of the initialization to toplev.c
+	* loop-unroll.c (loop_exit_at_end_p): New.
+	(unroll_and_peel_loops): Call iv_analysis_done.
+	(decide_peel_once_rolling, decide_peel_completely,
+	decide_unroll_stupid, decide_unroll_constant_iterations,
+	decide_unroll_runtime_iterations, decide_peel_simple,
+	peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations,
+	unroll_loop_runtime_iterations): Use new simple loop analysis.
+	* loop-unswitch.c (compare_and_jump_seq): New.
+	(may_unswitch_on_p): Renamed to ...
+	(may_unswitch_on): Use new iv analysis.
+	(reversed_condition): Export.
+	(unswitch_single_loop, unswitch_loop): Use new iv analysis.
+	* predict.c (estimate_probability): Use new simple loop analysis.
+	* rtl.h (get_mode_bounds, reversed_condition,compare_and_jump_seq,
+	canon_condition, simplify_using_condition): Declare.
+	* stor-layout.c (get_mode_bounds): New.
+	* toplev.c (rest_of_handle_loop2): Some parts of
+	initialization/finalization moved here from loop-init.c.
+
 2004-02-17  Kazu Hirata  <kazu@cs.umass.edu>

 	* config/h8300/h8300.h (FIXED_REGISTERS): Add the soft frame
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@ -848,7 +848,7 @@ OBJS-common = \
 cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o	   \
 cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o 	   \
 dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o	   \
- dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o	                   \
+ dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o		   \
 expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o		   \
 genrtl.o ggc-common.o global.o graph.o gtype-desc.o			   \
 haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o	   \
@ -1719,6 +1719,8 @@ cfgloop.o : cfgloop.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) coretypes.h $(TM_H) \
   $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h flags.h
 cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
   $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
+loop-iv.o : loop-iv.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \
+   $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
 cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
   $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h output.h coretypes.h $(TM_H)
 loop-init.o : loop-init.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@ -288,6 +288,17 @@ extern varray_type basic_block_info;
 #define FOR_EACH_BB_REVERSE(BB) \
  FOR_BB_BETWEEN (BB, EXIT_BLOCK_PTR->prev_bb, ENTRY_BLOCK_PTR, prev_bb)

+/* For iterating over insns in basic block BB.  FOR_BB_INSNS walks from
+   BB_HEAD (BB) forward through BB_END (BB); FOR_BB_INSNS_REVERSE walks from
+   BB_END (BB) backward through BB_HEAD (BB).  INSN is the iteration
+   variable and is assigned to.  Both macros expand to a `for' header, so a
+   statement must follow them.  The termination bound is re-evaluated on
+   every iteration and BB is expanded more than once, so BB should be free
+   of side effects.  */
+#define FOR_BB_INSNS(BB, INSN)			\
+  for ((INSN) = BB_HEAD (BB);			\
+       (INSN) != NEXT_INSN (BB_END (BB));	\
+       (INSN) = NEXT_INSN (INSN))
+
+#define FOR_BB_INSNS_REVERSE(BB, INSN)		\
+  for ((INSN) = BB_END (BB);			\
+       (INSN) != PREV_INSN (BB_HEAD (BB));	\
+       (INSN) = PREV_INSN (INSN))
+
 /* Cycles through _all_ basic blocks, even the fake ones (entry and
   exit block).  */

--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@ -959,6 +959,62 @@ get_loop_body (const struct loop *loop)
  return tovisit;
 }

+/* Fills dominance descendants inside LOOP of the basic block BB into
+   array TOVISIT from index *TV.  BB itself is stored first, and *TV is
+   advanced past every block stored.  Sons of BB in the dominator tree that
+   lie outside LOOP are skipped together with their subtrees.  A son for
+   which the dominated_by_p test against LOOP->latch succeeds is postponed
+   and processed after all the other sons, so that its subtree ends up last
+   in the part of TOVISIT filled by this call.  No bounds check is done
+   here; TOVISIT must be large enough for everything stored.  */
+
+static void
+fill_sons_in_loop (const struct loop *loop, basic_block bb,
+		   basic_block *tovisit, int *tv)
+{
+  basic_block son, postpone = NULL;
+
+  tovisit[(*tv)++] = bb;
+  for (son = first_dom_son (CDI_DOMINATORS, bb);
+       son;
+       son = next_dom_son (CDI_DOMINATORS, son))
+    {
+      if (!flow_bb_inside_loop_p (loop, son))
+	continue;
+
+      if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
+	{
+	  postpone = son;	/* Visited last, after its siblings.  */
+	  continue;
+	}
+      fill_sons_in_loop (loop, son, tovisit, tv);
+    }
+
+  if (postpone)
+    fill_sons_in_loop (loop, postpone, tovisit, tv);
+}
+
+/* Gets body of a LOOP (that must be different from the outermost loop)
+   sorted by dominance relation.  Additionally, if a basic block s dominates
+   the latch, then only blocks dominated by s are after it.
+
+   The result is an array of LOOP->num_nodes blocks allocated with xcalloc
+   and beginning with LOOP->header; presumably the caller is expected to
+   free it.  Aborts if LOOP has no nodes, if its latch is EXIT_BLOCK_PTR, or
+   if the dominator walk does not store exactly LOOP->num_nodes blocks.  */
+
+basic_block *
+get_loop_body_in_dom_order (const struct loop *loop)
+{
+  basic_block *tovisit;
+  int tv;
+
+  if (!loop->num_nodes)
+    abort ();
+
+  tovisit = xcalloc (loop->num_nodes, sizeof (basic_block));
+
+  if (loop->latch == EXIT_BLOCK_PTR)
+    abort ();
+
+  tv = 0;
+  fill_sons_in_loop (loop, loop->header, tovisit, &tv);
+
+  if (tv != (int) loop->num_nodes)
+    abort ();
+
+  return tovisit;
+}
+
 /* Gets exit edges of a LOOP, returning their number in N_EDGES.  */
 edge *
 get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges)
@ -988,6 +1044,27 @@ get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges)
  return edges;
 }

+/* Counts the number of conditional branches inside LOOP.  A block is
+   counted when it has at least two outgoing edges (both succ and
+   succ->succ_next are non-null), whatever kind of jump ends it.  Aborts if
+   the latch of LOOP is EXIT_BLOCK_PTR.  The temporary array obtained from
+   get_loop_body is freed before returning.  */
+
+unsigned
+num_loop_branches (const struct loop *loop)
+{
+  unsigned i, n;
+  basic_block * body;
+
+  if (loop->latch == EXIT_BLOCK_PTR)
+    abort ();
+
+  body = get_loop_body (loop);
+  n = 0;
+  for (i = 0; i < loop->num_nodes; i++)
+    if (body[i]->succ && body[i]->succ->succ_next)
+      n++;
+  free (body);
+
+  return n;
+}
+
 /* Adds basic block BB to LOOP.  */
 void
 add_bb_to_loop (basic_block bb, struct loop *loop)
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@ -278,7 +278,9 @@ extern int average_num_loop_insns (struct loop *);

 /* Loops & cfg manipulation.  */
 extern basic_block *get_loop_body (const struct loop *);
+extern basic_block *get_loop_body_in_dom_order (const struct loop *);
 extern edge *get_loop_exit_edges (const struct loop *, unsigned *);
+extern unsigned num_loop_branches (const struct loop *);

 extern edge loop_preheader_edge (const struct loop *);
 extern edge loop_latch_edge (const struct loop *);
@ -322,6 +324,114 @@ extern void unloop (struct loops *, struct loop *);
 extern bool remove_path (struct loops *, edge);
 extern edge split_loop_bb (basic_block, rtx);

+/* Induction variable analysis.  */
+
+/* The description of an induction variable.  Things are a bit complicated
+   due to the need to handle subregs and extends.  The value of the object
+   described by it can be obtained as follows (all computations are done in
+   extend_mode):
+
+   Value in i-th iteration is
+     delta + mult * extend_{extend_mode} (subreg_{mode} (base + i * step)).
+
+   If first_special is true, the value in the first iteration is
+     delta + mult * base
+
+   If extend = NIL, first_special must be false, delta 0, mult 1 and value is
+     subreg_{mode} (base + i * step)
+
+   The get_iv_value function can be used to obtain these expressions.
+
+   ??? Add a third mode field that would specify the mode in which the inner
+   computation is done, which would enable it to be different from the
+   outer one?  */
+
+struct rtx_iv
+{
+  /* Its base and step (mode of base and step is supposed to be extend_mode,
+     see the description above).  */
+  rtx base, step;
+
+  /* The type of extend applied to it (SIGN_EXTEND, ZERO_EXTEND or NIL).  */
+  enum rtx_code extend;
+
+  /* Operations applied in the extended mode.  */
+  rtx delta, mult;
+
+  /* The mode it is extended to.  */
+  enum machine_mode extend_mode;
+
+  /* The mode the variable iterates in.  */
+  enum machine_mode mode;
+
+  /* Whether we have already filled the remaining fields.  */
+  unsigned analysed : 1;
+
+  /* Whether the first iteration needs to be handled specially.  */
+  unsigned first_special : 1;
+};
+
+/* This should replace struct loop_desc.  We keep this just so that we are
+   able to compare the results.  */
+
+struct niter_desc
+{
+  /* The edge out of the loop.  */
+  edge out_edge;
+
+  /* The other edge leading from the condition.  */
+  edge in_edge;
+
+  /* True if we are able to say anything about the number of iterations of
+     the loop.  */
+  bool simple_p;
+
+  /* True if the loop iterates a constant number of times.  */
+  bool const_iter;
+
+  /* Number of iterations if constant.  */
+  unsigned HOST_WIDEST_INT niter;
+
+  /* Upper bound on the number of iterations.  */
+  unsigned HOST_WIDEST_INT niter_max;
+
+  /* Assumptions under which the rest of the information is valid.  */
+  rtx assumptions;
+
+  /* Assumptions under which the loop ends before reaching the latch,
+     even if the value of niter_expr says otherwise.  */
+  rtx noloop_assumptions;
+
+  /* Condition under which the loop is infinite.  */
+  rtx infinite;
+
+  /* Whether the comparison is signed.  */
+  bool signed_p;
+
+  /* The mode in which niter_expr should be computed.  */
+  enum machine_mode mode;
+
+  /* Expression giving the number of iterations of the loop.  */
+  rtx niter_expr;
+};
+
+extern void iv_analysis_loop_init (struct loop *);
+extern rtx iv_get_reaching_def (rtx, rtx);
+extern bool iv_analyse (rtx, rtx, struct rtx_iv *);
+extern rtx get_iv_value (struct rtx_iv *, rtx);
+extern void find_simple_exit (struct loop *, struct niter_desc *);
+extern void iv_number_of_iterations (struct loop *, rtx, rtx,
+				     struct niter_desc *);
+extern void iv_analysis_done (void);
+
+extern struct niter_desc *get_simple_loop_desc (struct loop *loop);
+extern void free_simple_loop_desc (struct loop *loop);
+
+static inline struct niter_desc *
+simple_loop_desc (struct loop *loop)
+{
+  return loop->aux;	/* Whatever is stored in LOOP->aux; nothing is computed here.  */
+}
+
 /* Loop optimizer initialization.  */
 extern struct loops *loop_optimizer_init (FILE *);
 extern void loop_optimizer_finalize (struct loops *, FILE *);
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@ -480,11 +480,13 @@ scale_loop_frequencies (struct loop *loop, int num, int den)
   accordingly. Everything between them plus LATCH_EDGE destination must
   be dominated by HEADER_EDGE destination, and back-reachable from
   LATCH_EDGE source.  HEADER_EDGE is redirected to basic block SWITCH_BB,
-   SWITCH_BB->succ to original destination of LATCH_EDGE and
-   SWITCH_BB->succ->succ_next to original destination of HEADER_EDGE.
+   FALLTHRU_EDGE (SWITCH_BB) to original destination of HEADER_EDGE and
+   BRANCH_EDGE (SWITCH_BB) to original destination of LATCH_EDGE.
   Returns newly created loop.  */
+
 struct loop *
-loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block switch_bb)
+loopify (struct loops *loops, edge latch_edge, edge header_edge, 
+	 basic_block switch_bb)
 {
  basic_block succ_bb = latch_edge->dest;
  basic_block pred_bb = header_edge->src;
@ -509,13 +511,15 @@ loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block swi

  /* Redirect edges.  */
  loop_redirect_edge (latch_edge, loop->header);
+  loop_redirect_edge (BRANCH_EDGE (switch_bb), succ_bb);
+
  loop_redirect_edge (header_edge, switch_bb);
-  loop_redirect_edge (switch_bb->succ->succ_next, loop->header);
-  loop_redirect_edge (switch_bb->succ, succ_bb);
+  loop_redirect_edge (FALLTHRU_EDGE (switch_bb), loop->header); 

  /* Update dominators.  */
  set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
  set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
+
  set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);

  /* Compute new loop.  */
--- a/gcc/expr.c
+++ b/gcc/expr.c
@ -5588,6 +5588,20 @@ force_operand (rtx value, rtx target)
  rtx subtarget = get_subtarget (target);
  enum rtx_code code = GET_CODE (value);

+  /* Check for subreg applied to an expression produced by loop optimizer.  */
+  if (code == SUBREG
+      && GET_CODE (SUBREG_REG (value)) != REG
+      && GET_CODE (SUBREG_REG (value)) != MEM)
+    {
+      value = simplify_gen_subreg (GET_MODE (value),
+				   force_reg (GET_MODE (SUBREG_REG (value)),
+					      force_operand (SUBREG_REG (value),
+							     NULL_RTX)),
+				   GET_MODE (SUBREG_REG (value)),
+				   SUBREG_BYTE (value));
+      code = GET_CODE (value);
+    }
+
  /* Check for a PIC address load.  */
  if ((code == PLUS || code == MINUS)
      && XEXP (value, 0) == pic_offset_table_rtx
--- a/gcc/loop-init.c
+++ b/gcc/loop-init.c
@ -36,9 +36,6 @@ loop_optimizer_init (FILE *dumpfile)
  struct loops *loops = xcalloc (1, sizeof (struct loops));
  edge e;

-  /* Initialize structures for layout changes.  */
-  cfg_layout_initialize ();
-
  /* Avoid annoying special cases of edges going to exit
     block.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
@ -49,18 +46,11 @@ loop_optimizer_init (FILE *dumpfile)

  if (flow_loops_find (loops, LOOP_TREE) <= 1)
    {
-      basic_block bb;
-
      /* No loops.  */
      flow_loops_free (loops);
      free_dominance_info (CDI_DOMINATORS);
      free (loops);

-      /* Make chain.  */
-      FOR_EACH_BB (bb)
-	if (bb->next_bb != EXIT_BLOCK_PTR)
-	  bb->rbi->next = bb->next_bb;
-	  cfg_layout_finalize ();
      return NULL;
    }

@ -94,13 +84,14 @@ loop_optimizer_init (FILE *dumpfile)
 void
 loop_optimizer_finalize (struct loops *loops, FILE *dumpfile)
 {
-  basic_block bb;
+  unsigned i;

-  /* Finalize layout changes.  */
-  /* Make chain.  */
-  FOR_EACH_BB (bb)
-    if (bb->next_bb != EXIT_BLOCK_PTR)
-      bb->rbi->next = bb->next_bb;
+  if (!loops)
+    return;
+
+  for (i = 1; i < loops->num; i++)
+    if (loops->parray[i])
+      free_simple_loop_desc (loops->parray[i]);

  /* Another dump.  */
  flow_loops_dump (loops, dumpfile, NULL, 1);
@ -110,9 +101,6 @@ loop_optimizer_finalize (struct loops *loops, FILE *dumpfile)
  free_dominance_info (CDI_DOMINATORS);
  free (loops);

-  /* Finalize changes.  */
-  cfg_layout_finalize ();
-
  /* Checking.  */
 #ifdef ENABLE_CHECKING
  verify_flow_info ();
--- a/gcc/loop-iv.c
+++ b/gcc/loop-iv.c
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@ -85,7 +85,7 @@ void
 unroll_and_peel_loops (struct loops *loops, int flags)
 {
  struct loop *loop, *next;
-  int check;
+  bool check;

  /* First perform complete loop peeling (it is almost surely a win,
     and affects parameters for further decision a lot).  */
@ -110,7 +110,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
      else
 	next = loop->outer;

-      check = 1;
+      check = true;
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
 	{
@ -130,7 +130,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
 	  unroll_loop_stupid (loops, loop);
 	  break;
 	case LPT_NONE:
-	  check = 0;
+	  check = false;
 	  break;
 	default:
 	  abort ();
@ -144,6 +144,29 @@ unroll_and_peel_loops (struct loops *loops, int flags)
 	}
      loop = next;
    }
+
+  iv_analysis_done ();
+}
+
+/* Check whether exit of the LOOP is at the end of loop body.  This holds
+   when the in_edge of the descriptor returned by get_simple_loop_desc leads
+   directly to LOOP->latch and the latch block contains no insn for which
+   INSN_P is true.  The descriptor's in_edge is dereferenced without a
+   check.  */
+
+static bool
+loop_exit_at_end_p (struct loop *loop)
+{
+  struct niter_desc *desc = get_simple_loop_desc (loop);
+  rtx insn;
+
+  if (desc->in_edge->dest != loop->latch)
+    return false;
+
+  /* Check that the latch is empty.  */
+  FOR_BB_INSNS (loop->latch, insn)
+    {
+      if (INSN_P (insn))
+	return false;
+    }
+
+  return true;
 }

 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
@ -168,10 +191,9 @@ peel_loops_completely (struct loops *loops, int flags)
 	next = loop->outer;

      loop->lpt_decision.decision = LPT_NONE;
-      loop->has_desc = 0;

      if (rtl_dump_file)
-	fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n",
+	fprintf (rtl_dump_file, "\n;; *** Considering loop %d for complete peeling ***\n",
 		 loop->num);

      loop->ninsns = num_loop_insns (loop);
@ -216,7 +238,7 @@ decide_unrolling_and_peeling (struct loops *loops, int flags)
      loop->lpt_decision.decision = LPT_NONE;

      if (rtl_dump_file)
-	fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num);
+	fprintf (rtl_dump_file, "\n;; *** Considering loop %d ***\n", loop->num);

      /* Do not peel cold areas.  */
      if (!maybe_hot_bb_p (loop->header))
@ -269,8 +291,10 @@ decide_unrolling_and_peeling (struct loops *loops, int flags)
 static void
 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 {
+  struct niter_desc *desc;
+
  if (rtl_dump_file)
-    fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n");
+    fprintf (rtl_dump_file, "\n;; Considering peeling once rolling loop\n");

  /* Is the loop small enough?  */
  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
@ -281,11 +305,13 @@ decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
    }

  /* Check for simple loops.  */
-  loop->simple = simple_loop_p (loop, &loop->desc);
-  loop->has_desc = 1;
+  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
-  if (!loop->simple || !loop->desc.const_iter || loop->desc.niter != 0)
+  if (!desc->simple_p
+      || desc->assumptions
+      || !desc->const_iter
+      || desc->niter != 0)
    {
      if (rtl_dump_file)
 	fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
@ -303,9 +329,10 @@ static void
 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 {
  unsigned npeel;
+  struct niter_desc *desc;

  if (rtl_dump_file)
-    fprintf (rtl_dump_file, ";; Considering peeling completely\n");
+    fprintf (rtl_dump_file, "\n;; Considering peeling completely\n");

  /* Skip non-innermost loops.  */
  if (loop->inner)
@ -346,26 +373,24 @@ decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
    }

  /* Check for simple loops.  */
-  if (!loop->has_desc)
-    {
-      loop->simple = simple_loop_p (loop, &loop->desc);
-      loop->has_desc = 1;
-    }
+  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
-  if (!loop->simple || !loop->desc.const_iter)
+  if (!desc->simple_p
+      || desc->assumptions
+      || !desc->const_iter)
    {
      if (rtl_dump_file)
 	fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

-  if (loop->desc.niter > npeel - 1)
+  if (desc->niter > npeel - 1)
    {
      if (rtl_dump_file)
 	{
 	  fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
-	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter);
+	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 	  fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
 	}
      return;
@ -397,8 +422,8 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
  sbitmap wont_exit;
  unsigned HOST_WIDE_INT npeel;
  unsigned n_remove_edges, i;
-  edge *remove_edges;
-  struct loop_desc *desc = &loop->desc;
+  edge *remove_edges, ei;
+  struct niter_desc *desc = get_simple_loop_desc (loop);

  npeel = desc->niter;

@ -407,7 +432,7 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
      wont_exit = sbitmap_alloc (npeel + 1);
      sbitmap_ones (wont_exit);
      RESET_BIT (wont_exit, 0);
-      if (desc->may_be_zero)
+      if (desc->noloop_assumptions)
 	RESET_BIT (wont_exit, 1);

      remove_edges = xcalloc (npeel, sizeof (edge));
@ -427,19 +452,24 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
      free (remove_edges);
    }

+  ei = desc->in_edge;
+  free_simple_loop_desc (loop);
+
  /* Now remove the unreachable part of the last iteration and cancel
     the loop.  */
-  remove_path (loops, desc->in_edge);
+  remove_path (loops, ei);

  if (rtl_dump_file)
    fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 }

 /* Decide whether to unroll LOOP iterating constant number of times and how much.  */
+
 static void
 decide_unroll_constant_iterations (struct loop *loop, int flags)
 {
-  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i;
+  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
+  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL))
    {
@ -448,7 +478,8 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
    }

  if (rtl_dump_file)
-    fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n");
+    fprintf (rtl_dump_file,
+	     "\n;; Considering unrolling loop with constant number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
@ -468,14 +499,10 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
    }

  /* Check for simple loops.  */
-  if (!loop->has_desc)
-    {
-      loop->simple = simple_loop_p (loop, &loop->desc);
-      loop->has_desc = 1;
-    }
+  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
-  if (!loop->simple || !loop->desc.const_iter)
+  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (rtl_dump_file)
 	fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
@ -483,7 +510,7 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
    }

  /* Check whether the loop rolls enough to consider.  */
-  if (loop->desc.niter < 2 * nunroll)
+  if (desc->niter < 2 * nunroll)
    {
      if (rtl_dump_file)
 	fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
@ -497,16 +524,17 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
-  if ((unsigned) i - 1 >= loop->desc.niter)
-    i = loop->desc.niter - 2;
+  if (i - 1 >= desc->niter)
+    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
-      unsigned exit_mod = loop->desc.niter % (i + 1);
+      unsigned exit_mod = desc->niter % (i + 1);

-      if (loop->desc.postincr)
+      if (!loop_exit_at_end_p (loop))
 	n_copies = exit_mod + i + 1;
-      else if (exit_mod != (unsigned) i || loop->desc.may_be_zero)
+      else if (exit_mod != (unsigned) i
+	       || desc->noloop_assumptions != NULL_RTX)
 	n_copies = exit_mod + i + 2;
      else
 	n_copies = i + 1;
@ -524,6 +552,11 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
+  
+  if (rtl_dump_file)
+    fprintf (rtl_dump_file,
+	     ";; Decided to unroll the constant times rolling loop, %d times.\n",
+	     loop->lpt_decision.times);
 }

 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
@ -554,11 +587,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
  unsigned n_remove_edges, i;
  edge *remove_edges;
  unsigned max_unroll = loop->lpt_decision.times;
-  struct loop_desc *desc = &loop->desc;
+  struct niter_desc *desc = get_simple_loop_desc (loop);
+  bool exit_at_end = loop_exit_at_end_p (loop);

  niter = desc->niter;

-  if (niter <= (unsigned) max_unroll + 1)
+  if (niter <= max_unroll + 1)
    abort ();  /* Should not get here (such loop should be peeled instead).  */

  exit_mod = niter % (max_unroll + 1);
@ -569,9 +603,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
  remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
  n_remove_edges = 0;

-  if (desc->postincr)
+  if (!exit_at_end)
    {
-      /* Counter is incremented after the exit test; leave exit test
+      /* The exit is not at the end of the loop; leave exit test
 	 in the first copy, so that the loops that start with test
 	 of exit condition have continuous body after unrolling.  */

@ -580,15 +614,22 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)

      /* Peel exit_mod iterations.  */
      RESET_BIT (wont_exit, 0);
-      if (desc->may_be_zero)
+      if (desc->noloop_assumptions)
 	RESET_BIT (wont_exit, 1);

-      if (exit_mod
-	  && !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
-		loops, exit_mod,
-		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
-		DLTHE_FLAG_UPDATE_FREQ))
-	abort ();
+      if (exit_mod)
+	{
+	  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
+					      loops, exit_mod,
+					      wont_exit, desc->out_edge,
+					      remove_edges, &n_remove_edges,
+					      DLTHE_FLAG_UPDATE_FREQ))
+	    abort ();
+
+	  desc->noloop_assumptions = NULL_RTX;
+	  desc->niter -= exit_mod;
+	  desc->niter_max -= exit_mod;
+	}

      SET_BIT (wont_exit, 1);
    }
@ -602,12 +643,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)

      /* We know that niter >= max_unroll + 2; so we do not need to care of
 	 case when we would exit before reaching the loop.  So just peel
-	 exit_mod + 1 iterations.
-	 */
-      if (exit_mod != (unsigned) max_unroll || desc->may_be_zero)
+	 exit_mod + 1 iterations.  */
+      if (exit_mod != max_unroll
+	  || desc->noloop_assumptions)
 	{
 	  RESET_BIT (wont_exit, 0);
-	  if (desc->may_be_zero)
+	  if (desc->noloop_assumptions)
 	    RESET_BIT (wont_exit, 1);

 	  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
@ -616,6 +657,10 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 		DLTHE_FLAG_UPDATE_FREQ))
 	    abort ();

+	  desc->niter -= exit_mod + 1;
+	  desc->niter_max -= exit_mod + 1;
+	  desc->noloop_assumptions = NULL_RTX;
+
 	  SET_BIT (wont_exit, 0);
 	  SET_BIT (wont_exit, 1);
 	}
@ -632,6 +677,27 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)

  free (wont_exit);

+  if (exit_at_end)
+    {
+      basic_block exit_block = desc->in_edge->src->rbi->copy;
+      /* Find a new in and out edge; they are in the last copy we have made.  */
+      
+      if (exit_block->succ->dest == desc->out_edge->dest)
+	{
+	  desc->out_edge = exit_block->succ;
+	  desc->in_edge = exit_block->succ->succ_next;
+	}
+      else
+	{
+	  desc->out_edge = exit_block->succ->succ_next;
+	  desc->in_edge = exit_block->succ;
+	}
+    }
+
+  desc->niter /= max_unroll + 1;
+  desc->niter_max /= max_unroll + 1;
+  desc->niter_expr = GEN_INT (desc->niter);
+
  /* Remove the edges.  */
  for (i = 0; i < n_remove_edges; i++)
    remove_path (loops, remove_edges[i]);
@ -647,6 +713,7 @@ static void
 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 {
  unsigned nunroll, nunroll_by_av, i;
+  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL))
    {
@ -655,7 +722,8 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
    }

  if (rtl_dump_file)
-    fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n");
+    fprintf (rtl_dump_file,
+	     "\n;; Considering unrolling loop with runtime computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
@ -675,21 +743,18 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
    }

  /* Check for simple loops.  */
-  if (!loop->has_desc)
-    {
-      loop->simple = simple_loop_p (loop, &loop->desc);
-      loop->has_desc = 1;
-    }
+  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
-  if (!loop->simple)
+  if (!desc->simple_p || desc->assumptions)
    {
      if (rtl_dump_file)
-	fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n");
+	fprintf (rtl_dump_file,
+		 ";; Unable to prove that the number of iterations can be counted in runtime\n");
      return;
    }

-  if (loop->desc.const_iter)
+  if (desc->const_iter)
    {
      if (rtl_dump_file)
 	fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
@ -706,10 +771,16 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)

  /* Success; now force nunroll to be power of 2, as we are unable to
     cope with overflows in computation of number of iterations.  */
-  for (i = 1; 2 * i <= nunroll; i *= 2);
+  for (i = 1; 2 * i <= nunroll; i *= 2)
+    continue;

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
+  
+  if (rtl_dump_file)
+    fprintf (rtl_dump_file,
+	     ";; Decided to unroll the runtime computable times rolling loop, %d times.\n",
+	     loop->lpt_decision.times);
 }

 /* Unroll LOOP for that we are able to count number of iterations in runtime
@ -746,7 +817,7 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
 static void
 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 {
-  rtx niter, init_code, branch_code, jump, label;
+  rtx old_niter, niter, init_code, branch_code, tmp;
  unsigned i, j, p;
  basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
  unsigned n_dom_bbs;
@ -756,7 +827,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
  edge *remove_edges, e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
-  struct loop_desc *desc = &loop->desc;
+  struct niter_desc *desc = get_simple_loop_desc (loop);
+  bool exit_at_end = loop_exit_at_end_p (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
@ -777,7 +849,7 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
    }
  free (body);

-  if (desc->postincr)
+  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
 	 unroll_loop_constant_iterations).  */
@ -798,15 +870,15 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)

  /* Get expression for number of iterations.  */
  start_sequence ();
-  niter = count_loop_iterations (desc, NULL, NULL);
-  if (!niter)
-    abort ();
-  niter = force_operand (niter, NULL);
+  old_niter = niter = gen_reg_rtx (desc->mode);
+  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
+  if (tmp != niter)
+    emit_move_insn (niter, tmp);

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
-  niter = expand_simple_binop (GET_MODE (desc->var), AND,
+  niter = expand_simple_binop (desc->mode, AND,
 			       niter,
 			       GEN_INT (max_unroll),
 			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
@ -824,10 +896,11 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)

  /* Peel the first copy of loop body (almost always we must leave exit test
     here; the only exception is when we have extra zero check and the number
-     of iterations is reliable (i.e. comes out of NE condition).  Also record
-     the place of (possible) extra zero check.  */
+     of iterations is reliable.  Also record the place of (possible) extra
+     zero check.  */
  sbitmap_zero (wont_exit);
-  if (extra_zero_check && desc->cond == NE)
+  if (extra_zero_check
+      && !desc->noloop_assumptions)
    SET_BIT (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
@ -857,20 +930,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
-      label = block_label (preheader);
-      start_sequence ();
-      do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0,
-			       GET_MODE (desc->var), NULL_RTX, NULL_RTX,
-			       label);
-      jump = get_last_insn ();
-      JUMP_LABEL (jump) = label;
-      REG_NOTES (jump)
-	      = gen_rtx_EXPR_LIST (REG_BR_PROB,
-				   GEN_INT (p), REG_NOTES (jump));
-
-      LABEL_NUSES (label)++;
-      branch_code = get_insns ();
-      end_sequence ();
+      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
+					  block_label (preheader), p, NULL_RTX);

      swtch = loop_split_edge_with (swtch->pred, branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
@ -886,20 +947,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
-      label = block_label (preheader);
-      start_sequence ();
-      do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0,
-			       GET_MODE (desc->var), NULL_RTX, NULL_RTX,
-			       label);
-      jump = get_last_insn ();
-      JUMP_LABEL (jump) = label;
-      REG_NOTES (jump)
-	      = gen_rtx_EXPR_LIST (REG_BR_PROB,
-				   GEN_INT (p), REG_NOTES (jump));
-
-      LABEL_NUSES (label)++;
-      branch_code = get_insns ();
-      end_sequence ();
+      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
+					  block_label (preheader), p, NULL_RTX);

      swtch = loop_split_edge_with (swtch->succ, branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
@ -925,11 +974,45 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)

  free (wont_exit);

+  if (exit_at_end)
+    {
+      basic_block exit_block = desc->in_edge->src->rbi->copy;
+      /* Find a new in and out edge; they are in the last copy we have made.  */
+      
+      if (exit_block->succ->dest == desc->out_edge->dest)
+	{
+	  desc->out_edge = exit_block->succ;
+	  desc->in_edge = exit_block->succ->succ_next;
+	}
+      else
+	{
+	  desc->out_edge = exit_block->succ->succ_next;
+	  desc->in_edge = exit_block->succ;
+	}
+    }
+
  /* Remove the edges.  */
  for (i = 0; i < n_remove_edges; i++)
    remove_path (loops, remove_edges[i]);
  free (remove_edges);

+  /* We must be careful when updating the number of iterations due to
+     preconditioning and the fact that the value must be valid at entry
+     of the loop.  After passing through the above code, we see that
+     the correct new number of iterations is this:  */
+  if (desc->const_iter)
+    abort ();
+  desc->niter_expr =
+    simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
+  desc->niter_max /= max_unroll + 1;
+  if (exit_at_end)
+    {
+      desc->niter_expr =
+	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
+      desc->noloop_assumptions = NULL_RTX;
+      desc->niter_max--;
+    }
+
  if (rtl_dump_file)
    fprintf (rtl_dump_file,
 	     ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
@ -941,6 +1024,7 @@ static void
 decide_peel_simple (struct loop *loop, int flags)
 {
  unsigned npeel;
+  struct niter_desc *desc;

  if (!(flags & UAP_PEEL))
    {
@ -949,7 +1033,7 @@ decide_peel_simple (struct loop *loop, int flags)
    }

  if (rtl_dump_file)
-    fprintf (rtl_dump_file, ";; Considering simply peeling loop\n");
+    fprintf (rtl_dump_file, "\n;; Considering simply peeling loop\n");

  /* npeel = number of iterations to peel.  */
  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
@ -965,14 +1049,10 @@ decide_peel_simple (struct loop *loop, int flags)
    }

  /* Check for simple loops.  */
-  if (!loop->has_desc)
-    {
-      loop->simple = simple_loop_p (loop, &loop->desc);
-      loop->has_desc = 1;
-    }
+  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
-  if (loop->simple && loop->desc.const_iter)
+  if (desc->simple_p && !desc->assumptions && desc->const_iter)
    {
      if (rtl_dump_file)
 	fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
@ -981,7 +1061,7 @@ decide_peel_simple (struct loop *loop, int flags)

  /* Do not simply peel loops with branches inside -- it increases number
     of mispredicts.  */
-  if (loop->desc.n_branches > 1)
+  if (num_loop_branches (loop) > 1)
    {
      if (rtl_dump_file)
 	fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
@ -1016,6 +1096,10 @@ decide_peel_simple (struct loop *loop, int flags)
  /* Success.  */
  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
  loop->lpt_decision.times = npeel;
+      
+  if (rtl_dump_file)
+    fprintf (rtl_dump_file, ";; Decided to simply peel the loop, %d times.\n",
+	     loop->lpt_decision.times);
 }

 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
@ -1037,6 +1121,7 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
 {
  sbitmap wont_exit;
  unsigned npeel = loop->lpt_decision.times;
+  struct niter_desc *desc = get_simple_loop_desc (loop);

  wont_exit = sbitmap_alloc (npeel + 1);
  sbitmap_zero (wont_exit);
@ -1048,6 +1133,23 @@ peel_loop_simple (struct loops *loops, struct loop *loop)

  free (wont_exit);

+  if (desc->simple_p)
+    {
+      if (desc->const_iter)
+	{
+	  desc->niter -= npeel;
+	  desc->niter_expr = GEN_INT (desc->niter);
+	  desc->noloop_assumptions = NULL_RTX;
+	}
+      else
+	{
+	  /* We cannot just update niter_expr, as its value might be clobbered
+	     inside loop.  We could handle this by counting the number into
+	     temporary just like we do in runtime unrolling, but it does not
+	     seem worthwhile.  */
+	  free_simple_loop_desc (loop);
+	}
+    }
  if (rtl_dump_file)
    fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel);
 }
@ -1057,6 +1159,7 @@ static void
 decide_unroll_stupid (struct loop *loop, int flags)
 {
  unsigned nunroll, nunroll_by_av, i;
+  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL_ALL))
    {
@ -1065,7 +1168,7 @@ decide_unroll_stupid (struct loop *loop, int flags)
    }

  if (rtl_dump_file)
-    fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n");
+    fprintf (rtl_dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
@ -1085,14 +1188,10 @@ decide_unroll_stupid (struct loop *loop, int flags)
    }

  /* Check for simple loops.  */
-  if (!loop->has_desc)
-    {
-      loop->simple = simple_loop_p (loop, &loop->desc);
-      loop->has_desc = 1;
-    }
+  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
-  if (loop->simple)
+  if (desc->simple_p && !desc->assumptions)
    {
      if (rtl_dump_file)
 	fprintf (rtl_dump_file, ";; The loop is simple\n");
@ -1101,7 +1200,7 @@ decide_unroll_stupid (struct loop *loop, int flags)

  /* Do not unroll loops with branches inside -- it increases number
     of mispredicts.  */
-  if (loop->desc.n_branches > 1)
+  if (num_loop_branches (loop) > 1)
    {
      if (rtl_dump_file)
 	fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
@ -1109,7 +1208,8 @@ decide_unroll_stupid (struct loop *loop, int flags)
    }

  /* If we have profile feedback, check whether the loop rolls.  */
-  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
+  if (loop->header->count
+      && expected_loop_iterations (loop) < 2 * nunroll)
    {
      if (rtl_dump_file)
 	fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
@ -1119,10 +1219,16 @@ decide_unroll_stupid (struct loop *loop, int flags)
  /* Success.  Now force nunroll to be power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
-  for (i = 1; 2 * i <= nunroll; i *= 2);
+  for (i = 1; 2 * i <= nunroll; i *= 2)
+    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
+      
+  if (rtl_dump_file)
+    fprintf (rtl_dump_file,
+	     ";; Decided to unroll the loop stupidly, %d times.\n",
+	     loop->lpt_decision.times);
 }

 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
@ -1147,6 +1253,7 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
 {
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
+  struct niter_desc *desc = get_simple_loop_desc (loop);

  wont_exit = sbitmap_alloc (nunroll + 1);
  sbitmap_zero (wont_exit);
@ -1158,6 +1265,17 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)

  free (wont_exit);

+  if (desc->simple_p)
+    {
+      /* We indeed may get here provided that there are nontrivial assumptions
+	 for a loop to be really simple.  We could update the counts, but the
+	 problem is that we are unable to decide which exit will be taken
+	 (not really true in case the number of iterations is constant,
+	 but no one will do anything with this information, so we do not
+	 worry about it).  */
+      desc->simple_p = false;
+    }
+
  if (rtl_dump_file)
    fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n",
 	     nunroll, num_loop_insns (loop));
--- a/gcc/loop-unswitch.c
+++ b/gcc/loop-unswitch.c
@ -79,11 +79,63 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  with handling this case.  */

 static struct loop *unswitch_loop (struct loops *, struct loop *,
-				   basic_block);
+				   basic_block, rtx, rtx);
 static void unswitch_single_loop (struct loops *, struct loop *, rtx, int);
-static bool may_unswitch_on_p (basic_block, struct loop *,
-			       basic_block *);
-static rtx reversed_condition (rtx);
+static rtx may_unswitch_on (basic_block, struct loop *, rtx *);
+
+/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
+   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
+   in order to create a jump.  */
+
+rtx
+compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
+		      rtx cinsn)
+{
+  rtx seq, jump, cond;
+  enum machine_mode mode;
+
+  mode = GET_MODE (op0);
+  if (mode == VOIDmode)
+    mode = GET_MODE (op1);
+
+  start_sequence ();
+  if (GET_MODE_CLASS (mode) == MODE_CC)
+    {
+      /* A hack -- there seems to be no easy generic way to make a
+	 conditional jump from a ccmode comparison.  */
+      if (!cinsn)
+	abort ();
+      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
+      if (GET_CODE (cond) != comp
+	  || !rtx_equal_p (op0, XEXP (cond, 0))
+	  || !rtx_equal_p (op1, XEXP (cond, 1)))
+	abort ();
+      emit_jump_insn (copy_insn (PATTERN (cinsn)));
+      jump = get_last_insn ();
+      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
+      LABEL_NUSES (JUMP_LABEL (jump))++;
+      redirect_jump (jump, label, 0);
+    }
+  else
+    {
+      if (cinsn)
+	abort ();
+
+      op0 = force_operand (op0, NULL_RTX);
+      op1 = force_operand (op1, NULL_RTX);
+      do_compare_rtx_and_jump (op0, op1, comp, 0,
+			       mode, NULL_RTX, NULL_RTX, label);
+      jump = get_last_insn ();
+      JUMP_LABEL (jump) = label;
+      LABEL_NUSES (label)++;
+    }
+  REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
+					REG_NOTES (jump));
+  seq = get_insns ();
+  end_sequence ();
+
+  return seq;
+}

 /* Main entry point.  Perform loop unswitching on all suitable LOOPS.  */
 void
@ -111,48 +163,82 @@ unswitch_loops (struct loops *loops)
      verify_loop_structure (loops);
 #endif
    }
+
+  iv_analysis_done ();
 }

 /* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
-   basic blocks (for what it means see comments below).  List of basic blocks
-   inside LOOP is provided in BODY to save time.  */
-static bool
-may_unswitch_on_p (basic_block bb, struct loop *loop, basic_block *body)
+   basic blocks (for what it means see comments below).  In case the condition
+   compares a loop invariant cc mode register, return the jump in CINSN.  */
+
+static rtx
+may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
 {
-  rtx test;
+  rtx test, at, insn, op[2];
+  struct rtx_iv iv;
  unsigned i;
+  enum machine_mode mode;

  /* BB must end in a simple conditional jump.  */
  if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next)
-    return false;
+    return NULL_RTX;
  if (!any_condjump_p (BB_END (bb)))
-    return false;
+    return NULL_RTX;

  /* With branches inside loop.  */
  if (!flow_bb_inside_loop_p (loop, bb->succ->dest)
      || !flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest))
-    return false;
+    return NULL_RTX;

  /* It must be executed just once each iteration (because otherwise we
     are unable to update dominator/irreducible loop information correctly).  */
  if (!just_once_each_iteration_p (loop, bb))
-    return false;
+    return NULL_RTX;

-  /* Condition must be invariant.  We use just a stupid test of invariantness
-     of the condition: all used regs must not be modified inside loop body.  */
-  test = get_condition (BB_END (bb), NULL, true);
+  /* Condition must be invariant.  */
+  test = get_condition (BB_END (bb), &at, true);
  if (!test)
-    return false;
+    return NULL_RTX;

-  for (i = 0; i < loop->num_nodes; i++)
-    if (modified_between_p (test, BB_HEAD (body[i]), NEXT_INSN (BB_END (body[i]))))
-      return false;
+  for (i = 0; i < 2; i++)
+    {
+      op[i] = XEXP (test, i);

-  return true;
+      if (CONSTANT_P (op[i]))
+	continue;
+
+      insn = iv_get_reaching_def (at, op[i]);
+      if (!iv_analyse (insn, op[i], &iv))
+	return NULL_RTX;
+      if (iv.step != const0_rtx
+	  || iv.first_special)
+	return NULL_RTX;
+
+      op[i] = get_iv_value (&iv, const0_rtx);
+    }
+
+  mode = GET_MODE (op[0]);
+  if (mode == VOIDmode)
+    mode = GET_MODE (op[1]);
+  if (GET_MODE_CLASS (mode) == MODE_CC)
+    {
+      if (at != BB_END (bb))
+	return NULL_RTX;
+
+      *cinsn = BB_END (bb);
+      if (!rtx_equal_p (op[0], XEXP (test, 0))
+	  || !rtx_equal_p (op[1], XEXP (test, 1)))
+	return NULL_RTX;
+
+      return test;
+    }
+
+  return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
+					  op[0], op[1]));
 }

 /* Reverses CONDition; returns NULL if we cannot.  */
-static rtx
+rtx
 reversed_condition (rtx cond)
 {
  enum rtx_code reversed;
@ -173,13 +259,10 @@ static void
 unswitch_single_loop (struct loops *loops, struct loop *loop,
 		      rtx cond_checked, int num)
 {
-  basic_block *bbs, bb;
+  basic_block *bbs;
  struct loop *nloop;
  unsigned i;
-  int true_first;
-  rtx cond, rcond, conds, rconds, acond, split_before;
-  int always_true;
-  int always_false;
+  rtx cond, rcond, conds, rconds, acond, cinsn = NULL_RTX;
  int repeat;
  edge e;

@ -237,8 +320,9 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,

      /* Find a bb to unswitch on.  */
      bbs = get_loop_body (loop);
+      iv_analysis_loop_init (loop);
      for (i = 0; i < loop->num_nodes; i++)
-	if (may_unswitch_on_p (bbs[i], loop, bbs))
+	if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
 	  break;

      if (i == loop->num_nodes)
@ -247,39 +331,26 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
 	  return;
 	}

-      if (!(cond = get_condition (BB_END (bbs[i]), &split_before, true)))
-	abort ();
      rcond = reversed_condition (cond);
+      if (rcond)
+	rcond = canon_condition (rcond);

      /* Check whether the result can be predicted.  */
-      always_true = 0;
-      always_false = 0;
      for (acond = cond_checked; acond; acond = XEXP (acond, 1))
-	{
-	  if (rtx_equal_p (cond, XEXP (acond, 0)))
-	    {
-	      always_true = 1;
-	      break;
-	    }
-	  if (rtx_equal_p (rcond, XEXP (acond, 0)))
-	    {
-	      always_false = 1;
-	      break;
-	    }
-	}
+	simplify_using_condition (XEXP (acond, 0), &cond, NULL);

-      if (always_true)
+      if (cond == const_true_rtx)
 	{
 	  /* Remove false path.  */
-	  for (e = bbs[i]->succ; !(e->flags & EDGE_FALLTHRU); e = e->succ_next);
+	  e = FALLTHRU_EDGE (bbs[i]);
 	  remove_path (loops, e);
 	  free (bbs);
 	  repeat = 1;
 	}
-      else if (always_false)
+      else if (cond == const0_rtx)
 	{
 	  /* Remove true path.  */
-	  for (e = bbs[i]->succ; e->flags & EDGE_FALLTHRU; e = e->succ_next);
+	  e = BRANCH_EDGE (bbs[i]);
 	  remove_path (loops, e);
 	  free (bbs);
 	  repeat = 1;
@ -293,21 +364,17 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
  else
    rconds = cond_checked;

-  /* Separate condition in a single basic block.  */
-  bb = split_loop_bb (bbs[i], PREV_INSN (split_before))->dest;
-  free (bbs);
-  true_first = !(bb->succ->flags & EDGE_FALLTHRU);
  if (rtl_dump_file)
    fprintf (rtl_dump_file, ";; Unswitching loop\n");

  /* Unswitch the loop on this condition.  */
-  nloop = unswitch_loop (loops, loop, bb);
+  nloop = unswitch_loop (loops, loop, bbs[i], cond, cinsn);
  if (!nloop)
  abort ();

  /* Invoke itself on modified loops.  */
-  unswitch_single_loop (loops, nloop, true_first ? conds : rconds, num + 1);
-  unswitch_single_loop (loops, loop, true_first ? rconds : conds, num + 1);
+  unswitch_single_loop (loops, nloop, rconds, num + 1);
+  unswitch_single_loop (loops, loop, conds, num + 1);

  free_EXPR_LIST_node (conds);
  if (rcond)
@ -316,17 +383,21 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,

 /* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON.  We only support
   unswitching of innermost loops.  UNSWITCH_ON must be executed in every
-   iteration, i.e. it must dominate LOOP latch, and should only contain code
-   for the condition we unswitch on.  Returns NULL if impossible, new
-   loop otherwise.  */
+   iteration, i.e. it must dominate LOOP latch.  COND is the condition
+   determining which loop is entered.  Returns NULL if impossible, new loop
+   otherwise.  The new loop is entered if COND is true.  If CINSN is not
+   NULL, it is the insn in which COND is compared.  */
+
 static struct loop *
-unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
+unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on,
+	       rtx cond, rtx cinsn)
 {
-  edge entry, latch_edge;
+  edge entry, latch_edge, true_edge, false_edge, e;
  basic_block switch_bb, unswitch_on_alt, src;
  struct loop *nloop;
  sbitmap zero_bitmap;
-  int irred_flag;
+  int irred_flag, prob;
+  rtx seq;

  /* Some sanity checking.  */
  if (!flow_bb_inside_loop_p (loop, unswitch_on))
@ -343,12 +414,6 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
  if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->succ_next->dest))
    abort ();

-  /* Will we be able to perform redirection?  */
-  if (!any_condjump_p (BB_END (unswitch_on)))
-    return NULL;
-  if (!cfg_layout_can_duplicate_bb_p (unswitch_on))
-    return NULL;
-
  entry = loop_preheader_edge (loop);

  /* Make a copy.  */
@ -365,10 +430,24 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)

  /* Record the block with condition we unswitch on.  */
  unswitch_on_alt = unswitch_on->rbi->copy;
+  true_edge = BRANCH_EDGE (unswitch_on_alt);
+  false_edge = FALLTHRU_EDGE (unswitch_on);
+  latch_edge = loop->latch->rbi->copy->succ;
+
+  /* Create a block with the condition.  */
+  prob = true_edge->probability;
+  switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
+  seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
+			      block_label (true_edge->dest),
+			      prob, cinsn);
+  emit_insn_after (seq, BB_END (switch_bb));
+  e = make_edge (switch_bb, true_edge->dest, 0);
+  e->probability = prob;
+  e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
+  e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
+  e->probability = false_edge->probability;
+  e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;

-  /* Make a copy of the block containing the condition; we will use
-     it as switch to decide which loop we want to use.  */
-  switch_bb = cfg_layout_duplicate_bb (unswitch_on, NULL);
  if (irred_flag)
    {
      switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
@ -381,19 +460,14 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
      switch_bb->succ->flags &= ~EDGE_IRREDUCIBLE_LOOP;
      switch_bb->succ->succ_next->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }
-  unswitch_on->rbi->copy = unswitch_on_alt;

  /* Loopify from the copy of LOOP body, constructing the new loop.  */
-  for (latch_edge = loop->latch->rbi->copy->succ;
-       latch_edge->dest != loop->header;
-       latch_edge = latch_edge->succ_next);
  nloop = loopify (loops, latch_edge,
 		   loop->header->rbi->copy->pred, switch_bb);

-  /* Remove branches that are now unreachable in new loops.  We rely on the
-     fact that cfg_layout_duplicate_bb reverses list of edges.  */
-  remove_path (loops, unswitch_on->succ);
-  remove_path (loops, unswitch_on_alt->succ);
+  /* Remove branches that are now unreachable in new loops.  */
+  remove_path (loops, true_edge);
+  remove_path (loops, false_edge);

  /* One of created loops do not have to be subloop of the outer loop now,
     so fix its placement in loop data structure.  */
--- a/gcc/predict.c
+++ b/gcc/predict.c
@ -406,13 +406,16 @@ estimate_probability (struct loops *loops_info)
      unsigned j;
      int exits;
      struct loop *loop = loops_info->parray[i];
-      struct loop_desc desc;
+      struct niter_desc desc;
      unsigned HOST_WIDE_INT niter;

      flow_loop_scan (loop, LOOP_EXIT_EDGES);
      exits = loop->num_exits;

-      if (simple_loop_p (loop, &desc) && desc.const_iter)
+      iv_analysis_loop_init (loop);
+      find_simple_exit (loop, &desc);
+
+      if (desc.simple_p && desc.const_iter)
 	{
 	  int prob;
 	  niter = desc.niter + 1;
@ -472,6 +475,8 @@ estimate_probability (struct loops *loops_info)
      free (bbs);
    }

+  iv_analysis_done ();
+
  /* Attempt to predict conditional jumps using a number of heuristics.  */
  FOR_EACH_BB (bb)
    {
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@ -2361,4 +2361,15 @@ extern void tracer (void);
 /* In var-tracking.c */
 extern void variable_tracking_main (void);

+/* In stor-layout.c.  */
+extern void get_mode_bounds (enum machine_mode, int, rtx *, rtx *);
+
+/* In loop-unswitch.c  */
+extern rtx reversed_condition (rtx);
+extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx);
+
+/* In loop-iv.c  */
+extern rtx canon_condition (rtx);
+extern void simplify_using_condition (rtx, rtx *, struct bitmap_head_def *);
+
 #endif /* ! GCC_RTL_H */
--- a/gcc/stor-layout.c
+++ b/gcc/stor-layout.c
@ -2118,4 +2118,27 @@ get_best_mode (int bitsize, int bitpos, unsigned int align,
  return mode;
 }

+/* Gets minimal and maximal values for MODE (signed or unsigned depending on
+   SIGN).  */
+
+void
+get_mode_bounds (enum machine_mode mode, int sign, rtx *mmin, rtx *mmax)
+{
+  int size = GET_MODE_BITSIZE (mode);
+
+  if (size > HOST_BITS_PER_WIDE_INT)
+    abort ();
+
+  if (sign)
+    {
+      *mmin = GEN_INT (-((unsigned HOST_WIDE_INT) 1 << (size - 1)));
+      *mmax = GEN_INT (((unsigned HOST_WIDE_INT) 1 << (size - 1)) - 1);
+    }
+  else
+    {
+      *mmin = const0_rtx;
+      *mmax = GEN_INT (((unsigned HOST_WIDE_INT) 1 << (size - 1) << 1) - 1);
+    }
+}
+
 #include "gt-stor-layout.h"
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@ -3034,11 +3034,16 @@ static void
 rest_of_handle_loop2 (tree decl, rtx insns)
 {
  struct loops *loops;
+  basic_block bb;
+
  timevar_push (TV_LOOP);
  open_dump_file (DFI_loop2, decl);
  if (rtl_dump_file)
    dump_flow_info (rtl_dump_file);

+  /* Initialize structures for layout changes.  */
+  cfg_layout_initialize ();
+
  loops = loop_optimizer_init (rtl_dump_file);

  if (loops)
@ -3056,6 +3061,12 @@ rest_of_handle_loop2 (tree decl, rtx insns)
      loop_optimizer_finalize (loops, rtl_dump_file);
    }

+  /* Finalize layout changes.  */
+  FOR_EACH_BB (bb)
+    if (bb->next_bb != EXIT_BLOCK_PTR)
+      bb->rbi->next = bb->next_bb;
+  cfg_layout_finalize ();
+
  cleanup_cfg (CLEANUP_EXPENSIVE);
  delete_trivially_dead_insns (insns, max_reg_num ());
  reg_scan (insns, max_reg_num (), 0);