loop-invariant.c: New file.

* loop-invariant.c: New file. * Makefile.in (loop-invariant.o): New. * cfgloop.h (global_cost_for_size, init_set_costs, move_loop_invariants): Declare. * cfgloopanal.c (seq_cost, init_set_costs, global_cost_for_size): New functions. (avail_regs, res_regs, small_cost, pres_cost, spill_cost): New variables. * common.opt (floop-optimize2, fmove-loop-invariants): New options. * loop-init.c (loop_optimizer_init): Call init_set_costs. * passes.c (rest_of_handle_loop2): Call move_loop_invariants. (rest_of_compilation): Check flag_loop_optimize2. * toplev.c (process_options): Handle flag_loop_optimize2. * doc/invoke.texi (-floop-optimize2, -fmove-loop-invariants): Document. * doc/passes.texi (loop-invariant.c): Document. From-SVN: r83419
2004-06-20 23:31:32 +02:00 · 2004-06-20 23:31:32 +02:00 · 5e96277660
commit 5e96277660
parent 2e24fa83ab
11 changed files with 1101 additions and 4 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,21 @@
+2004-06-20  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>
+
+	* loop-invariant.c: New file.
+	* Makefile.in (loop-invariant.o): New.
+	* cfgloop.h (global_cost_for_size, init_set_costs,
+	move_loop_invariants): Declare.
+	* cfgloopanal.c (seq_cost, init_set_costs, global_cost_for_size): New
+	functions.
+	(avail_regs, res_regs, small_cost, pres_cost, spill_cost): New
+	variables.
+	* common.opt (floop-optimize2, fmove-loop-invariants): New options.
+	* loop-init.c (loop_optimizer_init): Call init_set_costs.
+	* passes.c (rest_of_handle_loop2): Call move_loop_invariants.
+	(rest_of_compilation): Check flag_loop_optimize2.
+	* toplev.c (process_options): Handle flag_loop_optimize2.
+	* doc/invoke.texi (-floop-optimize2, -fmove-loop-invariants): Document.
+	* doc/passes.texi (loop-invariant.c): Document.
+
 2004-06-20  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>

 	* tree-ssa-pre.c (compute_antic): Keep BB_VISITED flag zeroed.
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@ -897,7 +897,7 @@ OBJS-common = \
 cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
 cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o	   \
 cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o 	   \
- dbxout.o ddg.o								   \
+ dbxout.o ddg.o loop-invariant.o					   \
 debug.o df.o diagnostic.o dojump.o dominance.o loop-doloop.o		   \
 dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o		   \
 expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o		   \
@ -1952,6 +1952,9 @@ cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
   $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(EXPR_H) coretypes.h $(TM_H)
 loop-iv.o : loop-iv.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \
   $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(EXPR_H) coretypes.h $(TM_H)
+# Keep this list in sync with the #include lines of loop-invariant.c.
+loop-invariant.o : loop-invariant.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \
+   $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(EXPR_H) coretypes.h $(TM_H) \
+   function.h flags.h df.h output.h
 cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
   $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(CFGLAYOUT_H) output.h coretypes.h $(TM_H)
 loop-init.o : loop-init.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@ -411,6 +411,11 @@ simple_loop_desc (struct loop *loop)
  return loop->aux;
 }

+/* Register pressure estimation for induction variable optimizations & loop
+   invariant motion.  */
+extern unsigned global_cost_for_size (unsigned, unsigned, unsigned);
+extern void init_set_costs (void);
+
 /* Loop optimizer initialization.  */
 extern struct loops *loop_optimizer_init (FILE *);
 extern void loop_optimizer_finalize (struct loops *, FILE *);
@ -427,5 +432,6 @@ enum

 extern void unroll_and_peel_loops (struct loops *, int);
 extern void doloop_optimize_loops (struct loops *);
+extern void move_loop_invariants (struct loops *);

 #endif /* GCC_CFGLOOP_H */
--- a/gcc/cfgloopanal.c
+++ b/gcc/cfgloopanal.c
@ -474,3 +474,92 @@ get_loop_level (const struct loop *loop)
    }
  return mx;
 }
+
+/* Estimates the cost of the insn sequence that starts at SEQ.  An insn that
+   is a single set contributes the rtx_cost of that set; any other insn
+   contributes one unit.  */
+
+static unsigned
+seq_cost (rtx seq)
+{
+  unsigned total = 0;
+  rtx insn;
+
+  for (insn = seq; insn; insn = NEXT_INSN (insn))
+    {
+      rtx pat = single_set (insn);
+
+      total += pat ? rtx_cost (pat, SET) : 1;
+    }
+
+  return total;
+}
+
+/* The properties of the target.  They are set by init_set_costs and read by
+   global_cost_for_size.  */
+
+static unsigned avail_regs;	/* Number of available registers, i.e. the
+				   non-fixed hard registers in GENERAL_REGS.  */
+static unsigned res_regs;	/* Number of reserved registers.  */
+static unsigned small_cost;	/* The cost for register when there is a free one.
+				   Estimated as the cost of a register-to-register
+				   move.  */
+static unsigned pres_cost;	/* The cost for register when there are not too many
+				   free ones.  Set to twice SMALL_COST.  */
+static unsigned spill_cost;	/* The cost for register when we need to spill.
+				   Estimated as the cost of a store to memory
+				   followed by a load from it.  */
+
+/* Initialize the constants for computing set costs (AVAIL_REGS, RES_REGS,
+   SMALL_COST, PRES_COST and SPILL_COST).  The costs are obtained by emitting
+   sample move sequences on scratch pseudos and measuring them with
+   seq_cost.  */
+
+void
+init_set_costs (void)
+{
+  rtx seq;
+  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
+  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
+  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
+  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
+  unsigned i;
+
+  /* Start the count from scratch so that calling this function more than
+     once does not accumulate the registers of the previous call.  */
+  avail_regs = 0;
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
+	&& !fixed_regs[i])
+      avail_regs++;
+
+  res_regs = 3;
+
+  /* These are really just heuristic values.  */
+
+  /* A free register costs about as much as a register-to-register move.  */
+  start_sequence ();
+  emit_move_insn (reg1, reg2);
+  seq = get_insns ();
+  end_sequence ();
+  small_cost = seq_cost (seq);
+  pres_cost = 2 * small_cost;
+
+  /* A spilled register costs a store to memory plus a reload from it.  */
+  start_sequence ();
+  emit_move_insn (mem, reg1);
+  emit_move_insn (reg2, mem);
+  seq = get_insns ();
+  end_sequence ();
+  spill_cost = seq_cost (seq);
+}
+
+/* Returns the estimated cost of keeping SIZE new loop-global values in
+   registers.  REGS_USED is the number of global registers already used in the
+   loop and N_USES is the number of relevant variable uses.  Three regimes are
+   distinguished: enough registers remain even after reserving RES_REGS, the
+   registers still fit but pressure is high, and the registers do not fit so
+   that a share of the uses is charged the spill cost.  */
+
+unsigned
+global_cost_for_size (unsigned size, unsigned regs_used, unsigned n_uses)
+{
+  unsigned total_regs = regs_used + size;
+
+  if (total_regs + res_regs <= avail_regs)
+    return small_cost * size;
+
+  if (total_regs <= avail_regs)
+    return pres_cost * size;
+
+  return (pres_cost * size
+	  + spill_cost * n_uses * (total_regs - avail_regs) / total_regs);
+}
+
--- a/gcc/common.opt
+++ b/gcc/common.opt
@ -447,6 +447,10 @@ floop-optimize
 Common Report Var(flag_loop_optimize)
 Perform loop optimizations

+floop-optimize2
+Common Report Var(flag_loop_optimize2)
+Perform loop optimizations using the new loop optimizer
+
 fmath-errno
 Common Report Var(flag_errno_math) Init(1)
 Set errno after built-in math functions
@ -475,6 +479,10 @@ fmove-all-movables
 Common Report Var(flag_move_all_movables)
 Force all loop invariant computations out of loops

+fmove-loop-invariants
+Common Report Var(flag_move_loop_invariants)
+Move loop invariant computations out of loops
+
 fmudflap
 Common RejectNegative Report Var(flag_mudflap)
 Add mudflap bounds-checking instrumentation for single-threaded program.
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@ -290,7 +290,7 @@ in the following sections.
 -finline-functions  -finline-limit=@var{n}  -fkeep-inline-functions @gol
 -fkeep-static-consts  -fmerge-constants  -fmerge-all-constants @gol
 -fmodulo-sched -fmove-all-movables  -fnew-ra  -fno-branch-count-reg @gol
-fno-default-inline  -fno-defer-pop @gol
+-fno-default-inline  -fno-defer-pop -floop-optimize2 -fmove-loop-invariants @gol
 -fno-function-cse  -fno-guess-branch-probability @gol
 -fno-inline  -fno-math-errno  -fno-peephole  -fno-peephole2 @gol
 -funsafe-math-optimizations  -ffinite-math-only @gol
@ -4191,6 +4191,12 @@ well.

 Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}.

+@item -floop-optimize2
+@opindex floop-optimize2
+Perform loop optimizations using the new loop optimizer.  The optimizations
+(loop unrolling, peeling and unswitching, loop invariant motion) are enabled
+by separate flags.
+
@item -fcrossjumping
@opindex crossjumping
 Perform cross-jumping transformation. This transformation unifies equivalent code and save code size. The
@ -4922,6 +4928,11 @@ roll much (from profile feedback).  It also turns on complete loop peeling

 Enabled with @option{-fprofile-use}.

+@item -fmove-loop-invariants
+@opindex fmove-loop-invariants
+Enables the loop invariant motion pass in the new loop optimizer.  Enabled
+at level @option{-O1}.
+
@item -funswitch-loops
@opindex funswitch-loops
 Move branches with loop invariant conditions out of the loop, with duplicates
--- a/gcc/doc/passes.texi
+++ b/gcc/doc/passes.texi
@ -520,6 +520,8 @@ Its source files are @file{loop.c} and @file{unroll.c}, plus the header
@file{loop.h} used for communication between them.  Loop unrolling uses
 some functions in @file{integrate.c} and the header @file{integrate.h}.
 Loop dependency analysis routines are contained in @file{dependence.c}.
+This pass is seriously out-of-date and is supposed to be replaced by
+a new one described below in the near future.

 A second loop optimization pass takes care of basic block level
 optimizations---unrolling, peeling and unswitching loops. The source
@ -527,6 +529,8 @@ files are @file{cfgloopanal.c} and @file{cfgloopmanip.c} containing
 generic loop analysis and manipulation code, @file{loop-init.c} with
 initialization and finalization code, @file{loop-unswitch.c} for loop
 unswitching and @file{loop-unroll.c} for loop unrolling and peeling.
+It also contains a separate loop invariant motion pass implemented in
+@file{loop-invariant.c}.

@item Jump bypassing

--- a/gcc/loop-init.c
+++ b/gcc/loop-init.c
@ -35,6 +35,13 @@ loop_optimizer_init (FILE *dumpfile)
 {
  struct loops *loops = xcalloc (1, sizeof (struct loops));
  edge e;
+  static bool first_time = true;
+
+  if (first_time)
+    {
+      first_time = false;
+      init_set_costs ();
+    }

  /* Avoid annoying special cases of edges going to exit
     block.  */
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@ -0,0 +1,933 @@
+/* Rtl-level loop invariant motion.
+   Copyright (C) 2004 Free Software Foundation, Inc.
+   
+This file is part of GCC.
+   
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+   
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+   
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.  */
+
+/* This implements the loop invariant motion pass.  It is very simple
+   (no calls, libcalls, etc.).  This should be sufficient to clean up things
+   like address arithmetic -- other more complicated invariants should be
+   eliminated on the tree level either in tree-ssa-loop-im.c or in
+   tree-ssa-pre.c.
+   
+   We proceed loop by loop -- it is simpler than trying to handle things
+   globally and should not lose much.  First we inspect all sets inside loop
+   and create a dependency graph on insns (saying "to move this insn, you must
+   also move the following insns").
+
+   We then need to determine what to move.  We estimate the number of registers
+   used and move as many invariants as possible while we still have enough free
+   registers.  We prefer the expensive invariants.
+   
+   Then we move the selected invariants out of the loop, creating new
+   temporaries for them if necessary.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "cfgloop.h"
+#include "expr.h"
+#include "output.h"
+#include "function.h"
+#include "flags.h"
+#include "df.h"
+
+/* The data stored for the loop.  It is allocated and filled in by find_exits
+   and released by free_loop_data.  */
+
+struct loop_data
+{
+  struct loop *outermost_exit;	/* The outermost exit of the loop: the common
+				   superloop of the loop itself and of the
+				   destinations of all its exits.  */
+  bool has_call;		/* True if the loop contains a call that is
+				   neither const nor pure.  */
+};
+
+/* Accesses the loop_data record stored in the AUX field of LOOP.  */
+#define LOOP_DATA(LOOP) ((struct loop_data *) (LOOP)->aux)
+
+/* The description of a use.  Uses of one definition form a singly linked
+   list headed by the USES field of struct def.  */
+
+struct use
+{
+  rtx *pos;			/* Position of the use.  */
+  rtx insn;			/* The insn in which the use occurs.  */
+
+  struct use *next;		/* Next use in the list.  */
+};
+
+/* The description of a def.  It is attached to the dataflow reference of the
+   definition through DF_REF_DATA.  */
+
+struct def
+{
+  struct use *uses;		/* The list of uses that are uniquely reached
+				   by it.  */
+  unsigned n_uses;		/* Number of such uses.  */
+  unsigned invno;		/* The corresponding invariant.  */
+};
+
+/* The data stored for each invariant.  */
+
+struct invariant
+{
+  /* The number of the invariant, i.e. its index in the INVARIANTS array.  */
+  unsigned invno;
+
+  /* Whether we already processed the invariant (set by move_invariant_reg so
+     that each invariant is moved at most once).  */
+  bool processed;
+
+  /* The definition of the invariant.  NULL if the destination of the set is
+     not a pseudo register.  */
+  struct def *def;
+
+  /* The insn in which it is defined.  */
+  rtx insn;
+
+  /* Whether it is always executed.  */
+  bool always_executed;
+
+  /* Whether to move the invariant.  */
+  bool move;
+
+  /* Cost of the invariant.  */
+  unsigned cost;
+
+  /* The invariants it depends on, as a bitmap of invariant numbers.  */
+  bitmap depends_on;
+
+  /* Used for detecting already visited invariants during determining
+     costs of movements.  */
+  unsigned stamp;
+};
+
+/* The current stamp for marking already visited invariants during determining
+   costs of movements.  It is incremented after every cost computation, so
+   marks left by a previous computation are never mistaken for current ones.  */
+
+static unsigned actual_stamp;
+
+/* The invariants found in the loop that is being processed, indexed by
+   invariant number.  */
+
+static varray_type invariants;
+
+/* Returns false if X contains anything that rules out treating it as an
+   invariant expression: PC, CC0, a call, a volatile unspec, a volatile asm,
+   or a memory reference that is not marked RTX_UNCHANGING_P.  Registers are
+   accepted here; whether their definitions are invariant is checked
+   separately by check_dependencies.  */
+
+static bool
+check_maybe_invariant (rtx x)
+{
+  enum rtx_code code = GET_CODE (x);
+  const char *format;
+  int op, elt, n_ops;
+
+  switch (code)
+    {
+    case CONST_INT:
+    case CONST_DOUBLE:
+    case SYMBOL_REF:
+    case CONST:
+    case LABEL_REF:
+    case REG:
+      /* Leaves that never disqualify the expression.  */
+      return true;
+
+    case PC:
+    case CC0:
+    case UNSPEC_VOLATILE:
+    case CALL:
+      return false;
+
+    case MEM:
+      /* Load/store motion is done elsewhere.  ??? Perhaps also add it here?
+	 It should not be hard, and might be faster than "elsewhere".  */
+
+      /* Only the most trivial case is handled: a load from an unchanging
+	 location (most importantly, pic tables).  Its address is still
+	 checked below.  */
+      if (!RTX_UNCHANGING_P (x))
+	return false;
+      break;
+
+    case ASM_OPERANDS:
+      /* Don't mess with insns declared volatile.  */
+      if (MEM_VOLATILE_P (x))
+	return false;
+      break;
+
+    default:
+      break;
+    }
+
+  /* Every subexpression must pass the test as well.  */
+  format = GET_RTX_FORMAT (code);
+  n_ops = GET_RTX_LENGTH (code);
+  for (op = 0; op < n_ops; op++)
+    {
+      if (format[op] == 'e')
+	{
+	  if (!check_maybe_invariant (XEXP (x, op)))
+	    return false;
+	}
+      else if (format[op] == 'E')
+	{
+	  for (elt = 0; elt < XVECLEN (x, op); elt++)
+	    if (!check_maybe_invariant (XVECEXP (x, op, elt)))
+	      return false;
+	}
+    }
+
+  return true;
+}
+
+/* Determines which basic blocks of LOOP are always executed and records them
+   in ALWAYS_REACHED.  Bit I of a bitmap corresponds to BODY[I], where BODY is
+   the body of the loop obtained by get_loop_body_in_dom_order.  MAY_EXIT marks
+   the blocks that may exit the loop or contain a call that does not have to
+   return; the scan stops after the first such block.  */
+
+static void
+compute_always_reached (struct loop *loop, basic_block *body,
+			bitmap may_exit, bitmap always_reached)
+{
+  unsigned ix;
+
+  for (ix = 0; ix < loop->num_nodes; ix++)
+    {
+      /* A block that dominates the latch is passed in every iteration.  */
+      if (dominated_by_p (CDI_DOMINATORS, loop->latch, body[ix]))
+	bitmap_set_bit (always_reached, ix);
+
+      /* Nothing after a possible exit is guaranteed to run.  */
+      if (bitmap_bit_p (may_exit, ix))
+	break;
+    }
+}
+
+/* Finds exits out of the LOOP with body BODY.  Marks blocks in that we may
+   exit the loop by cfg edge to HAS_EXIT and MAY_EXIT.  In MAY_EXIT
+   additionally mark blocks that may exit due to a call.  Bit I of both
+   bitmaps corresponds to BODY[I].  Also allocates the loop_data record of
+   LOOP and stores the outermost exit and the presence of calls there.  The
+   loop_data of all subloops of LOOP must already be available.  */
+
+static void
+find_exits (struct loop *loop, basic_block *body,
+	    bitmap may_exit, bitmap has_exit)
+{
+  unsigned i;
+  edge e;
+  struct loop *outermost_exit = loop, *aexit;
+  bool has_call = false;
+  rtx insn;
+
+  for (i = 0; i < loop->num_nodes; i++)
+    {
+      /* Blocks that belong directly to LOOP are inspected insn by insn and
+	 edge by edge.  */
+      if (body[i]->loop_father == loop)
+	{
+	  FOR_BB_INSNS (body[i], insn)
+	    {
+	      /* One call that is neither const nor pure is enough to mark the
+		 block.  */
+	      if (GET_CODE (insn) == CALL_INSN
+		  && !CONST_OR_PURE_CALL_P (insn))
+		{
+		  has_call = true;
+		  bitmap_set_bit (may_exit, i);
+		  break;
+		}
+	    }
+
+	  for (e = body[i]->succ; e; e = e->succ_next)
+	    {
+	      if (flow_bb_inside_loop_p (loop, e->dest))
+		continue;
+
+	      /* The edge leaves LOOP; widen the outermost exit so that it
+		 also covers the loop of the destination.  */
+	      bitmap_set_bit (may_exit, i);
+	      bitmap_set_bit (has_exit, i);
+	      outermost_exit = find_common_loop (outermost_exit,
+						 e->dest->loop_father);
+	    }
+	  continue;
+	}
+     
+      /* Use the data stored for the subloop to decide whether we may exit
+	 through it.  It is sufficient to do this for header of the loop,
+	 as other basic blocks inside it must be dominated by it.  */
+      if (body[i]->loop_father->header != body[i])
+	continue;
+
+      if (LOOP_DATA (body[i]->loop_father)->has_call)
+	{
+	  has_call = true;
+	  bitmap_set_bit (may_exit, i);
+	}
+      aexit = LOOP_DATA (body[i]->loop_father)->outermost_exit;
+      /* If the outermost exit of the subloop is not LOOP itself, the subloop
+	 is recorded as an exit of LOOP as well.  */
+      if (aexit != loop)
+	{
+	  bitmap_set_bit (may_exit, i);
+	  bitmap_set_bit (has_exit, i);
+
+	  if (flow_loop_nested_p (aexit, outermost_exit))
+	    outermost_exit = aexit;
+	}
+    }
+
+  loop->aux = xcalloc (1, sizeof (struct loop_data));
+  LOOP_DATA (loop)->outermost_exit = outermost_exit;
+  LOOP_DATA (loop)->has_call = has_call;
+}
+
+/* Check whether we may assign a value to X from a register.  Only the mode
+   of X is examined: the answer is whatever can_copy_p returns for it.  */
+
+static bool
+may_assign_reg_p (rtx x)
+{
+  return can_copy_p (GET_MODE (x));
+}
+
+/* Runs the dataflow analysis that the invariant search relies on.  The blocks
+   of LOOP, listed in BODY, are collected into a bitmap of block indices and
+   DF, the dataflow object, is asked to analyze just this part of the cfg,
+   computing use-def chains, including hard registers and equivalence
+   notes.  */
+
+static void
+find_defs (struct loop *loop, basic_block *body, struct df *df)
+{
+  bitmap loop_blocks = BITMAP_XMALLOC ();
+  unsigned n_blocks = loop->num_nodes;
+  unsigned ix = 0;
+
+  while (ix < n_blocks)
+    {
+      bitmap_set_bit (loop_blocks, body[ix]->index);
+      ix++;
+    }
+
+  df_analyze_subcfg (df, loop_blocks,
+		     DF_UD_CHAIN | DF_HARD_REGS | DF_EQUIV_NOTES);
+  BITMAP_XFREE (loop_blocks);
+}
+
+/* Creates a new invariant for definition DEF in INSN, depending on invariants
+   in DEPENDS_ON.  ALWAYS_EXECUTED is true if the insn is always executed,
+   unless the program ends due to a function call.  DEF may be NULL, in which
+   case the invariant has no definition record.  INSN must be a single set.
+   The invariant takes the next free number, is appended to INVARIANTS and
+   keeps DEPENDS_ON (the bitmap is not copied).  */
+
+static void
+create_new_invariant (struct def *def, rtx insn, bitmap depends_on,
+		      bool always_executed)
+{
+  struct invariant *inv = xmalloc (sizeof (struct invariant));
+  rtx set = single_set (insn);
+
+  inv->def = def;
+  inv->always_executed = always_executed;
+  inv->depends_on = depends_on;
+
+  /* If the set is simple, usually by moving it we move the whole store out of
+     the loop.  Otherwise we save only cost of the computation.  */
+  if (def)
+    inv->cost = rtx_cost (set, SET);
+  else
+    inv->cost = rtx_cost (SET_SRC (set), SET);
+
+  inv->move = false;
+  inv->processed = false;
+  inv->stamp = 0;
+  inv->insn = insn;
+
+  /* The number of the invariant is its index in the array.  */
+  inv->invno = VARRAY_ACTIVE_SIZE (invariants);
+  if (def)
+    def->invno = inv->invno;
+  VARRAY_PUSH_GENERIC_PTR_NOGC (invariants, inv);
+
+  if (dump_file)
+    {
+      /* INVNO and COST are unsigned, so they are printed with %u.  */
+      fprintf (dump_file,
+	       "Set in insn %d is invariant (%u), cost %u, depends on ",
+	       INSN_UID (insn), inv->invno, inv->cost);
+      dump_bitmap (dump_file, inv->depends_on);
+    }
+}
+
+/* Prepends a new use record to the list of uses of DEF and bumps its use
+   count.  USE points to the used expression inside INSN; if it is a SUBREG,
+   the position of the register inside it is recorded instead.  Aborts if the
+   position does not hold a register.  */
+
+static void
+record_use (struct def *def, rtx *use, rtx insn)
+{
+  struct use *entry = xmalloc (sizeof (struct use));
+  rtx *loc = use;
+
+  if (GET_CODE (*loc) == SUBREG)
+    loc = &SUBREG_REG (*loc);
+  if (!REG_P (*loc))
+    abort ();
+
+  entry->pos = loc;
+  entry->insn = insn;
+
+  /* Push the record to the front of the list.  */
+  entry->next = def->uses;
+  def->uses = entry;
+  def->n_uses++;
+}
+
+/* Collects the invariants INSN depends on into the DEPENDS_ON bitmap.  DF is
+   the dataflow object.  Returns false if some register used in INSN prevents
+   INSN from being invariant: it has several reaching definitions, or its only
+   definition is not a recorded invariant or does not dominate INSN.  Uses
+   with no reaching definition are ignored.  DEPENDS_ON may be partially
+   filled when false is returned.  */
+
+static bool
+check_dependencies (rtx insn, struct df *df, bitmap depends_on)
+{
+  basic_block insn_bb = BLOCK_FOR_INSN (insn);
+  struct df_link *link, *chain;
+  struct ref *def_ref;
+  struct def *info;
+
+  for (link = DF_INSN_USES (df, insn); link; link = link->next)
+    {
+      chain = DF_REF_CHAIN (link->ref);
+      if (!chain)
+	continue;
+
+      /* More than one reaching definition.  */
+      if (chain->next)
+	return false;
+
+      def_ref = chain->ref;
+      info = DF_REF_DATA (def_ref);
+      if (!info
+	  || !dominated_by_p (CDI_DOMINATORS, insn_bb, DF_REF_BB (def_ref)))
+	return false;
+
+      bitmap_set_bit (depends_on, info->invno);
+    }
+
+  return true;
+}
+
+/* Records INSN as an invariant if it qualifies.  ALWAYS_REACHED is true if
+   the insn is always executed.  ALWAYS_EXECUTED is true if the insn is always
+   executed, unless the program ends due to a function call.  DF is the
+   dataflow object.
+
+   INSN qualifies if it is not part of a libcall or no-conflict block, is a
+   single set whose source passes check_maybe_invariant and whose destination
+   may be assigned from a register, cannot trap at a place where that would
+   matter, and depends only on other invariants.  A definition record is
+   attached only when the destination is a pseudo register.  */
+
+static void
+find_invariant_insn (rtx insn, bool always_reached, bool always_executed,
+		     struct df *df)
+{
+  rtx pat, target;
+  bool sets_pseudo;
+  bitmap deps;
+  struct def *def_info = NULL;
+
+  /* Until we get rid of LIBCALLS.  */
+  if (find_reg_note (insn, REG_RETVAL, NULL_RTX)
+      || find_reg_note (insn, REG_LIBCALL, NULL_RTX)
+      || find_reg_note (insn, REG_NO_CONFLICT, NULL_RTX))
+    return;
+
+  pat = single_set (insn);
+  if (!pat)
+    return;
+  target = SET_DEST (pat);
+
+  sets_pseudo = GET_CODE (target) == REG && !HARD_REGISTER_P (target);
+
+  if (!check_maybe_invariant (SET_SRC (pat))
+      || !may_assign_reg_p (target))
+    return;
+
+  /* A trapping insn may only be hoisted if it is always reached.  Unless the
+     exceptions are handled, the behavior is undefined if the trap occurs, so
+     with -fnon-call-exceptions it is never hoisted.  */
+  if (may_trap_p (PATTERN (insn))
+      && (!always_reached || flag_non_call_exceptions))
+    return;
+
+  deps = BITMAP_XMALLOC ();
+  if (!check_dependencies (insn, df, deps))
+    {
+      BITMAP_XFREE (deps);
+      return;
+    }
+
+  if (sets_pseudo)
+    {
+      struct ref *def_ref = df_find_def (df, insn, target);
+
+      def_info = xcalloc (1, sizeof (struct def));
+      DF_REF_DATA (def_ref) = def_info;
+    }
+
+  create_new_invariant (def_info, insn, deps, always_executed);
+}
+
+/* Records the registers used in INSN that have a unique invariant
+   definition.  A use is recorded at its definition only if exactly one
+   definition reaches it, that definition has a definition record attached,
+   and the block of the definition dominates the block of INSN.  DF is the
+   dataflow object.  */
+
+static void
+record_uses (rtx insn, struct df *df)
+{
+  basic_block insn_bb = BLOCK_FOR_INSN (insn);
+  struct df_link *link, *chain;
+  struct ref *use_ref, *def_ref;
+
+  for (link = DF_INSN_USES (df, insn); link; link = link->next)
+    {
+      use_ref = link->ref;
+      chain = DF_REF_CHAIN (use_ref);
+
+      if (chain && !chain->next)
+	{
+	  def_ref = chain->ref;
+
+	  if (DF_REF_DATA (def_ref)
+	      && dominated_by_p (CDI_DOMINATORS, insn_bb,
+				 DF_REF_BB (def_ref)))
+	    record_use (DF_REF_DATA (def_ref), DF_REF_LOC (use_ref),
+			DF_REF_INSN (use_ref));
+	}
+    }
+}
+
+/* Finds invariants in INSN.  ALWAYS_REACHED is true if the insn is always
+   executed.  ALWAYS_EXECUTED is true if the insn is always executed,
+   unless the program ends due to a function call.  DF is the dataflow
+   object.  Besides testing INSN itself for invariantness, this also records
+   the uses in INSN at the invariant definitions that reach them.  */
+
+static void
+find_invariants_insn (rtx insn, bool always_reached, bool always_executed,
+		      struct df *df)
+{
+  find_invariant_insn (insn, always_reached, always_executed, df);
+  record_uses (insn, df);
+}
+
+/* Searches the insns of basic block BB for invariants.  ALWAYS_REACHED is
+   true if the block is always executed; it stops holding for the insns that
+   follow a call that is neither const nor pure.  ALWAYS_EXECUTED is true if
+   the block is always executed, unless the program ends due to a function
+   call.  DF is the dataflow object.  */
+
+static void
+find_invariants_bb (basic_block bb, bool always_reached, bool always_executed,
+		    struct df *df)
+{
+  rtx insn;
+
+  FOR_BB_INSNS (bb, insn)
+    {
+      if (INSN_P (insn))
+	{
+	  find_invariants_insn (insn, always_reached, always_executed, df);
+
+	  /* Such a call does not have to return.  */
+	  if (GET_CODE (insn) == CALL_INSN
+	      && !CONST_OR_PURE_CALL_P (insn))
+	    always_reached = false;
+	}
+    }
+}
+
+/* Searches every block of LOOP, listed in BODY, for invariants.  Bit I of
+   ALWAYS_REACHED tells whether BODY[I] is always executed, bit I of
+   ALWAYS_EXECUTED whether it is always executed unless the program ends due
+   to a function call.  DF is the dataflow object.  */
+
+static void
+find_invariants_body (struct loop *loop, basic_block *body,
+		      bitmap always_reached, bitmap always_executed,
+		      struct df *df)
+{
+  unsigned ix;
+
+  for (ix = 0; ix < loop->num_nodes; ix++)
+    {
+      bool reached = bitmap_bit_p (always_reached, ix);
+      bool executed = bitmap_bit_p (always_executed, ix);
+
+      find_invariants_bb (body[ix], reached, executed, df);
+    }
+}
+
+/* Finds invariants in LOOP.  DF is the dataflow object.  As a side effect
+   the loop_data record of LOOP is allocated (by find_exits).  */
+
+static void
+find_invariants (struct loop *loop, struct df *df)
+{
+  bitmap may_exit = BITMAP_XMALLOC ();
+  bitmap always_reached = BITMAP_XMALLOC ();
+  bitmap has_exit = BITMAP_XMALLOC ();
+  bitmap always_executed = BITMAP_XMALLOC ();
+  basic_block *body = get_loop_body_in_dom_order (loop);
+
+  /* ALWAYS_REACHED is bounded by blocks that exit or contain a call,
+     ALWAYS_EXECUTED only by blocks with an exit edge.  */
+  find_exits (loop, body, may_exit, has_exit);
+  compute_always_reached (loop, body, may_exit, always_reached);
+  compute_always_reached (loop, body, has_exit, always_executed);
+
+  find_defs (loop, body, df);
+  find_invariants_body (loop, body, always_reached, always_executed, df);
+
+  BITMAP_XFREE (always_reached);
+  BITMAP_XFREE (always_executed);
+  BITMAP_XFREE (may_exit);
+  BITMAP_XFREE (has_exit);
+  free (body);
+}
+
+/* Releases every record of the list of uses that starts at USE.  */
+
+static void
+free_use_list (struct use *use)
+{
+  while (use)
+    {
+      struct use *dead = use;
+
+      use = use->next;
+      free (dead);
+    }
+}
+
+/* Calculates cost and number of registers needed for moving invariant INV
+   out of the loop and stores them to *COMP_COST and *REGS_NEEDED.  The
+   results include the invariants INV depends on.  An invariant that is
+   already marked for moving, or that was already visited under the current
+   ACTUAL_STAMP, contributes zero to both.  */
+
+static void
+get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed)
+{
+  int acomp_cost;
+  unsigned aregs_needed;
+  unsigned depno;
+  struct invariant *dep;
+
+  *comp_cost = 0;
+  *regs_needed = 0;
+  if (inv->move
+      || inv->stamp == actual_stamp)
+    return;
+  /* Mark INV as visited so that it is counted only once.  */
+  inv->stamp = actual_stamp;
+
+  (*regs_needed)++;
+  (*comp_cost) += inv->cost;
+
+  EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno,
+    {
+      dep = VARRAY_GENERIC_PTR_NOGC (invariants, depno);
+
+      get_inv_cost (dep, &acomp_cost, &aregs_needed);
+
+      if (aregs_needed
+	  /* We need to check always_executed, since if the original value of
+	     the invariant may be preserved, we may need to keep it in a
+	     separate register.  TODO check whether the register has an
+	     use outside of the loop.  */
+	  && dep->always_executed
+	  && !dep->def->uses->next)
+	{
+	  /* If this is a single use, after moving the dependency we will not
+	     need a new register.  */
+	  aregs_needed--;
+	}
+
+      (*regs_needed) += aregs_needed;
+      (*comp_cost) += acomp_cost;
+    });
+}
+
+/* Returns the gain of moving invariant INV: the cost of the computations
+   removed from the loop minus the increase of the register pressure cost.
+   REGS_USED is the number of registers used in the loop, N_INV_USES is the
+   number of uses of invariants, NEW_REGS is the number of new variables
+   already added due to the invariant motion.  The number of registers needed
+   for INV is stored in *REGS_NEEDED.  */
+
+static int
+gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
+		    unsigned new_regs, unsigned regs_used, unsigned n_inv_uses)
+{
+  int saved_cost, pressure_cost;
+  unsigned cost_after, cost_before;
+
+  get_inv_cost (inv, &saved_cost, regs_needed);
+
+  /* Invalidate the visited marks left by the computation above.  */
+  actual_stamp++;
+
+  cost_after = global_cost_for_size (new_regs + *regs_needed,
+				     regs_used, n_inv_uses);
+  cost_before = global_cost_for_size (new_regs, regs_used, n_inv_uses);
+  pressure_cost = cost_after - cost_before;
+
+  return saved_cost - pressure_cost;
+}
+
+/* Looks for the not yet selected invariant whose motion gains the most.
+   Returns the gain; if it is positive, the invariant is stored in *BEST and
+   the number of registers needed for it in *REGS_NEEDED.  If no invariant has
+   a positive gain, zero is returned and both outputs are left untouched.
+   REGS_USED is the number of registers used in the loop, N_INV_USES is the
+   number of uses of invariants.  NEW_REGS is the number of new variables
+   already added due to invariant motion.  */
+
+static int
+best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
+			 unsigned new_regs, unsigned regs_used,
+			 unsigned n_inv_uses)
+{
+  int top_gain = 0;
+  unsigned ix;
+
+  for (ix = 0; ix < VARRAY_ACTIVE_SIZE (invariants); ix++)
+    {
+      struct invariant *cand = VARRAY_GENERIC_PTR_NOGC (invariants, ix);
+      unsigned cand_regs;
+      int cand_gain;
+
+      if (!cand->move)
+	{
+	  cand_gain = gain_for_invariant (cand, &cand_regs,
+					  new_regs, regs_used, n_inv_uses);
+	  if (cand_gain > top_gain)
+	    {
+	      top_gain = cand_gain;
+	      *best = cand;
+	      *regs_needed = cand_regs;
+	    }
+	}
+    }
+
+  return top_gain;
+}
+
+/* Marks invariant INVNO and all its dependencies for moving.  Invariants
+   that are already marked are left alone, which also ends the recursion.  */
+
+static void
+set_move_mark (unsigned invno)
+{
+  struct invariant *inv = VARRAY_GENERIC_PTR_NOGC (invariants, invno);
+  unsigned depno;
+
+  if (inv->move)
+    return;
+  inv->move = true;
+
+  /* INVNO is unsigned, so it is printed with %u.  */
+  if (dump_file)
+    fprintf (dump_file, "Decided to move invariant %u\n", invno);
+
+  /* A separate iteration variable keeps the parameter intact.  */
+  EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, set_move_mark (depno));
+}
+
+/* Determines which invariants to move.  DF is the dataflow object.  With
+   -fmove-all-movables every invariant is selected.  Otherwise invariants are
+   selected greedily, best gain first, for as long as the best gain stays
+   positive.  */
+
+static void
+find_invariants_to_move (struct df *df)
+{
+  unsigned i, regs_used, n_inv_uses, regs_needed = 0, new_regs;
+  struct invariant *inv = NULL;
+
+  if (flag_move_all_movables)
+    {
+      /* This is easy & stupid.  */
+      for (i = 0; i < VARRAY_ACTIVE_SIZE (invariants); i++)
+	{
+	  inv = VARRAY_GENERIC_PTR_NOGC (invariants, i);
+	  inv->move = true;
+	}
+      return;
+    }
+
+  if (!VARRAY_ACTIVE_SIZE (invariants))
+    return;
+
+  /* Now something slightly more involved.  First estimate the number of used
+     registers.  */
+  n_inv_uses = 0;
+
+  /* We do not really do a good job in this estimation; put some initial bound
+     here to stand for induction variables etc. that we do not detect.  */
+  regs_used = 2;
+
+  for (i = 0; i < df->n_regs; i++)
+    {
+      if (!DF_REGNO_FIRST_DEF (df, i) && DF_REGNO_LAST_USE (df, i))
+	{
+	  /* This is a value that is used but not changed inside loop.  */
+	  regs_used++;
+	}
+    }
+
+  /* Only invariants with a definition record have their uses counted.  */
+  for (i = 0; i < VARRAY_ACTIVE_SIZE (invariants); i++)
+    {
+      inv = VARRAY_GENERIC_PTR_NOGC (invariants, i);
+      if (inv->def)
+	n_inv_uses += inv->def->n_uses;
+    }
+
+  /* Each round selects the best remaining invariant together with its
+     dependencies and accounts for the registers they need.  */
+  new_regs = 0;
+  while (best_gain_for_invariant (&inv, &regs_needed,
+				  new_regs, regs_used, n_inv_uses) > 0)
+    {
+      set_move_mark (inv->invno);
+      new_regs += regs_needed;
+    }
+}
+
+/* Move invariant INVNO out of the LOOP.  DF is the dataflow object.  The
+   invariants INVNO depends on are moved first.  Each invariant is moved at
+   most once; repeated calls for the same INVNO do nothing.  */
+
+static void
+move_invariant_reg (struct loop *loop, unsigned invno, struct df *df)
+{
+  struct invariant *inv = VARRAY_GENERIC_PTR_NOGC (invariants, invno);
+  unsigned i;
+  basic_block preheader = loop_preheader_edge (loop)->src;
+  rtx reg, set;
+  struct use *use;
+
+  if (inv->processed)
+    return;
+  inv->processed = true;
+
+  if (inv->depends_on)
+    {
+      EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, i,
+	{
+	  move_invariant_reg (loop, i, df);
+	});
+    }
+
+  /* Move the set out of the loop.  If the set is always executed (we could
+     omit this condition if we know that the register is unused outside of the
+     loop, but it does not seem worth finding out) and it has no uses that
+     would not be dominated by it, we may just move it (TODO).  Otherwise we
+     need to create a temporary register.  */
+  set = single_set (inv->insn);
+  reg = gen_reg_rtx (GET_MODE (SET_DEST (set)));
+  /* Leave a copy from the temporary to the original destination at the old
+     place of the insn, then retarget the insn to the temporary and move it to
+     the end of the preheader.  */
+  df_pattern_emit_after (df, gen_move_insn (SET_DEST (set), reg),
+			 BLOCK_FOR_INSN (inv->insn), inv->insn);
+  SET_DEST (set) = reg;
+  reorder_insns (inv->insn, inv->insn, BB_END (preheader));
+  df_insn_modify (df, preheader, inv->insn);
+
+  /* Replace the uses we know to be dominated.  It saves work for copy
+     propagation, and also it is necessary so that dependent invariants
+     are computed right.  */
+  if (inv->def)
+    {
+      for (use = inv->def->uses; use; use = use->next)
+	{
+	  *use->pos = reg;
+	  df_insn_modify (df, BLOCK_FOR_INSN (use->insn), use->insn);
+	}
+    }
+}
+
+/* Moves every invariant that was selected for moving out of the LOOP.  DF is
+   the dataflow object.  */
+
+static void
+move_invariants (struct loop *loop, struct df *df)
+{
+  unsigned invno;
+
+  for (invno = 0; invno < VARRAY_ACTIVE_SIZE (invariants); invno++)
+    {
+      struct invariant *cand = VARRAY_GENERIC_PTR_NOGC (invariants, invno);
+
+      if (!cand->move)
+	continue;
+
+      move_invariant_reg (loop, invno, df);
+    }
+}
+
+/* Initializes invariant motion data: resets the visiting stamp and creates
+   the array of invariants if it does not exist yet.  */
+
+static void
+init_inv_motion_data (void)
+{
+  /* Freshly created invariants carry stamp 0, so starting at 1 makes none of
+     them look visited.  */
+  actual_stamp = 1;
+
+  if (!invariants)
+    VARRAY_GENERIC_PTR_NOGC_INIT (invariants, 100, "invariants");
+}
+
+/* Releases the data allocated by invariant motion for one loop: the
+   definition records attached to the references of DF, the dataflow object,
+   together with their use lists, and all the invariants.  The array of
+   invariants itself is only emptied so that it can be reused.  */
+
+static void
+free_inv_motion_data (struct df *df)
+{
+  unsigned ix;
+
+  for (ix = 0; ix < df->n_defs; ix++)
+    {
+      struct ref *def_ref = df->defs[ix];
+      struct def *info;
+
+      if (!def_ref)
+	continue;
+
+      info = DF_REF_DATA (def_ref);
+      if (info)
+	{
+	  free_use_list (info->uses);
+	  free (info);
+	  DF_REF_DATA (def_ref) = NULL;
+	}
+    }
+
+  for (ix = 0; ix < VARRAY_ACTIVE_SIZE (invariants); ix++)
+    {
+      struct invariant *dead = VARRAY_GENERIC_PTR_NOGC (invariants, ix);
+
+      BITMAP_XFREE (dead->depends_on);
+      free (dead);
+    }
+  VARRAY_POP_ALL (invariants);
+}
+
+/* Move the invariants out of the LOOP.  DF is the dataflow object.  This
+   runs the whole pipeline for one loop: find the invariants, decide which of
+   them to move, move them and release the per-loop data.  */
+
+static void
+move_single_loop_invariants (struct loop *loop, struct df *df)
+{
+  init_inv_motion_data ();
+
+  find_invariants (loop, df);
+  find_invariants_to_move (df);
+  move_invariants (loop, df);
+
+  free_inv_motion_data (df);
+}
+
+/* Frees the loop_data record stored in the AUX field of LOOP and clears the
+   field.  */
+
+static void
+free_loop_data (struct loop *loop)
+{
+  free (LOOP_DATA (loop));
+  loop->aux = NULL;
+}
+
+/* Move the invariants out of the LOOPS.  Every loop except the root of the
+   loop tree is processed, each one only after all the loops nested in it.
+   One dataflow object is created here and shared by all loops.  */
+
+void
+move_loop_invariants (struct loops *loops)
+{
+  struct loop *loop;
+  unsigned i;
+  struct df *df = df_init ();
+
+  /* Process the loops, innermost first.  */
+  loop = loops->tree_root;
+  while (loop->inner)
+    loop = loop->inner;
+
+  while (loop != loops->tree_root)
+    {
+      move_single_loop_invariants (loop, df);
+
+      /* Continue with the innermost loop of the next sibling; once there are
+	 no more siblings, go up to the enclosing loop.  */
+      if (loop->next)
+	{
+	  loop = loop->next;
+	  while (loop->inner)
+	    loop = loop->inner;
+	}
+      else
+	loop = loop->outer;
+    }
+
+  /* The loop_data records are kept until all loops are processed; find_exits
+     reads those of the subloops.  */
+  for (i = 1; i < loops->num; i++)
+    if (loops->parray[i])
+      free_loop_data (loops->parray[i]);
+
+  df_finish (df);
+}
--- a/gcc/passes.c
+++ b/gcc/passes.c
@ -1290,7 +1290,8 @@ rest_of_handle_loop2 (void)
  struct loops *loops;
  basic_block bb;

-  if (!flag_unswitch_loops
+  if (!flag_move_loop_invariants
+      && !flag_unswitch_loops
      && !flag_peel_loops
      && !flag_unroll_loops
      && !flag_branch_on_count_reg)
@ -1309,6 +1310,9 @@ rest_of_handle_loop2 (void)
  if (loops)
    {
      /* The optimizations:  */
+      if (flag_move_loop_invariants)
+	move_loop_invariants (loops);
+
      if (flag_unswitch_loops)
 	unswitch_loops (loops);

@ -1598,7 +1602,8 @@ rest_of_compilation (void)
  if (flag_tracer)
    rest_of_handle_tracer ();

-  if (optimize > 0)
+  if (optimize > 0
+      && flag_loop_optimize2)
    rest_of_handle_loop2 ();

  if (flag_web)
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@ -1673,6 +1673,19 @@ process_options (void)
  if (flag_unroll_loops || flag_peel_loops)
    flag_rerun_cse_after_loop = 1;

+  /* If explicitly asked to run new loop optimizer, switch off the old
+     one.  */
+  if (flag_loop_optimize2)
+    flag_loop_optimize = 0;
+
+  /* Enable new loop optimizer pass if any of its optimizations is called.  */
+  if (flag_move_loop_invariants
+      || flag_unswitch_loops
+      || flag_peel_loops
+      || flag_unroll_loops
+      || flag_branch_on_count_reg)
+    flag_loop_optimize2 = 1;
+
  if (flag_non_call_exceptions)
    flag_asynchronous_unwind_tables = 1;
  if (flag_asynchronous_unwind_tables)