loop-unswitch.c (unswitch_single_loop): Use optimize_loop_for_speed_p.
* loop-unswitch.c (unswitch_single_loop): Use optimize_loop_for_speed_p.
* tree-ssa-threadupdate.c (mark_threaded_blocks): Use optimize_function_for_size_p.
* tracer.c (ignore_bb_p): Use optimize_bb_for_size_p.
* postreload-gcse.c (eliminate_partially_redundant_load): Use optimize_bb_for_size_p.
* value-prof.c (gimple_divmod_fixed_value_transform, gimple_mod_pow2_value_transform, gimple_mod_subtract_transform, gimple_stringops_transform): Use optimize_bb_for_size_p.
* ipa-cp.c (ipcp_insert_stage): Use optimize_function_for_size_p.
* final.c (compute_alignments): Use optimize_function_for_size_p.
* builtins.c (fold_builtin_cabs): Use optimize_function_for_speed_p.
(fold_builtin_strcpy, fold_builtin_fputs): Use optimize_function_for_size_p.
* fold-const.c (tree_swap_operands_p): Use optimize_function_for_size_p.
* recog.c (relax_delay_slots): Likewise.
* tree-ssa-math-opts.c (replace_reciprocal): Use optimize_bb_for_speed_p.
(execute_cse_reciprocals): Use optimize_bb_for_size_p.
* ipa-inline.c (cgraph_decide_recursive_inlining): Use optimize_function_for_size_p.
(cgraph_decide_inlining_of_small_function): Use optimize_function_for_size_p.
* global.c (find_reg): Use optimize_function_for_size_p.
* opts.c (decode_options): Do not clear flag_tree_ch, flag_inline_functions, flag_unswitch_loops, flag_unroll_loops, flag_unroll_all_loops and flag_prefetch_loop_arrays. Those can work it out from profile.
* tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely): Use optimize_loop_for_speed_p.
* predict.c (optimize_bb_for_size_p, optimize_bb_for_speed_p): Constify argument.
(optimize_loop_nest_for_size_p, optimize_loop_nest_for_speed_p): New.
* tree-parloops.c (parallelize_loops): Use optimize_loop_for_size_p.
* tree-eh.c (decide_copy_try_finally): Use optimize_function_for_size_p.
* local-alloc.c (block_alloc): Pass BB pointer.
(find_free_reg): Add BB pointer, use optimize_bb_for_size_p.
* gcse.c (gcse_main): Use optimize_function_for_size_p.
* loop-unroll.c (decide_unrolling_and_peeling): Use optimize_loop_for_size_p.
(decide_peel_completely): Likewise.
* tree-vect-analyze.c (vect_mark_for_runtime_alias_test): Use optimize_loop_for_size_p.
(vect_enhance_data_refs_alignment): Likewise.
* tree-ssa-coalesce.c (coalesce_cost): Add optimize_for_size argument.
(coalesce_cost_bb, coalesce_cost_edge, create_outofssa_var_map): Update call.
* cfgcleanup.c (outgoing_edges_match): Use optimize_bb_for_speed_p.
(try_crossjump_bb): Use optimize_bb_for_size_p.
* tree-ssa-loop-prefetch.c (loop_prefetch_arrays): Use optimize_loop_for_speed_p.
* bb-reorder.c (find_traces_1_round): Likewise.
(copy_bb): Use optimize_bb_for_speed_p.
(duplicate_computed_gotos): Likewise.
* basic-block.h (optimize_loop_nest_for_size_p, optimize_loop_nest_for_speed_p): New.
* stmt.c (expand_case): Use optimize_insn_for_size_p.

From-SVN: r139760
parent e3536b82d7
commit efd8f7507b
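The whole patch follows one pattern: call sites that used to test the global optimize_size flag (or maybe_hot_bb_p) now ask a per-function, per-basic-block, per-edge or per-loop predicate, so profile feedback can treat cold regions as size-optimized even at -O2. The following toy, self-contained C sketch illustrates that idea only; it is not GCC source, and the struct, threshold and predicate bodies are made-up stand-ins for the real predicates declared in basic-block.h below.

/* Toy illustration (not GCC code): decide size vs. speed per basic block
   from profile data instead of from a single global -Os flag.  */
#include <stdbool.h>
#include <stdio.h>

struct basic_block { int frequency; };   /* profile-estimated execution frequency */

static int optimize_size = 0;            /* -Os: everything is size-optimized */
static const int hot_threshold = 10;     /* hypothetical "hot block" cutoff */

/* Per-block predicate: cold blocks are optimized for size even at -O2.  */
static bool optimize_bb_for_size_p (const struct basic_block *bb)
{
  return optimize_size || bb->frequency < hot_threshold;
}

static bool optimize_bb_for_speed_p (const struct basic_block *bb)
{
  return !optimize_bb_for_size_p (bb);
}

int main (void)
{
  struct basic_block hot = { 100 }, cold = { 1 };
  /* Old style: one global check gated every code-growing transform.
     New style: ask per block, so only the hot block gets them.  */
  printf ("grow code in hot block?  %d\n", optimize_bb_for_speed_p (&hot));
  printf ("grow code in cold block? %d\n", optimize_bb_for_speed_p (&cold));
  return 0;
}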
gcc/ChangeLog
@@ -1,3 +1,57 @@
+2008-08-29  Jan Hubicka  <jh@suse.cz>
+
+	* loop-unswitch.c (unswitch_single_loop): Use optimize_loop_for_speed_p.
+	* tree-ssa-threadupdate.c (mark_threaded_blocks): Use optimize_function_for_size_p.
+	* tracer.c (ignore_bb_p): Use optimize_bb_for_size_p.
+	* postreload-gcse.c (eliminate_partially_redundant_load): Use optimize_bb_for_size_p.
+	* value-prof.c (gimple_divmod_fixed_value_transform,
+	gimple_mod_pow2_value_transform, gimple_mod_subtract_transform,
+	gimple_stringops_transform): Use optimize_bb_for_size_p.
+	* ipa-cp.c (ipcp_insert_stage): Use optimize_function_for_size_p.
+	* final.c (compute_alignments): Use optimize_function_for_size_p.
+	* builtins.c (fold_builtin_cabs): Use optimize_function_for_speed_p.
+	(fold_builtin_strcpy, fold_builtin_fputs): Use
+	optimize_function_for_size_p.
+	* fold-const.c (tree_swap_operands_p): Use optimize_function_for_size_p.
+	* recog.c (relax_delay_slots): Likewise.
+	* tree-ssa-math-opts.c (replace_reciprocal): Use optimize_bb_for_speed_p.
+	(execute_cse_reciprocals): Use optimize_bb_for_size_p.
+	* ipa-inline.c (cgraph_decide_recursive_inlining): Use
+	optimize_function_for_size_p.
+	(cgraph_decide_inlining_of_small_function): Use
+	optimize_function_for_size_p.
+	* global.c (find_reg): Use optimize_function_for_size_p.
+	* opts.c (decode_options): Do not clear flag_tree_ch, flag_inline_functions,
+	flag_unswitch_loops, flag_unroll_loops, flag_unroll_all_loops and
+	flag_prefetch_loop_arrays. Those can work it out from profile.
+	* tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely): Use
+	optimize_loop_for_speed_p.
+	* predict.c (optimize_bb_for_size_p, optimize_bb_for_speed_p): Constify
+	argument.
+	(optimize_loop_nest_for_size_p, optimize_loop_nest_for_speed_p): New.
+	* tree-parloops.c (parallelize_loops): Use optimize_loop_for_size_p.
+	* tree-eh.c (decide_copy_try_finally): Use optimize_function_for_size_p.
+	* local-alloc.c (block_alloc): Pass BB pointer.
+	(find_free_reg): Add BB pointer, use optimize_bb_for_size_p.
+	* gcse.c (gcse_main): Use optimize_function_for_size_p.
+	* loop-unroll.c (decide_unrolling_and_peeling): Use optimize_loop_for_size_p.
+	(decide_peel_completely): Likewise.
+	* tree-vect-analyze.c (vect_mark_for_runtime_alias_test): Use
+	optimize_loop_for_size_p.
+	(vect_enhance_data_refs_alignment): Likewise.
+	* tree-ssa-coalesce.c (coalesce_cost): Add optimize_for_size argument.
+	(coalesce_cost_bb, coalesce_cost_edge, create_outofssa_var_map): Update call.
+	* cfgcleanup.c (outgoing_edges_match): Use optimize_bb_for_speed_p.
+	(try_crossjump_bb): Use optimize_bb_for_size_p.
+	* tree-ssa-loop-prefetch.c (loop_prefetch_arrays): Use
+	optimize_loop_for_speed_p.
+	* bb-reorder.c (find_traces_1_round): Likewise.
+	(copy_bb): Use optimize_bb_for_speed_p.
+	(duplicate_computed_gotos): Likewise.
+	* basic-block.h (optimize_loop_nest_for_size_p,
+	optimize_loop_nest_for_speed_p): New.
+	* stmt.c (expand_case): Use optimize_insn_for_size_p.
+
 2008-08-29  Tristan Gingold  <gingold@adacore.com>
 
 	* gcov.c (main): Call expandargv.
gcc/basic-block.h
@@ -841,6 +841,8 @@ extern bool optimize_function_for_size_p (struct function *);
 extern bool optimize_function_for_speed_p (struct function *);
 extern bool optimize_loop_for_size_p (struct loop *);
 extern bool optimize_loop_for_speed_p (struct loop *);
+extern bool optimize_loop_nest_for_size_p (struct loop *);
+extern bool optimize_loop_nest_for_speed_p (struct loop *);
 extern bool gimple_predicted_by_p (const_basic_block, enum br_predictor);
 extern bool rtl_predicted_by_p (const_basic_block, enum br_predictor);
 extern void gimple_predict_edge (edge, enum br_predictor, int);
gcc/bb-reorder.c
@@ -648,7 +648,8 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
 /* The loop has less than 4 iterations. */

 if (single_succ_p (bb)
-&& copy_bb_p (best_edge->dest, !optimize_size))
+&& copy_bb_p (best_edge->dest,
+optimize_edge_for_speed_p (best_edge)))
 {
 bb = copy_bb (best_edge->dest, best_edge, bb,
 *n_traces);
@@ -1102,7 +1103,7 @@ connect_traces (int n_traces, struct trace *traces)
 edge is traversed frequently enough. */
 if (try_copy
 && copy_bb_p (best->dest,
-!optimize_size
+optimize_edge_for_speed_p (best)
 && EDGE_FREQUENCY (best) >= freq_threshold
 && best->count >= count_threshold))
 {
@@ -1173,7 +1174,7 @@ copy_bb_p (const_basic_block bb, int code_may_grow)
 if (EDGE_COUNT (bb->succs) > 8)
 return false;

-if (code_may_grow && maybe_hot_bb_p (bb))
+if (code_may_grow && optimize_bb_for_speed_p (bb))
 max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

 FOR_BB_INSNS (bb, insn)
@@ -1984,7 +1985,7 @@ gate_duplicate_computed_gotos (void)
 {
 if (targetm.cannot_modify_jumps_p ())
 return false;
-return (optimize > 0 && flag_expensive_optimizations && !optimize_size);
+return (optimize > 0 && flag_expensive_optimizations);
 }


@@ -2075,6 +2076,9 @@ duplicate_computed_gotos (void)
 || single_pred_p (single_succ (bb)))
 continue;

+if (!optimize_bb_for_size_p (bb))
+continue;
+
 /* The successor block has to be a duplication candidate. */
 if (!bitmap_bit_p (candidates, single_succ (bb)->index))
 continue;
gcc/builtins.c
@@ -7530,7 +7530,7 @@ fold_builtin_cabs (tree arg, tree type, tree fndecl)

 /* Don't do this when optimizing for size. */
 if (flag_unsafe_math_optimizations
-&& optimize && !optimize_size)
+&& optimize && optimize_function_for_speed_p (cfun))
 {
 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

@@ -8882,7 +8882,7 @@ fold_builtin_strcpy (tree fndecl, tree dest, tree src, tree len)
 if (operand_equal_p (src, dest, 0))
 return fold_convert (TREE_TYPE (TREE_TYPE (fndecl)), dest);

-if (optimize_size)
+if (optimize_function_for_size_p (cfun))
 return NULL_TREE;

 fn = implicit_built_in_decls[BUILT_IN_MEMCPY];
@@ -11501,7 +11501,7 @@ fold_builtin_fputs (tree arg0, tree arg1, bool ignore, bool unlocked, tree len)
 case 1: /* length is greater than 1, call fwrite. */
 {
 /* If optimizing for size keep fputs. */
-if (optimize_size)
+if (optimize_function_for_size_p (cfun))
 return NULL_TREE;
 /* New argument list transforming fputs(string, stream) to
 fwrite(string, 1, len, stream). */
gcc/cfgcleanup.c
@@ -1235,9 +1235,8 @@ outgoing_edges_match (int mode, basic_block bb1, basic_block bb2)
 we require the existing branches to have probabilities that are
 roughly similar. */
 if (match
-&& !optimize_size
-&& maybe_hot_bb_p (bb1)
-&& maybe_hot_bb_p (bb2))
+&& optimize_bb_for_speed_p (bb1)
+&& optimize_bb_for_speed_p (bb2))
 {
 int prob2;

@@ -1684,7 +1683,7 @@ try_crossjump_bb (int mode, basic_block bb)

 /* Don't crossjump if this block ends in a computed jump,
 unless we are optimizing for size. */
-if (!optimize_size
+if (optimize_bb_for_size_p (bb)
 && bb != EXIT_BLOCK_PTR
 && computed_jump_p (BB_END (bb)))
 return false;
gcc/final.c
@@ -683,7 +683,7 @@ compute_alignments (void)
 label_align = XCNEWVEC (struct label_alignment, max_labelno - min_labelno + 1);

 /* If not optimizing or optimizing for size, don't assign any alignments. */
-if (! optimize || optimize_size)
+if (! optimize || optimize_function_for_size_p (cfun))
 return 0;

 if (dump_file)
@@ -765,7 +765,7 @@ compute_alignments (void)
 /* In case block is frequent and reached mostly by non-fallthru edge,
 align it. It is most likely a first block of loop. */
 if (has_fallthru
-&& maybe_hot_bb_p (bb)
+&& optimize_bb_for_speed_p (bb)
 && branch_frequency + fallthru_frequency > freq_threshold
 && (branch_frequency
 > fallthru_frequency * PARAM_VALUE (PARAM_ALIGN_LOOP_ITERATIONS)))
gcc/fold-const.c
@@ -6679,7 +6679,7 @@ tree_swap_operands_p (const_tree arg0, const_tree arg1, bool reorder)
 if (TREE_CONSTANT (arg0))
 return 1;

-if (optimize_size)
+if (cfun && optimize_function_for_size_p (cfun))
 return 0;

 if (reorder && flag_evaluation_order
@@ -10407,7 +10407,7 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1)
 }

 /* Optimize x*x as pow(x,2.0), which is expanded as x*x. */
-if (! optimize_size
+if (optimize_function_for_speed_p (cfun)
 && operand_equal_p (arg0, arg1, 0))
 {
 tree powfn = mathfn_built_in (type, BUILT_IN_POW);
gcc/gcse.c
@@ -738,9 +738,7 @@ gcse_main (rtx f ATTRIBUTE_UNUSED)
 timevar_pop (TV_CPROP1);
 }

-if (optimize_size)
-/* Do nothing. */ ;
-else
+if (optimize_function_for_speed_p (cfun))
 {
 timevar_push (TV_PRE);
 changed |= one_pre_gcse_pass (pass + 1);
@@ -773,7 +771,7 @@ gcse_main (rtx f ATTRIBUTE_UNUSED)
 for code size -- it rarely makes programs faster, and can make
 them bigger if we did partial redundancy elimination (when optimizing
 for space, we don't run the partial redundancy algorithms). */
-if (optimize_size)
+if (optimize_function_for_size_p (cfun))
 {
 timevar_push (TV_HOIST);
 max_gcse_regno = max_reg_num ();
@@ -825,7 +823,7 @@ gcse_main (rtx f ATTRIBUTE_UNUSED)
 /* We are finished with alias. */
 end_alias_analysis ();

-if (!optimize_size && flag_gcse_sm)
+if (optimize_function_for_speed_p (cfun) && flag_gcse_sm)
 {
 timevar_push (TV_LSM);
 store_motion ();
gcc/global.c
@@ -1168,8 +1168,8 @@ find_reg (int num, HARD_REG_SET losers, int alt_regs_p, int accept_call_clobbere
 if (! accept_call_clobbered
 && allocno[num].calls_crossed != 0
 && allocno[num].throwing_calls_crossed == 0
-&& CALLER_SAVE_PROFITABLE (optimize_size ? allocno[num].n_refs : allocno[num].freq,
-optimize_size ? allocno[num].calls_crossed
+&& CALLER_SAVE_PROFITABLE (optimize_function_for_size_p (cfun) ? allocno[num].n_refs : allocno[num].freq,
+optimize_function_for_size_p (cfun) ? allocno[num].calls_crossed
 : allocno[num].freq_calls_crossed))
 {
 HARD_REG_SET new_losers;
gcc/ipa-cp.c
@@ -1019,9 +1019,7 @@ ipcp_insert_stage (void)
 if (new_insns + growth > max_new_insns)
 break;
 if (growth
-&& (optimize_size
-|| (DECL_STRUCT_FUNCTION (node->decl)
-->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)))
+&& optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)))
 {
 if (dump_file)
 fprintf (dump_file, "Not versioning, cold code would grow");
gcc/ipa-inline.c
@@ -674,7 +674,7 @@ cgraph_decide_recursive_inlining (struct cgraph_node *node,
 int depth = 0;
 int n = 0;

-if (optimize_size
+if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl))
 || (!flag_inline_functions && !DECL_DECLARED_INLINE_P (node->decl)))
 return false;

@@ -951,7 +951,7 @@ cgraph_decide_inlining_of_small_functions (void)
 if (!flag_inline_functions
 && !DECL_DECLARED_INLINE_P (edge->callee->decl))
 not_good = N_("function not declared inline and code size would grow");
-if (optimize_size)
+if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION(edge->caller->decl)))
 not_good = N_("optimizing for size and code size would grow");
 if (not_good && growth > 0 && cgraph_estimate_growth (edge->callee) > 0)
 {
gcc/local-alloc.c
@@ -299,7 +299,7 @@ static int contains_replace_regs (rtx);
 static int memref_referenced_p (rtx, rtx);
 static int memref_used_between_p (rtx, rtx, rtx);
 static void no_equiv (rtx, const_rtx, void *);
-static void block_alloc (int);
+static void block_alloc (basic_block);
 static int qty_sugg_compare (int, int);
 static int qty_sugg_compare_1 (const void *, const void *);
 static int qty_compare (int, int);
@@ -311,7 +311,7 @@ static void reg_is_set (rtx, const_rtx, void *);
 static void reg_is_born (rtx, int);
 static void wipe_dead_reg (rtx, int);
 static int find_free_reg (enum reg_class, enum machine_mode, int, int, int,
-int, int);
+int, int, basic_block);
 static void mark_life (int, enum machine_mode, int);
 static void post_mark_life (int, enum machine_mode, int, int, int);
 static int requires_inout (const char *);
@@ -436,7 +436,7 @@ local_alloc (void)

 next_qty = 0;

-block_alloc (b->index);
+block_alloc (b);
 }

 free (qty);
@@ -1270,7 +1270,7 @@ no_equiv (rtx reg, const_rtx store ATTRIBUTE_UNUSED, void *data ATTRIBUTE_UNUSED
 Only the pseudos that die but once can be handled. */

 static void
-block_alloc (int b)
+block_alloc (basic_block b)
 {
 int i, q;
 rtx insn;
@@ -1283,7 +1283,7 @@ block_alloc (int b)

 /* Count the instructions in the basic block. */

-insn = BB_END (BASIC_BLOCK (b));
+insn = BB_END (b);
 while (1)
 {
 if (!NOTE_P (insn))
@@ -1291,7 +1291,7 @@ block_alloc (int b)
 ++insn_count;
 gcc_assert (insn_count <= max_uid);
 }
-if (insn == BB_HEAD (BASIC_BLOCK (b)))
+if (insn == BB_HEAD (b))
 break;
 insn = PREV_INSN (insn);
 }
@@ -1302,14 +1302,14 @@ block_alloc (int b)

 /* Initialize table of hardware registers currently live. */

-REG_SET_TO_HARD_REG_SET (regs_live, DF_LR_IN (BASIC_BLOCK (b)));
+REG_SET_TO_HARD_REG_SET (regs_live, DF_LR_IN (b));

 /* This is conservative, as this would include registers that are
 artificial-def'ed-but-not-used. However, artificial-defs are
 rare, and such uninitialized use is rarer still, and the chance
 of this having any performance impact is even less, while the
 benefit is not having to compute and keep the TOP set around. */
-for (def_rec = df_get_artificial_defs (b); *def_rec; def_rec++)
+for (def_rec = df_get_artificial_defs (b->index); *def_rec; def_rec++)
 {
 int regno = DF_REF_REGNO (*def_rec);
 if (regno < FIRST_PSEUDO_REGISTER)
@@ -1320,7 +1320,7 @@ block_alloc (int b)
 and assigns quantities to registers.
 It computes which registers to tie. */

-insn = BB_HEAD (BASIC_BLOCK (b));
+insn = BB_HEAD (b);
 while (1)
 {
 if (!NOTE_P (insn))
@@ -1487,7 +1487,7 @@ block_alloc (int b)
 IOR_HARD_REG_SET (regs_live_at[2 * insn_number], regs_live);
 IOR_HARD_REG_SET (regs_live_at[2 * insn_number + 1], regs_live);

-if (insn == BB_END (BASIC_BLOCK (b)))
+if (insn == BB_END (b))
 break;

 insn = NEXT_INSN (insn);
@@ -1542,7 +1542,7 @@ block_alloc (int b)
 q = qty_order[i];
 if (qty_phys_num_sugg[q] != 0 || qty_phys_num_copy_sugg[q] != 0)
 qty[q].phys_reg = find_free_reg (qty[q].min_class, qty[q].mode, q,
-0, 1, qty[q].birth, qty[q].death);
+0, 1, qty[q].birth, qty[q].death, b);
 else
 qty[q].phys_reg = -1;
 }
@@ -1627,19 +1627,19 @@ block_alloc (int b)
 a scheduling pass after reload and we are not optimizing
 for code size. */
 if (flag_schedule_insns_after_reload && dbg_cnt (local_alloc_for_sched)
-&& !optimize_size
+&& optimize_bb_for_speed_p (b)
 && !SMALL_REGISTER_CLASSES)
 {
 qty[q].phys_reg = find_free_reg (qty[q].min_class,
 qty[q].mode, q, 0, 0,
-fake_birth, fake_death);
+fake_birth, fake_death, b);
 if (qty[q].phys_reg >= 0)
 continue;
 }
 #endif
 qty[q].phys_reg = find_free_reg (qty[q].min_class,
 qty[q].mode, q, 0, 0,
-qty[q].birth, qty[q].death);
+qty[q].birth, qty[q].death, b);
 if (qty[q].phys_reg >= 0)
 continue;
 }
@@ -1647,17 +1647,17 @@ block_alloc (int b)
 #ifdef INSN_SCHEDULING
 /* Similarly, avoid false dependencies. */
 if (flag_schedule_insns_after_reload && dbg_cnt (local_alloc_for_sched)
-&& !optimize_size
+&& optimize_bb_for_speed_p (b)
 && !SMALL_REGISTER_CLASSES
 && qty[q].alternate_class != NO_REGS)
 qty[q].phys_reg = find_free_reg (qty[q].alternate_class,
 qty[q].mode, q, 0, 0,
-fake_birth, fake_death);
+fake_birth, fake_death, b);
 #endif
 if (qty[q].alternate_class != NO_REGS)
 qty[q].phys_reg = find_free_reg (qty[q].alternate_class,
 qty[q].mode, q, 0, 0,
-qty[q].birth, qty[q].death);
+qty[q].birth, qty[q].death, b);
 }
 }

@@ -2145,7 +2145,7 @@ wipe_dead_reg (rtx reg, int output_p)
 static int
 find_free_reg (enum reg_class rclass, enum machine_mode mode, int qtyno,
 int accept_call_clobbered, int just_try_suggested,
-int born_index, int dead_index)
+int born_index, int dead_index, basic_block bb)
 {
 int i, ins;
 HARD_REG_SET first_used, used;
@@ -2261,7 +2261,7 @@ find_free_reg (enum reg_class rclass, enum machine_mode mode, int qtyno,
 /* Don't try the copy-suggested regs again. */
 qty_phys_num_copy_sugg[qtyno] = 0;
 return find_free_reg (rclass, mode, qtyno, accept_call_clobbered, 1,
-born_index, dead_index);
+born_index, dead_index, bb);
 }

 /* We need not check to see if the current function has nonlocal
@@ -2274,11 +2274,12 @@ find_free_reg (enum reg_class rclass, enum machine_mode mode, int qtyno,
 && ! just_try_suggested
 && qty[qtyno].n_calls_crossed != 0
 && qty[qtyno].n_throwing_calls_crossed == 0
-&& CALLER_SAVE_PROFITABLE (optimize_size ? qty[qtyno].n_refs : qty[qtyno].freq,
-optimize_size ? qty[qtyno].n_calls_crossed
+&& CALLER_SAVE_PROFITABLE (optimize_bb_for_size_p (bb) ? qty[qtyno].n_refs
+: qty[qtyno].freq,
+optimize_bb_for_size_p (bb) ? qty[qtyno].n_calls_crossed
 : qty[qtyno].freq_calls_crossed))
 {
-i = find_free_reg (rclass, mode, qtyno, 1, 0, born_index, dead_index);
+i = find_free_reg (rclass, mode, qtyno, 1, 0, born_index, dead_index, bb);
 if (i >= 0)
 caller_save_needed = 1;
 return i;
gcc/loop-unroll.c
@@ -269,7 +269,7 @@ decide_unrolling_and_peeling (int flags)
 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);

 /* Do not peel cold areas. */
-if (!maybe_hot_bb_p (loop->header))
+if (optimize_loop_for_size_p (loop))
 {
 if (dump_file)
 fprintf (dump_file, ";; Not considering loop, cold area\n");
@@ -368,7 +368,7 @@ decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 }

 /* Do not peel cold areas. */
-if (!maybe_hot_bb_p (loop->header))
+if (optimize_loop_for_size_p (loop))
 {
 if (dump_file)
 fprintf (dump_file, ";; Not considering loop, cold area\n");
gcc/loop-unswitch.c
@@ -290,7 +290,7 @@ unswitch_single_loop (struct loop *loop, rtx cond_checked, int num)
 }

 /* Do not unswitch in cold areas. */
-if (!maybe_hot_bb_p (loop->header))
+if (optimize_loop_for_size_p (loop))
 {
 if (dump_file)
 fprintf (dump_file, ";; Not unswitching, not hot area\n");
gcc/opts.c
@@ -990,12 +990,6 @@ decode_options (unsigned int argc, const char **argv)

 if (optimize_size)
 {
-/* Loop header copying usually increases size of the code. This used not to
-be true, since quite often it is possible to verify that the condition is
-satisfied in the first iteration and therefore to eliminate it. Jump
-threading handles these cases now. */
-flag_tree_ch = 0;
-
 /* Conditional DCE generates bigger code. */
 flag_tree_builtin_call_dce = 0;

@@ -1004,8 +998,6 @@ decode_options (unsigned int argc, const char **argv)

 /* These options are set with -O3, so reset for -Os */
 flag_predictive_commoning = 0;
-flag_inline_functions = 0;
-flag_unswitch_loops = 0;
 flag_gcse_after_reload = 0;
 flag_tree_vectorize = 0;

@@ -1029,12 +1021,6 @@ decode_options (unsigned int argc, const char **argv)
 align_labels = 1;
 align_functions = 1;

-/* Unroll/prefetch switches that may be set on the command line, and tend to
-generate bigger code. */
-flag_unroll_loops = 0;
-flag_unroll_all_loops = 0;
-flag_prefetch_loop_arrays = 0;
-
 /* Basic optimization options. */
 optimize_size = 1;
 if (optimize > 2)
gcc/postreload-gcse.c
@@ -1066,7 +1066,7 @@ eliminate_partially_redundant_load (basic_block bb, rtx insn,
 if (/* No load can be replaced by copy. */
 npred_ok == 0
 /* Prevent exploding the code. */
-|| (optimize_size && npred_ok > 1)
+|| (optimize_bb_for_size_p (bb) && npred_ok > 1)
 /* If we don't have profile information we cannot tell if splitting
 a critical edge is profitable or not so don't do it. */
 || ((! profile_info || ! flag_branch_probabilities
gcc/predict.c
@@ -261,6 +261,37 @@ optimize_loop_for_speed_p (struct loop *loop)
 return optimize_bb_for_speed_p (loop->header);
 }

+/* Return TRUE when LOOP nest should be optimized for speed. */
+
+bool
+optimize_loop_nest_for_speed_p (struct loop *loop)
+{
+struct loop *l = loop;
+if (optimize_loop_for_speed_p (loop))
+return true;
+l = loop->inner;
+while (l != loop)
+{
+if (optimize_loop_for_speed_p (l))
+return true;
+if (l->inner)
+l = l->inner;
+else if (l->next)
+l = l->next;
+else
+l = loop_outer (l);
+}
+return false;
+}
+
+/* Return TRUE when LOOP nest should be optimized for size. */
+
+bool
+optimize_loop_nest_for_size_p (struct loop *loop)
+{
+return !optimize_loop_nest_for_speed_p (loop);
+}
+
 /* Set RTL expansion for BB profile. */

 void
gcc/recog.c
@@ -3439,7 +3439,7 @@ relax_delay_slots (rtx first)

 Only do so if optimizing for size since this results in slower, but
 smaller code. */
-if (optimize_size
+if (optimize_function_for_size_p (cfun)
 && GET_CODE (PATTERN (delay_insn)) == RETURN
 && next
 && JUMP_P (next)
gcc/stmt.c
@@ -2419,7 +2419,7 @@ expand_case (tree exp)

 else if (count < case_values_threshold ()
 || compare_tree_int (range,
-(optimize_size ? 3 : 10) * count) > 0
+(optimize_insn_for_size_p () ? 3 : 10) * count) > 0
 /* RANGE may be signed, and really large ranges will show up
 as negative numbers. */
 || compare_tree_int (range, 0) < 0
@@ -2489,7 +2489,7 @@ expand_case (tree exp)

 /* Index jumptables from zero for suitable values of
 minval to avoid a subtraction. */
-if (! optimize_size
+if (optimize_insn_for_speed_p ()
 && compare_tree_int (minval, 0) > 0
 && compare_tree_int (minval, 3) < 0)
 {
gcc/tracer.c
@@ -92,7 +92,7 @@ ignore_bb_p (const_basic_block bb)
 {
 if (bb->index < NUM_FIXED_BLOCKS)
 return true;
-if (!maybe_hot_bb_p (bb))
+if (optimize_bb_for_size_p (bb))
 return true;
 return false;
 }
gcc/tree-eh.c
@@ -1535,7 +1535,7 @@ decide_copy_try_finally (int ndests, gimple_seq finally)
 sw_estimate = 10 + 2 * ndests;

 /* Optimize for size clearly wants our best guess. */
-if (optimize_size)
+if (optimize_function_for_size_p (cfun))
 return f_estimate < sw_estimate;

 /* ??? These numbers are completely made up so far. */
gcc/tree-parloops.c
@@ -1843,7 +1843,7 @@ parallelize_loops (void)
 {
 htab_empty (reduction_list);
 if (/* Do not bother with loops in cold areas. */
-!maybe_hot_bb_p (loop->header)
+optimize_loop_nest_for_size_p (loop)
 /* Or loops that roll too little. */
 || expected_loop_iterations (loop) <= n_threads
 /* And of course, the loop must be parallelizable. */
gcc/tree-ssa-coalesce.c
@@ -75,7 +75,7 @@ typedef struct coalesce_list_d
 possibly on CRITICAL edge and in HOT basic block. */

 static inline int
-coalesce_cost (int frequency, bool hot, bool critical)
+coalesce_cost (int frequency, bool optimize_for_size, bool critical)
 {
 /* Base costs on BB frequencies bounded by 1. */
 int cost = frequency;
@@ -83,12 +83,8 @@ coalesce_cost (int frequency, bool hot, bool critical)
 if (!cost)
 cost = 1;

-if (optimize_size)
+if (optimize_for_size)
 cost = 1;
-else
-/* It is more important to coalesce in HOT blocks. */
-if (hot)
-cost *= 2;

 /* Inserting copy on critical edge costs more than inserting it elsewhere. */
 if (critical)
@@ -102,7 +98,7 @@ coalesce_cost (int frequency, bool hot, bool critical)
 static inline int
 coalesce_cost_bb (basic_block bb)
 {
-return coalesce_cost (bb->frequency, maybe_hot_bb_p (bb), false);
+return coalesce_cost (bb->frequency, optimize_bb_for_size_p (bb), false);
 }


@@ -115,7 +111,7 @@ coalesce_cost_edge (edge e)
 return MUST_COALESCE_COST;

 return coalesce_cost (EDGE_FREQUENCY (e),
-maybe_hot_edge_p (e),
+optimize_edge_for_size_p (e),
 EDGE_CRITICAL_P (e));
 }

@@ -1099,7 +1095,7 @@ create_outofssa_var_map (coalesce_list_p cl, bitmap used_in_copy)
 if (SSA_NAME_VAR (outputs[match]) == SSA_NAME_VAR (input))
 {
 cost = coalesce_cost (REG_BR_PROB_BASE,
-maybe_hot_bb_p (bb),
+optimize_bb_for_size_p (bb),
 false);
 add_coalesce (cl, v1, v2, cost);
 bitmap_set_bit (used_in_copy, v1);
gcc/tree-ssa-loop-ivcanon.c
@@ -359,7 +359,7 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)

 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 {
-if (may_increase_size && maybe_hot_bb_p (loop->header)
+if (may_increase_size && optimize_loop_for_speed_p (loop)
 /* Unroll outermost loops only if asked to do so or they do
 not cause code growth. */
 && (unroll_outer
gcc/tree-ssa-loop-prefetch.c
@@ -1460,7 +1460,7 @@ loop_prefetch_arrays (struct loop *loop)
 struct tree_niter_desc desc;
 bool unrolled = false, no_other_refs;

-if (!maybe_hot_bb_p (loop->header))
+if (optimize_loop_nest_for_size_p (loop))
 {
 if (dump_file && (dump_flags & TDF_DETAILS))
 fprintf (dump_file, " ignored (cold area)\n");
gcc/tree-ssa-math-opts.c
@@ -353,7 +353,8 @@ replace_reciprocal (use_operand_p use_p)
 basic_block bb = gimple_bb (use_stmt);
 struct occurrence *occ = (struct occurrence *) bb->aux;

-if (occ->recip_def && use_stmt != occ->recip_def_stmt)
+if (optimize_bb_for_speed_p (bb)
+&& occ->recip_def && use_stmt != occ->recip_def_stmt)
 {
 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 SET_USE (use_p, occ->recip_def);
@@ -445,7 +446,7 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 static bool
 gate_cse_reciprocals (void)
 {
-return optimize && !optimize_size && flag_reciprocal_math;
+return optimize && flag_reciprocal_math;
 }

 /* Go through all the floating-point SSA_NAMEs, and call
@@ -500,6 +501,9 @@ execute_cse_reciprocals (void)
 execute_cse_reciprocals_1 (&gsi, def);
 }

+if (optimize_bb_for_size_p (bb))
+continue;
+
 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 {
gcc/tree-ssa-threadupdate.c
@@ -994,7 +994,7 @@ mark_threaded_blocks (bitmap threaded_blocks)

 /* If optimizing for size, only thread through block if we don't have
 to duplicate it or it's an otherwise empty redirection block. */
-if (optimize_size)
+if (optimize_function_for_size_p (cfun))
 {
 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
 {
gcc/tree-vect-analyze.c
@@ -1219,7 +1219,7 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
 }

-if (optimize_size)
+if (optimize_loop_nest_for_size_p (loop))
 {
 if (vect_print_dump_info (REPORT_DR_DETAILS))
 fprintf (vect_dump, "versioning not supported when optimizing for size.");
@@ -1993,7 +1993,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)

 /* Try versioning if:
 1) flag_tree_vect_loop_version is TRUE
-2) optimize_size is FALSE
+2) optimize loop for speed
 3) there is at least one unsupported misaligned data ref with an unknown
 misalignment, and
 4) all misaligned data refs with a known misalignment are supported, and
@@ -2001,7 +2001,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)

 do_versioning =
 flag_tree_vect_loop_version
-&& (!optimize_size)
+&& optimize_loop_nest_for_speed_p (loop)
 && (!loop->inner); /* FORNOW */

 if (do_versioning)
gcc/value-prof.c
@@ -669,7 +669,7 @@ gimple_divmod_fixed_value_transform (gimple_stmt_iterator *si)
 at least 50% of time (and 75% gives the guarantee of usage). */
 if (simple_cst_equal (gimple_assign_rhs2 (stmt), value) != 1
 || 2 * count < all
-|| !maybe_hot_bb_p (gimple_bb (stmt)))
+|| optimize_bb_for_size_p (gimple_bb (stmt)))
 return false;

 if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
@@ -820,7 +820,7 @@ gimple_mod_pow2_value_transform (gimple_stmt_iterator *si)
 /* We require that we hit a power of 2 at least half of all evaluations. */
 if (simple_cst_equal (gimple_assign_rhs2 (stmt), value) != 1
 || count < wrong_values
-|| !maybe_hot_bb_p (gimple_bb (stmt)))
+|| optimize_bb_for_size_p (gimple_bb (stmt)))
 return false;

 if (dump_file)
@@ -1017,7 +1017,7 @@ gimple_mod_subtract_transform (gimple_stmt_iterator *si)
 break;
 }
 if (i == steps
-|| !maybe_hot_bb_p (gimple_bb (stmt)))
+|| optimize_bb_for_size_p (gimple_bb (stmt)))
 return false;

 gimple_remove_histogram_value (cfun, stmt, histogram);
@@ -1397,7 +1397,7 @@ gimple_stringops_transform (gimple_stmt_iterator *gsi)
 /* We require that count is at least half of all; this means
 that for the transformation to fire the value must be constant
 at least 80% of time. */
-if ((6 * count / 5) < all || !maybe_hot_bb_p (gimple_bb (stmt)))
+if ((6 * count / 5) < all || optimize_bb_for_size_p (gimple_bb (stmt)))
 return false;
 if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
 return false;