loop-unswitch.c (unswitch_single_loop): Use optimize_loop_for_speed_p.
* loop-unswitch.c (unswitch_single_loop): Use optimize_loop_for_speed_p.
* tree-ssa-threadupdate.c (mark_threaded_blocks): Use optimize_function_for_size_p.
* tracer.c (ignore_bb_p): Use optimize_bb_for_size_p.
* postreload-gcse.c (eliminate_partially_redundant_load): Use optimize_bb_for_size_p.
* value-prof.c (gimple_divmod_fixed_value_transform, gimple_mod_pow2_value_transform, gimple_mod_subtract_transform, gimple_stringops_transform): Use optimize_bb_for_size_p.
* ipa-cp.c (ipcp_insert_stage): Use optimize_function_for_size_p.
* final.c (compute_alignments): Use optimize_function_for_size_p.
* builtins.c (fold_builtin_cabs): Use optimize_function_for_speed_p.
(fold_builtin_strcpy, fold_builtin_fputs): Use optimize_function_for_size_p.
* fold-const.c (tree_swap_operands_p): Use optimize_function_for_size_p.
* recog.c (relax_delay_slots): Likewise.
* tree-ssa-math-opts.c (replace_reciprocal): Use optimize_bb_for_speed_p.
(execute_cse_reciprocals): Use optimize_bb_for_size_p.
* ipa-inline.c (cgraph_decide_recursive_inlining): Use optimize_function_for_size_p.
(cgraph_decide_inlining_of_small_function): Use optimize_function_for_size_p.
* global.c (find_reg): Use optimize_function_for_size_p.
* opts.c (decode_options): Do not clear flag_tree_ch, flag_inline_functions, flag_unswitch_loops, flag_unroll_loops, flag_unroll_all_loops and flag_prefetch_loop_arrays. Those can work it out from profile.
* tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely): Use optimize_loop_for_speed_p.
* predict.c (optimize_bb_for_size_p, optimize_bb_for_speed_p): Constify argument.
(optimize_loop_nest_for_size_p, optimize_loop_nest_for_speed_p): New.
* tree-parloops.c (parallelize_loops): Use optimize_loop_for_size_p.
* tree-eh.c (decide_copy_try_finally): Use optimize_function_for_size_p.
* local-alloc.c (block_alloc): Pass BB pointer.
(find_free_reg): Add BB pointer, use optimize_bb_for_size_p.
* gcse.c (gcse_main): Use optimize_function_for_size_p.
* loop-unroll.c (decide_unrolling_and_peeling): Use optimize_loop_for_size_p.
(decide_peel_completely): Likewise.
* tree-vect-analyze.c (vect_mark_for_runtime_alias_test): Use optimize_loop_for_size_p.
(vect_enhance_data_refs_alignment): Likewise.
* tree-ssa-coalesce.c (coalesce_cost): Add optimize_for_size argument.
(coalesce_cost_bb, coalesce_cost_edge, create_outofssa_var_map): Update call.
* cfgcleanup.c (outgoing_edges_match): Use optimize_bb_for_speed_p.
(try_crossjump_bb): Use optimize_bb_for_size_p.
* tree-ssa-loop-prefetch.c (loop_prefetch_arrays): Use optimize_loop_for_speed_p.
* bb-reorder.c (find_traces_1_round): Likewise.
(copy_bb): Use optimize_bb_for_speed_p.
(duplicate_computed_gotos): Likewise.
* basic-block.h (optimize_loop_nest_for_size_p, optimize_loop_nest_for_speed_p): New.
* stmt.c (expand_case): Use optimize_insn_for_size_p.

From-SVN: r139760
parent e3536b82d7
commit efd8f7507b
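The whole patch follows one pattern: call sites that used to test the global optimize_size flag (or maybe_hot_bb_p) now ask a per-function, per-basic-block, per-edge or per-loop predicate, so profile feedback can treat cold regions as size-optimized even at -O2. The following toy, self-contained C sketch illustrates that idea only; it is not GCC source, and the struct, threshold and predicate bodies are made-up stand-ins for the real predicates declared in basic-block.h below.

/* Toy illustration (not GCC code): decide size vs. speed per basic block
   from profile data instead of from a single global -Os flag.  */
#include <stdbool.h>
#include <stdio.h>

struct basic_block { int frequency; };   /* profile-estimated execution frequency */

static int optimize_size = 0;            /* -Os: everything is size-optimized */
static const int hot_threshold = 10;     /* hypothetical "hot block" cutoff */

/* Per-block predicate: cold blocks are optimized for size even at -O2.  */
static bool optimize_bb_for_size_p (const struct basic_block *bb)
{
  return optimize_size || bb->frequency < hot_threshold;
}

static bool optimize_bb_for_speed_p (const struct basic_block *bb)
{
  return !optimize_bb_for_size_p (bb);
}

int main (void)
{
  struct basic_block hot = { 100 }, cold = { 1 };
  /* Old style: one global check gated every code-growing transform.
     New style: ask per block, so only the hot block gets them.  */
  printf ("grow code in hot block?  %d\n", optimize_bb_for_speed_p (&hot));
  printf ("grow code in cold block? %d\n", optimize_bb_for_speed_p (&cold));
  return 0;
}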
gcc/ChangeLog
@@ -1,3 +1,57 @@
+2008-08-29  Jan Hubicka  <jh@suse.cz>
+
+	* loop-unswitch.c (unswitch_single_loop): Use optimize_loop_for_speed_p.
+	* tree-ssa-threadupdate.c (mark_threaded_blocks): Use optimize_function_for_size_p.
+	* tracer.c (ignore_bb_p): Use optimize_bb_for_size_p.
+	* postreload-gcse.c (eliminate_partially_redundant_load): Use optimize_bb_for_size_p.
+	* value-prof.c (gimple_divmod_fixed_value_transform,
+	gimple_mod_pow2_value_transform, gimple_mod_subtract_transform,
+	gimple_stringops_transform): Use optimize_bb_for_size_p.
+	* ipa-cp.c (ipcp_insert_stage): Use optimize_function_for_size_p.
+	* final.c (compute_alignments): Use optimize_function_for_size_p.
+	* builtins.c (fold_builtin_cabs): Use optimize_function_for_speed_p.
+	(fold_builtin_strcpy, fold_builtin_fputs): Use
+	optimize_function_for_size_p.
+	* fold-const.c (tree_swap_operands_p): Use optimize_function_for_size_p.
+	* recog.c (relax_delay_slots): Likewise.
+	* tree-ssa-math-opts.c (replace_reciprocal): Use optimize_bb_for_speed_p.
+	(execute_cse_reciprocals): Use optimize_bb_for_size_p.
+	* ipa-inline.c (cgraph_decide_recursive_inlining): Use
+	optimize_function_for_size_p.
+	(cgraph_decide_inlining_of_small_function): Use
+	optimize_function_for_size_p.
+	* global.c (find_reg): Use optimize_function_for_size_p.
+	* opts.c (decode_options): Do not clear flag_tree_ch, flag_inline_functions,
+	flag_unswitch_loops, flag_unroll_loops, flag_unroll_all_loops and
+	flag_prefetch_loop_arrays. Those can work it out from profile.
+	* tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely): Use
+	optimize_loop_for_speed_p.
+	* predict.c (optimize_bb_for_size_p, optimize_bb_for_speed_p): Constify
+	argument.
+	(optimize_loop_nest_for_size_p, optimize_loop_nest_for_speed_p): New.
+	* tree-parloops.c (parallelize_loops): Use optimize_loop_for_size_p.
+	* tree-eh.c (decide_copy_try_finally): Use optimize_function_for_size_p.
+	* local-alloc.c (block_alloc): Pass BB pointer.
+	(find_free_reg): Add BB pointer, use optimize_bb_for_size_p.
+	* gcse.c (gcse_main): Use optimize_function_for_size_p.
+	* loop-unroll.c (decide_unrolling_and_peeling): Use optimize_loop_for_size_p.
+	(decide_peel_completely): Likewise.
+	* tree-vect-analyze.c (vect_mark_for_runtime_alias_test): Use
+	optimize_loop_for_size_p.
+	(vect_enhance_data_refs_alignment): Likewise.
+	* tree-ssa-coalesce.c (coalesce_cost): Add optimize_for_size argument.
+	(coalesce_cost_bb, coalesce_cost_edge, create_outofssa_var_map): Update call.
+	* cfgcleanup.c (outgoing_edges_match): Use optimize_bb_for_speed_p.
+	(try_crossjump_bb): Use optimize_bb_for_size_p.
+	* tree-ssa-loop-prefetch.c (loop_prefetch_arrays): Use
+	optimize_loop_for_speed_p.
+	* bb-reorder.c (find_traces_1_round): Likewise.
+	(copy_bb): Use optimize_bb_for_speed_p.
+	(duplicate_computed_gotos): Likewise.
+	* basic-block.h (optimize_loop_nest_for_size_p,
+	optimize_loop_nest_for_speed_p): New.
+	* stmt.c (expand_case): Use optimize_insn_for_size_p.
+
 2008-08-29  Tristan Gingold  <gingold@adacore.com>
 
 	* gcov.c (main): Call expandargv.
gcc/basic-block.h
@@ -841,6 +841,8 @@ extern bool optimize_function_for_size_p (struct function *);
 extern bool optimize_function_for_speed_p (struct function *);
 extern bool optimize_loop_for_size_p (struct loop *);
 extern bool optimize_loop_for_speed_p (struct loop *);
+extern bool optimize_loop_nest_for_size_p (struct loop *);
+extern bool optimize_loop_nest_for_speed_p (struct loop *);
 extern bool gimple_predicted_by_p (const_basic_block, enum br_predictor);
 extern bool rtl_predicted_by_p (const_basic_block, enum br_predictor);
 extern void gimple_predict_edge (edge, enum br_predictor, int);
gcc/bb-reorder.c
@@ -648,7 +648,8 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
 /* The loop has less than 4 iterations. */

 if (single_succ_p (bb)
-&& copy_bb_p (best_edge->dest, !optimize_size))
+&& copy_bb_p (best_edge->dest,
+optimize_edge_for_speed_p (best_edge)))
 {
 bb = copy_bb (best_edge->dest, best_edge, bb,
 *n_traces);
@@ -1102,7 +1103,7 @@ connect_traces (int n_traces, struct trace *traces)
 edge is traversed frequently enough. */
 if (try_copy
 && copy_bb_p (best->dest,
-!optimize_size
+optimize_edge_for_speed_p (best)
 && EDGE_FREQUENCY (best) >= freq_threshold
 && best->count >= count_threshold))
 {
@@ -1173,7 +1174,7 @@ copy_bb_p (const_basic_block bb, int code_may_grow)
 if (EDGE_COUNT (bb->succs) > 8)
 return false;

-if (code_may_grow && maybe_hot_bb_p (bb))
+if (code_may_grow && optimize_bb_for_speed_p (bb))
 max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

 FOR_BB_INSNS (bb, insn)
@@ -1984,7 +1985,7 @@ gate_duplicate_computed_gotos (void)
 {
 if (targetm.cannot_modify_jumps_p ())
 return false;
-return (optimize > 0 && flag_expensive_optimizations && !optimize_size);
+return (optimize > 0 && flag_expensive_optimizations);
 }


@@ -2075,6 +2076,9 @@ duplicate_computed_gotos (void)
 || single_pred_p (single_succ (bb)))
 continue;

+if (!optimize_bb_for_size_p (bb))
+continue;
+
 /* The successor block has to be a duplication candidate. */
 if (!bitmap_bit_p (candidates, single_succ (bb)->index))
 continue;
gcc/builtins.c
@@ -7530,7 +7530,7 @@ fold_builtin_cabs (tree arg, tree type, tree fndecl)

 /* Don't do this when optimizing for size. */
 if (flag_unsafe_math_optimizations
-&& optimize && !optimize_size)
+&& optimize && optimize_function_for_speed_p (cfun))
 {
 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

@@ -8882,7 +8882,7 @@ fold_builtin_strcpy (tree fndecl, tree dest, tree src, tree len)
 if (operand_equal_p (src, dest, 0))
 return fold_convert (TREE_TYPE (TREE_TYPE (fndecl)), dest);

-if (optimize_size)
+if (optimize_function_for_size_p (cfun))
 return NULL_TREE;

 fn = implicit_built_in_decls[BUILT_IN_MEMCPY];
@@ -11501,7 +11501,7 @@ fold_builtin_fputs (tree arg0, tree arg1, bool ignore, bool unlocked, tree len)
 case 1: /* length is greater than 1, call fwrite. */
 {
 /* If optimizing for size keep fputs. */
-if (optimize_size)
+if (optimize_function_for_size_p (cfun))
 return NULL_TREE;
 /* New argument list transforming fputs(string, stream) to
 fwrite(string, 1, len, stream). */
gcc/cfgcleanup.c
@@ -1235,9 +1235,8 @@ outgoing_edges_match (int mode, basic_block bb1, basic_block bb2)
 we require the existing branches to have probabilities that are
 roughly similar. */
 if (match
-&& !optimize_size
-&& maybe_hot_bb_p (bb1)
-&& maybe_hot_bb_p (bb2))
+&& optimize_bb_for_speed_p (bb1)
+&& optimize_bb_for_speed_p (bb2))
 {
 int prob2;

@@ -1684,7 +1683,7 @@ try_crossjump_bb (int mode, basic_block bb)

 /* Don't crossjump if this block ends in a computed jump,
 unless we are optimizing for size. */
-if (!optimize_size
+if (optimize_bb_for_size_p (bb)
 && bb != EXIT_BLOCK_PTR
 && computed_jump_p (BB_END (bb)))
 return false;
gcc/final.c
@@ -683,7 +683,7 @@ compute_alignments (void)
 label_align = XCNEWVEC (struct label_alignment, max_labelno - min_labelno + 1);

 /* If not optimizing or optimizing for size, don't assign any alignments. */
-if (! optimize || optimize_size)
+if (! optimize || optimize_function_for_size_p (cfun))
 return 0;

 if (dump_file)
@@ -765,7 +765,7 @@ compute_alignments (void)
 /* In case block is frequent and reached mostly by non-fallthru edge,
 align it. It is most likely a first block of loop. */
 if (has_fallthru
-&& maybe_hot_bb_p (bb)
+&& optimize_bb_for_speed_p (bb)
 && branch_frequency + fallthru_frequency > freq_threshold
 && (branch_frequency
 > fallthru_frequency * PARAM_VALUE (PARAM_ALIGN_LOOP_ITERATIONS)))
gcc/fold-const.c
@@ -6679,7 +6679,7 @@ tree_swap_operands_p (const_tree arg0, const_tree arg1, bool reorder)
 if (TREE_CONSTANT (arg0))
 return 1;

-if (optimize_size)
+if (cfun && optimize_function_for_size_p (cfun))
 return 0;

 if (reorder && flag_evaluation_order
@@ -10407,7 +10407,7 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1)
 }

 /* Optimize x*x as pow(x,2.0), which is expanded as x*x. */
-if (! optimize_size
+if (optimize_function_for_speed_p (cfun)
 && operand_equal_p (arg0, arg1, 0))
 {
 tree powfn = mathfn_built_in (type, BUILT_IN_POW);
gcc/gcse.c
@@ -738,9 +738,7 @@ gcse_main (rtx f ATTRIBUTE_UNUSED)
 timevar_pop (TV_CPROP1);
 }

-if (optimize_size)
-/* Do nothing. */ ;
-else
+if (optimize_function_for_speed_p (cfun))
 {
 timevar_push (TV_PRE);
 changed |= one_pre_gcse_pass (pass + 1);
@@ -773,7 +771,7 @@ gcse_main (rtx f ATTRIBUTE_UNUSED)
 for code size -- it rarely makes programs faster, and can make
 them bigger if we did partial redundancy elimination (when optimizing
 for space, we don't run the partial redundancy algorithms). */
-if (optimize_size)
+if (optimize_function_for_size_p (cfun))
 {
 timevar_push (TV_HOIST);
 max_gcse_regno = max_reg_num ();
@@ -825,7 +823,7 @@ gcse_main (rtx f ATTRIBUTE_UNUSED)
 /* We are finished with alias. */
 end_alias_analysis ();

-if (!optimize_size && flag_gcse_sm)
+if (optimize_function_for_speed_p (cfun) && flag_gcse_sm)
 {
 timevar_push (TV_LSM);
 store_motion ();
gcc/global.c
@@ -1168,8 +1168,8 @@ find_reg (int num, HARD_REG_SET losers, int alt_regs_p, int accept_call_clobbere
 if (! accept_call_clobbered
 && allocno[num].calls_crossed != 0
 && allocno[num].throwing_calls_crossed == 0
-&& CALLER_SAVE_PROFITABLE (optimize_size ? allocno[num].n_refs : allocno[num].freq,
-optimize_size ? allocno[num].calls_crossed
+&& CALLER_SAVE_PROFITABLE (optimize_function_for_size_p (cfun) ? allocno[num].n_refs : allocno[num].freq,
+optimize_function_for_size_p (cfun) ? allocno[num].calls_crossed
 : allocno[num].freq_calls_crossed))
 {
 HARD_REG_SET new_losers;
gcc/ipa-cp.c
@@ -1019,9 +1019,7 @@ ipcp_insert_stage (void)
 if (new_insns + growth > max_new_insns)
 break;
 if (growth
-&& (optimize_size
-|| (DECL_STRUCT_FUNCTION (node->decl)
-->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)))
+&& optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)))
 {
 if (dump_file)
 fprintf (dump_file, "Not versioning, cold code would grow");
gcc/ipa-inline.c
@@ -674,7 +674,7 @@ cgraph_decide_recursive_inlining (struct cgraph_node *node,
 int depth = 0;
 int n = 0;

-if (optimize_size
+if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl))
 || (!flag_inline_functions && !DECL_DECLARED_INLINE_P (node->decl)))
 return false;

@@ -951,7 +951,7 @@ cgraph_decide_inlining_of_small_functions (void)
 if (!flag_inline_functions
 && !DECL_DECLARED_INLINE_P (edge->callee->decl))
 not_good = N_("function not declared inline and code size would grow");
-if (optimize_size)
+if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION(edge->caller->decl)))
 not_good = N_("optimizing for size and code size would grow");
 if (not_good && growth > 0 && cgraph_estimate_growth (edge->callee) > 0)
 {
gcc/local-alloc.c
@@ -299,7 +299,7 @@ static int contains_replace_regs (rtx);
 static int memref_referenced_p (rtx, rtx);
 static int memref_used_between_p (rtx, rtx, rtx);
 static void no_equiv (rtx, const_rtx, void *);
-static void block_alloc (int);
+static void block_alloc (basic_block);
 static int qty_sugg_compare (int, int);
 static int qty_sugg_compare_1 (const void *, const void *);
 static int qty_compare (int, int);
@@ -311,7 +311,7 @@ static void reg_is_set (rtx, const_rtx, void *);
 static void reg_is_born (rtx, int);
 static void wipe_dead_reg (rtx, int);
 static int find_free_reg (enum reg_class, enum machine_mode, int, int, int,
-int, int);
+int, int, basic_block);
 static void mark_life (int, enum machine_mode, int);
 static void post_mark_life (int, enum machine_mode, int, int, int);
 static int requires_inout (const char *);
@@ -436,7 +436,7 @@ local_alloc (void)

 next_qty = 0;

-block_alloc (b->index);
+block_alloc (b);
 }

 free (qty);
@@ -1270,7 +1270,7 @@ no_equiv (rtx reg, const_rtx store ATTRIBUTE_UNUSED, void *data ATTRIBUTE_UNUSED
 Only the pseudos that die but once can be handled. */

 static void
-block_alloc (int b)
+block_alloc (basic_block b)
 {
 int i, q;
 rtx insn;
@@ -1283,7 +1283,7 @@ block_alloc (int b)

 /* Count the instructions in the basic block. */

-insn = BB_END (BASIC_BLOCK (b));
+insn = BB_END (b);
 while (1)
 {
 if (!NOTE_P (insn))
@@ -1291,7 +1291,7 @@ block_alloc (int b)
 ++insn_count;
 gcc_assert (insn_count <= max_uid);
 }
-if (insn == BB_HEAD (BASIC_BLOCK (b)))
+if (insn == BB_HEAD (b))
 break;
 insn = PREV_INSN (insn);
 }
@@ -1302,14 +1302,14 @@ block_alloc (int b)

 /* Initialize table of hardware registers currently live. */

-REG_SET_TO_HARD_REG_SET (regs_live, DF_LR_IN (BASIC_BLOCK (b)));
+REG_SET_TO_HARD_REG_SET (regs_live, DF_LR_IN (b));

 /* This is conservative, as this would include registers that are
 artificial-def'ed-but-not-used. However, artificial-defs are
 rare, and such uninitialized use is rarer still, and the chance
 of this having any performance impact is even less, while the
 benefit is not having to compute and keep the TOP set around. */
-for (def_rec = df_get_artificial_defs (b); *def_rec; def_rec++)
+for (def_rec = df_get_artificial_defs (b->index); *def_rec; def_rec++)
 {
 int regno = DF_REF_REGNO (*def_rec);
 if (regno < FIRST_PSEUDO_REGISTER)
@@ -1320,7 +1320,7 @@ block_alloc (int b)
 and assigns quantities to registers.
 It computes which registers to tie. */

-insn = BB_HEAD (BASIC_BLOCK (b));
+insn = BB_HEAD (b);
 while (1)
 {
 if (!NOTE_P (insn))
@@ -1487,7 +1487,7 @@ block_alloc (int b)
 IOR_HARD_REG_SET (regs_live_at[2 * insn_number], regs_live);
 IOR_HARD_REG_SET (regs_live_at[2 * insn_number + 1], regs_live);

-if (insn == BB_END (BASIC_BLOCK (b)))
+if (insn == BB_END (b))
 break;

 insn = NEXT_INSN (insn);
@@ -1542,7 +1542,7 @@ block_alloc (int b)
 q = qty_order[i];
 if (qty_phys_num_sugg[q] != 0 || qty_phys_num_copy_sugg[q] != 0)
 qty[q].phys_reg = find_free_reg (qty[q].min_class, qty[q].mode, q,
-0, 1, qty[q].birth, qty[q].death);
+0, 1, qty[q].birth, qty[q].death, b);
 else
 qty[q].phys_reg = -1;
 }
@@ -1627,19 +1627,19 @@ block_alloc (int b)
 a scheduling pass after reload and we are not optimizing
 for code size. */
 if (flag_schedule_insns_after_reload && dbg_cnt (local_alloc_for_sched)
-&& !optimize_size
+&& optimize_bb_for_speed_p (b)
 && !SMALL_REGISTER_CLASSES)
 {
 qty[q].phys_reg = find_free_reg (qty[q].min_class,
 qty[q].mode, q, 0, 0,
-fake_birth, fake_death);
+fake_birth, fake_death, b);
 if (qty[q].phys_reg >= 0)
 continue;
 }
 #endif
 qty[q].phys_reg = find_free_reg (qty[q].min_class,
 qty[q].mode, q, 0, 0,
-qty[q].birth, qty[q].death);
+qty[q].birth, qty[q].death, b);
 if (qty[q].phys_reg >= 0)
 continue;
 }
@@ -1647,17 +1647,17 @@ block_alloc (int b)
 #ifdef INSN_SCHEDULING
 /* Similarly, avoid false dependencies. */
 if (flag_schedule_insns_after_reload && dbg_cnt (local_alloc_for_sched)
-&& !optimize_size
+&& optimize_bb_for_speed_p (b)
 && !SMALL_REGISTER_CLASSES
 && qty[q].alternate_class != NO_REGS)
 qty[q].phys_reg = find_free_reg (qty[q].alternate_class,
 qty[q].mode, q, 0, 0,
-fake_birth, fake_death);
+fake_birth, fake_death, b);
 #endif
 if (qty[q].alternate_class != NO_REGS)
 qty[q].phys_reg = find_free_reg (qty[q].alternate_class,
 qty[q].mode, q, 0, 0,
-qty[q].birth, qty[q].death);
+qty[q].birth, qty[q].death, b);
 }
 }

@@ -2145,7 +2145,7 @@ wipe_dead_reg (rtx reg, int output_p)
 static int
 find_free_reg (enum reg_class rclass, enum machine_mode mode, int qtyno,
 int accept_call_clobbered, int just_try_suggested,
-int born_index, int dead_index)
+int born_index, int dead_index, basic_block bb)
 {
 int i, ins;
 HARD_REG_SET first_used, used;
@@ -2261,7 +2261,7 @@ find_free_reg (enum reg_class rclass, enum machine_mode mode, int qtyno,
 /* Don't try the copy-suggested regs again. */
 qty_phys_num_copy_sugg[qtyno] = 0;
 return find_free_reg (rclass, mode, qtyno, accept_call_clobbered, 1,
-born_index, dead_index);
+born_index, dead_index, bb);
 }

 /* We need not check to see if the current function has nonlocal
@@ -2274,11 +2274,12 @@ find_free_reg (enum reg_class rclass, enum machine_mode mode, int qtyno,
 && ! just_try_suggested
 && qty[qtyno].n_calls_crossed != 0
 && qty[qtyno].n_throwing_calls_crossed == 0
-&& CALLER_SAVE_PROFITABLE (optimize_size ? qty[qtyno].n_refs : qty[qtyno].freq,
-optimize_size ? qty[qtyno].n_calls_crossed
+&& CALLER_SAVE_PROFITABLE (optimize_bb_for_size_p (bb) ? qty[qtyno].n_refs
+: qty[qtyno].freq,
+optimize_bb_for_size_p (bb) ? qty[qtyno].n_calls_crossed
 : qty[qtyno].freq_calls_crossed))
 {
-i = find_free_reg (rclass, mode, qtyno, 1, 0, born_index, dead_index);
+i = find_free_reg (rclass, mode, qtyno, 1, 0, born_index, dead_index, bb);
 if (i >= 0)
 caller_save_needed = 1;
 return i;
gcc/loop-unroll.c
@@ -269,7 +269,7 @@ decide_unrolling_and_peeling (int flags)
 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);

 /* Do not peel cold areas. */
-if (!maybe_hot_bb_p (loop->header))
+if (optimize_loop_for_size_p (loop))
 {
 if (dump_file)
 fprintf (dump_file, ";; Not considering loop, cold area\n");
@@ -368,7 +368,7 @@ decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 }

 /* Do not peel cold areas. */
-if (!maybe_hot_bb_p (loop->header))
+if (optimize_loop_for_size_p (loop))
 {
 if (dump_file)
 fprintf (dump_file, ";; Not considering loop, cold area\n");
gcc/loop-unswitch.c
@@ -290,7 +290,7 @@ unswitch_single_loop (struct loop *loop, rtx cond_checked, int num)
 }

 /* Do not unswitch in cold areas. */
-if (!maybe_hot_bb_p (loop->header))
+if (optimize_loop_for_size_p (loop))
 {
 if (dump_file)
 fprintf (dump_file, ";; Not unswitching, not hot area\n");
gcc/opts.c
@@ -990,12 +990,6 @@ decode_options (unsigned int argc, const char **argv)

 if (optimize_size)
 {
-/* Loop header copying usually increases size of the code. This used not to
-be true, since quite often it is possible to verify that the condition is
-satisfied in the first iteration and therefore to eliminate it. Jump
-threading handles these cases now. */
-flag_tree_ch = 0;
-
 /* Conditional DCE generates bigger code. */
 flag_tree_builtin_call_dce = 0;

@@ -1004,8 +998,6 @@ decode_options (unsigned int argc, const char **argv)

 /* These options are set with -O3, so reset for -Os */
 flag_predictive_commoning = 0;
-flag_inline_functions = 0;
-flag_unswitch_loops = 0;
 flag_gcse_after_reload = 0;
 flag_tree_vectorize = 0;

@@ -1029,12 +1021,6 @@ decode_options (unsigned int argc, const char **argv)
 align_labels = 1;
 align_functions = 1;

-/* Unroll/prefetch switches that may be set on the command line, and tend to
-generate bigger code. */
-flag_unroll_loops = 0;
-flag_unroll_all_loops = 0;
-flag_prefetch_loop_arrays = 0;
-
 /* Basic optimization options. */
 optimize_size = 1;
 if (optimize > 2)
gcc/postreload-gcse.c
@@ -1066,7 +1066,7 @@ eliminate_partially_redundant_load (basic_block bb, rtx insn,
 if (/* No load can be replaced by copy. */
 npred_ok == 0
 /* Prevent exploding the code. */
-|| (optimize_size && npred_ok > 1)
+|| (optimize_bb_for_size_p (bb) && npred_ok > 1)
 /* If we don't have profile information we cannot tell if splitting
 a critical edge is profitable or not so don't do it. */
 || ((! profile_info || ! flag_branch_probabilities
gcc/predict.c
@@ -261,6 +261,37 @@ optimize_loop_for_speed_p (struct loop *loop)
 return optimize_bb_for_speed_p (loop->header);
 }

+/* Return TRUE when LOOP nest should be optimized for speed. */
+
+bool
+optimize_loop_nest_for_speed_p (struct loop *loop)
+{
+struct loop *l = loop;
+if (optimize_loop_for_speed_p (loop))
+return true;
+l = loop->inner;
+while (l != loop)
+{
+if (optimize_loop_for_speed_p (l))
+return true;
+if (l->inner)
+l = l->inner;
+else if (l->next)
+l = l->next;
+else
+l = loop_outer (l);
+}
+return false;
+}
+
+/* Return TRUE when LOOP nest should be optimized for size. */
+
+bool
+optimize_loop_nest_for_size_p (struct loop *loop)
+{
+return !optimize_loop_nest_for_speed_p (loop);
+}
+
 /* Set RTL expansion for BB profile. */

 void
gcc/recog.c
@@ -3439,7 +3439,7 @@ relax_delay_slots (rtx first)

 Only do so if optimizing for size since this results in slower, but
 smaller code. */
-if (optimize_size
+if (optimize_function_for_size_p (cfun)
 && GET_CODE (PATTERN (delay_insn)) == RETURN
 && next
 && JUMP_P (next)
gcc/stmt.c
@@ -2419,7 +2419,7 @@ expand_case (tree exp)

 else if (count < case_values_threshold ()
 || compare_tree_int (range,
-(optimize_size ? 3 : 10) * count) > 0
+(optimize_insn_for_size_p () ? 3 : 10) * count) > 0
 /* RANGE may be signed, and really large ranges will show up
 as negative numbers. */
 || compare_tree_int (range, 0) < 0
@@ -2489,7 +2489,7 @@ expand_case (tree exp)

 /* Index jumptables from zero for suitable values of
 minval to avoid a subtraction. */
-if (! optimize_size
+if (optimize_insn_for_speed_p ()
 && compare_tree_int (minval, 0) > 0
 && compare_tree_int (minval, 3) < 0)
 {
gcc/tracer.c
@@ -92,7 +92,7 @@ ignore_bb_p (const_basic_block bb)
 {
 if (bb->index < NUM_FIXED_BLOCKS)
 return true;
-if (!maybe_hot_bb_p (bb))
+if (optimize_bb_for_size_p (bb))
 return true;
 return false;
 }
gcc/tree-eh.c
@@ -1535,7 +1535,7 @@ decide_copy_try_finally (int ndests, gimple_seq finally)
 sw_estimate = 10 + 2 * ndests;

 /* Optimize for size clearly wants our best guess. */
-if (optimize_size)
+if (optimize_function_for_size_p (cfun))
 return f_estimate < sw_estimate;

 /* ??? These numbers are completely made up so far. */
gcc/tree-parloops.c
@@ -1843,7 +1843,7 @@ parallelize_loops (void)
 {
 htab_empty (reduction_list);
 if (/* Do not bother with loops in cold areas. */
-!maybe_hot_bb_p (loop->header)
+optimize_loop_nest_for_size_p (loop)
 /* Or loops that roll too little. */
 || expected_loop_iterations (loop) <= n_threads
 /* And of course, the loop must be parallelizable. */
gcc/tree-ssa-coalesce.c
@@ -75,7 +75,7 @@ typedef struct coalesce_list_d
 possibly on CRITICAL edge and in HOT basic block. */

 static inline int
-coalesce_cost (int frequency, bool hot, bool critical)
+coalesce_cost (int frequency, bool optimize_for_size, bool critical)
 {
 /* Base costs on BB frequencies bounded by 1. */
 int cost = frequency;
@@ -83,12 +83,8 @@ coalesce_cost (int frequency, bool hot, bool critical)
 if (!cost)
 cost = 1;

-if (optimize_size)
+if (optimize_for_size)
 cost = 1;
-else
-/* It is more important to coalesce in HOT blocks. */
-if (hot)
-cost *= 2;

 /* Inserting copy on critical edge costs more than inserting it elsewhere. */
 if (critical)
@@ -102,7 +98,7 @@ coalesce_cost (int frequency, bool hot, bool critical)
 static inline int
 coalesce_cost_bb (basic_block bb)
 {
-return coalesce_cost (bb->frequency, maybe_hot_bb_p (bb), false);
+return coalesce_cost (bb->frequency, optimize_bb_for_size_p (bb), false);
 }


@@ -115,7 +111,7 @@ coalesce_cost_edge (edge e)
 return MUST_COALESCE_COST;

 return coalesce_cost (EDGE_FREQUENCY (e),
-maybe_hot_edge_p (e),
+optimize_edge_for_size_p (e),
 EDGE_CRITICAL_P (e));
 }

@@ -1099,7 +1095,7 @@ create_outofssa_var_map (coalesce_list_p cl, bitmap used_in_copy)
 if (SSA_NAME_VAR (outputs[match]) == SSA_NAME_VAR (input))
 {
 cost = coalesce_cost (REG_BR_PROB_BASE,
-maybe_hot_bb_p (bb),
+optimize_bb_for_size_p (bb),
 false);
 add_coalesce (cl, v1, v2, cost);
 bitmap_set_bit (used_in_copy, v1);
gcc/tree-ssa-loop-ivcanon.c
@@ -359,7 +359,7 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)

 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 {
-if (may_increase_size && maybe_hot_bb_p (loop->header)
+if (may_increase_size && optimize_loop_for_speed_p (loop)
 /* Unroll outermost loops only if asked to do so or they do
 not cause code growth. */
 && (unroll_outer
gcc/tree-ssa-loop-prefetch.c
@@ -1460,7 +1460,7 @@ loop_prefetch_arrays (struct loop *loop)
 struct tree_niter_desc desc;
 bool unrolled = false, no_other_refs;

-if (!maybe_hot_bb_p (loop->header))
+if (optimize_loop_nest_for_size_p (loop))
 {
 if (dump_file && (dump_flags & TDF_DETAILS))
 fprintf (dump_file, " ignored (cold area)\n");
gcc/tree-ssa-math-opts.c
@@ -353,7 +353,8 @@ replace_reciprocal (use_operand_p use_p)
 basic_block bb = gimple_bb (use_stmt);
 struct occurrence *occ = (struct occurrence *) bb->aux;

-if (occ->recip_def && use_stmt != occ->recip_def_stmt)
+if (optimize_bb_for_speed_p (bb)
+&& occ->recip_def && use_stmt != occ->recip_def_stmt)
 {
 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 SET_USE (use_p, occ->recip_def);
@@ -445,7 +446,7 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 static bool
 gate_cse_reciprocals (void)
 {
-return optimize && !optimize_size && flag_reciprocal_math;
+return optimize && flag_reciprocal_math;
 }

 /* Go through all the floating-point SSA_NAMEs, and call
@@ -500,6 +501,9 @@ execute_cse_reciprocals (void)
 execute_cse_reciprocals_1 (&gsi, def);
 }

+if (optimize_bb_for_size_p (bb))
+continue;
+
 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 {
gcc/tree-ssa-threadupdate.c
@@ -994,7 +994,7 @@ mark_threaded_blocks (bitmap threaded_blocks)

 /* If optimizing for size, only thread through block if we don't have
 to duplicate it or it's an otherwise empty redirection block. */
-if (optimize_size)
+if (optimize_function_for_size_p (cfun))
 {
 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
 {
gcc/tree-vect-analyze.c
@@ -1219,7 +1219,7 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
 }

-if (optimize_size)
+if (optimize_loop_nest_for_size_p (loop))
 {
 if (vect_print_dump_info (REPORT_DR_DETAILS))
 fprintf (vect_dump, "versioning not supported when optimizing for size.");
@@ -1993,7 +1993,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)

 /* Try versioning if:
 1) flag_tree_vect_loop_version is TRUE
-2) optimize_size is FALSE
+2) optimize loop for speed
 3) there is at least one unsupported misaligned data ref with an unknown
 misalignment, and
 4) all misaligned data refs with a known misalignment are supported, and
@@ -2001,7 +2001,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)

 do_versioning =
 flag_tree_vect_loop_version
-&& (!optimize_size)
+&& optimize_loop_nest_for_speed_p (loop)
 && (!loop->inner); /* FORNOW */

 if (do_versioning)
gcc/value-prof.c
@@ -669,7 +669,7 @@ gimple_divmod_fixed_value_transform (gimple_stmt_iterator *si)
 at least 50% of time (and 75% gives the guarantee of usage). */
 if (simple_cst_equal (gimple_assign_rhs2 (stmt), value) != 1
 || 2 * count < all
-|| !maybe_hot_bb_p (gimple_bb (stmt)))
+|| optimize_bb_for_size_p (gimple_bb (stmt)))
 return false;

 if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
@@ -820,7 +820,7 @@ gimple_mod_pow2_value_transform (gimple_stmt_iterator *si)
 /* We require that we hit a power of 2 at least half of all evaluations. */
 if (simple_cst_equal (gimple_assign_rhs2 (stmt), value) != 1
 || count < wrong_values
-|| !maybe_hot_bb_p (gimple_bb (stmt)))
+|| optimize_bb_for_size_p (gimple_bb (stmt)))
 return false;

 if (dump_file)
@@ -1017,7 +1017,7 @@ gimple_mod_subtract_transform (gimple_stmt_iterator *si)
 break;
 }
 if (i == steps
-|| !maybe_hot_bb_p (gimple_bb (stmt)))
+|| optimize_bb_for_size_p (gimple_bb (stmt)))
 return false;

 gimple_remove_histogram_value (cfun, stmt, histogram);
@@ -1397,7 +1397,7 @@ gimple_stringops_transform (gimple_stmt_iterator *gsi)
 /* We require that count is at least half of all; this means
 that for the transformation to fire the value must be constant
 at least 80% of time. */
-if ((6 * count / 5) < all || !maybe_hot_bb_p (gimple_bb (stmt)))
+if ((6 * count / 5) < all || optimize_bb_for_size_p (gimple_bb (stmt)))
 return false;
 if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
 return false;