fwprop.c (should_replace_address): Add speed attribute.

* fwprop.c (should_replace_address): Add speed attribute.
	(PR_OPTIMIZE_FOR_SPEED): New flag.
	(propagate_rtx_1): Use it.
	(propagate_rtx): Set it.
	(try_fwprop_subst): Update call of rtx_costs.
	(forward_propagate_and_simplify): Likewise.
	* hooks.c (hook_int_rtx_bool_0): New.
	(hook_bool_rtx_int_int_intp_false): Replace by ...
	(hook_bool_rtx_int_int_intp_bool_false): ... this one.
	* hooks.h (hook_int_rtx_bool_0): New.
	(hook_bool_rtx_int_int_intp_false): Replace by ...
	(hook_bool_rtx_int_int_intp_bool_false): ... this one.
	* optabs.c (avoid_expensive_constant): Update call of rtx_cost.
	(prepare_cmp_insn): Update call of rtx_cost.
	* postreload.c (reload_cse_simplify_set): Update call of rtx_cost.
	(reload_cse_simplify_operands): Update call of rtx_cost.
	(reload_cse_move2add): Update call of rtx_cost.
	* target.h (struct gcc_target): Update rtx_costs and address_costs.
	* rtlanal.c (rtx_cost): Add speed argument.
	(address_cost): Add speed argument.
	(default_address_cost): Likewise.
	(insn_rtx_cost): Likewise.
	* cfgloopanal.c (seq_cost): Add speed argument.
	(target_reg_cost, target_spill_cost): Turn to array.
	(init_set_costs): Update for speed.
	(estimate_reg_pressure_cost): Add speed argument.
	* auto-inc-dec.c (attempt_change): Update call of rtx_cost.
	* dojump.c (prefer_and_bit_test): Update call of rtx_cost.
	* tree-ssa-loop-ivopts.c (struct ivopts_data): New field speed.
	(seq_cost): Add speed argument.
	(computation_cost): Add speed argument.
	(add_cost, multiply_by_cost, get_address_cost): Add speed argument.
	(force_expr_to_var_cost): Update for profile info.
	(force_var_cost): Likewise.
	(split_address_cost): Likewise.
	(ptr_difference_cost): Likewise.
	(difference_cost): Likewise.
	(get_computation_cost_at): Likewise.
	(determine_iv_cost): Likewise.
	(ivopts_global_cost_for_size): Likewise.
	(rewrite_use_address): Likewise.
	(tree_ssa_iv_optimize_loop): Initialize speed field.
	* cse.c (optimize_this_for_speed_p): New static var.
	(notreg_cost): Update call of rtx_cost.
	(cse_extended_basic_block): Set optimize_this_for_speed_p.
	* ifcvt.c (cheap_bb_rtx_cost_p): Update call of rtx_cost.
	(noce_try_cmove_arith): Likewise.
	(noce_try_sign_mask): Likewise.
	* expr.c (compress_float_constant): Update rtx_cost calls.
	* tree-ssa-address.c (most_expensive_mult_to_index): Add speed argument.
	(addr_to_parts): Likewise.
	(create_mem_ref): Likewise.
	* dse.c (find_shift_sequence): Add speed argument.
	(replace_read): Update call.
	* calls.c (precompute_register_parameters): Update call of rtx_cost.
	* expmed.c (sdiv_pow2_cheap, smod_pow2_cheap, zero_cost, add_cost,
	neg_cost, shift_cost, shiftadd_cost,
	shiftsub_cost, mul_cost, sdiv_cost, udiv_cost, mul_widen_cost,
	mul_highpart_cost): Increase dimension.
	(init_expmed): Initialize for both size and speed.
	(expand_shift): Use profile.
	(synth_mult): Use profile.
	(choose_mult_variant): Use profile.
	(expand_mult): Use profile.
	(expand_mult_highpart_optab): Use profile.
	(expand_mult_highpart): Use profile.
	(expand_smod_pow2): Use profile.
	(expand_divmod): Use profile.
	* simplify-rtx.c (simplify_binary_operation_1): Update call of rtx_cost.
	* loop-invariant.c (create_new_invariant): Use profile.
	(gain_for_invariant): Add speed parameter.
	(best_gain_for_invariant): Likewise.
	(find_invariants_to_move): Likewise.
	(move_single_loop_invariants): Set it.
	* target-def.h (TARGET_RTX_COSTS): Use hook.
	* rtl.h (rtx_cost, address_cost, insn_rtx_cost): Update prototype.
	(optimize_insn_for_size_p, optimize_insn_for_speed_p): Declare.
	* output.h (default_address_cost): Update prototype.
	* combine.c (optimize_this_for_speed_p): New static var.
	(combine_validate_cost): Update call of rtx_cost.
	(combine_instructions): Set optimize_this_for_speed_p.
	(expand_compound_operation): Update call of rtx_cost.
	(make_extraction): Update call of rtx_cost.
	(force_to_mode): Update call of rtx_cost.
	(distribute_and_simplify_rtx): Update call of rtx_cost.
	* cfgloop.h (target_reg_cost, target_spill_cost): Turn to array.
	(estimate_reg_pressure_cost): Update prototype.
	* tree-flow.h (multiply_by_cost, create_mem_ref): Update prototype.
	* basic-block.h (optimize_insn_for_size_p, optimize_insn_for_speed_p):
	Remove.
	* config/alpha/alpha.c (alpha_rtx_costs): Update.
	* config/frv/frv.c (frv_rtx_costs): Update.
	* config/s390/s390.c (s390_rtx_costs): Update.
	* config/m32c/m32c.c (m32c_memory_move_cost): Update.
	(m32c_rtx_costs): Update.
	* config/spu/spu.c (TARGET_ADDRESS_COST): Update.
	(spu_rtx_costs): Update.
	* config/sparc/sparc.c (sparc_rtx_costs): Update.
	* config/m32r/m32r.c (m32r_rtx_costs): Update.
	* config/i386/i386.c (ix86_address_cost): Update.
	(ix86_rtx_costs): Update.
	* config/sh/sh.c (sh_rtx_costs, sh_address_cost): Update.
	* config/pdp11/pdp11.c (pdp11_rtx_costs): Update.
	* config/avr/avr.c (avr_rtx_costs, avr_address_cost): Update.
	* config/crx/crx.c (crx_address_cost): Update.
	* config/xtensa/xtensa.c (xtensa_rtx_costs): Update.
	* config/stormy16/stormy16.c
	(xstormy16_address_cost, xstormy16_rtx_costs): Update.
	* config/m68hc11/m68hc11.c
	(m68hc11_address_cost, m68hc11_rtx_costs): Update.
	* config/cris/cris.c (cris_rtx_costs, cris_address_cost): Update.
	* config/iq2000/iq2000.c (iq2000_rtx_costs, iq2000_address_cost): Update.
	* config/mn10300/mn10300.c (mn10300_address_cost, mn10300_rtx_costs): Update.
	* config/ia64/ia64.c (ia64_rtx_costs): Update.
	* config/m68k/m68k.c (m68k_rtx_costs): Update.
	* config/rs6000/rs6000.c (rs6000_rtx_costs): Update.
	* config/arc/arc.c (arc_rtx_costs, arc_address_cost): Update.
	* config/mcore/mcore.c (TARGET_ADDRESS_COST): Update.
	(mcore_rtx_costs): Update.
	* config/score/score3.c (score3_rtx_costs): Update.
	* config/score/score7.c (score7_rtx_costs): Update.
	* config/score/score3.h (score3_rtx_costs): Update.
	* config/score/score7.h (score7_rtx_costs): Update.
	* config/score/score.c (score_rtx_costs): Update.
	* config/arm/arm.c (arm_address_cost): Update.
	(arm_rtx_costs_1): Update.
	(arm_size_rtx_costs): Update.
	(arm_xscale_rtx_costs): Update.
	(arm_thumb_address_cost): Update.
	* config/pa/pa.c (hppa_address_cost): Update.
	* config/mips/mips.c (mips_rtx_costs): Update.
	* config/vax/vax.c (vax_address_cost): Update.
	* config/h8300/h8300.c (h8300_shift_costs): Update.
	(h8300_rtx_costs): Update.
	* config/v850/v850.c (TARGET_ADDRESS_COST): Update.
	(v850_rtx_costs): Update.
	* config/mmix/mmix.c (mmix_rtx_costs): Update.
	* config/bfin/bfin.c
	(bfin_address_cost): Update.
	(bfin_rtx_costs): Update.
	* stmt.c (lshift_cheap_p): Update.

From-SVN: r139821
Jan Hubicka, 2008-08-31 11:44:25 +02:00
67 changed files with 848 additions and 554 deletions
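The one interface change behind all of the hunks below: the RTL cost queries grow a trailing bool that says whether the code being costed is optimized for speed or for size, so callers stop consulting the global optimize_size. A minimal sketch of the new convention (the authoritative prototypes are the rtl.h and target.h updates listed in the ChangeLog above; the helper is hypothetical):

/* Cost queries now carry an explicit speed/size setting.  */
extern int rtx_cost (rtx, enum rtx_code, bool);
extern int address_cost (rtx, enum machine_mode, bool);
extern int insn_rtx_cost (rtx, bool);

/* Illustrative caller: cost an operand for the insn currently being
   expanded, honoring its profile rather than optimize_size, in the same
   way the calls.c hunk below does.  */
static bool
expensive_operand_p (rtx x)
{
  return rtx_cost (x, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (1);
}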


@ -515,13 +515,14 @@ attempt_change (rtx new_addr, rtx inc_reg)
rtx new_mem;
int old_cost = 0;
int new_cost = 0;
bool speed = optimize_bb_for_speed_p (bb);
PUT_MODE (mem_tmp, mode);
XEXP (mem_tmp, 0) = new_addr;
old_cost = rtx_cost (mem, 0)
+ rtx_cost (PATTERN (inc_insn.insn), 0);
new_cost = rtx_cost (mem_tmp, 0);
old_cost = rtx_cost (mem, 0, speed)
+ rtx_cost (PATTERN (inc_insn.insn), 0, speed);
new_cost = rtx_cost (mem_tmp, 0, speed);
/* The first item of business is to see if this is profitable. */
if (old_cost < new_cost)


@ -835,8 +835,6 @@ extern bool optimize_bb_for_size_p (const_basic_block);
extern bool optimize_bb_for_speed_p (const_basic_block);
extern bool optimize_edge_for_size_p (edge);
extern bool optimize_edge_for_speed_p (edge);
extern bool optimize_insn_for_size_p (void);
extern bool optimize_insn_for_speed_p (void);
extern bool optimize_function_for_size_p (struct function *);
extern bool optimize_function_for_speed_p (struct function *);
extern bool optimize_loop_for_size_p (struct loop *);


@ -713,7 +713,8 @@ precompute_register_parameters (int num_actuals, struct arg_data *args,
|| (GET_CODE (args[i].value) == SUBREG
&& REG_P (SUBREG_REG (args[i].value)))))
&& args[i].mode != BLKmode
&& rtx_cost (args[i].value, SET) > COSTS_N_INSNS (1)
&& rtx_cost (args[i].value, SET, optimize_insn_for_speed_p ())
> COSTS_N_INSNS (1)
&& ((SMALL_REGISTER_CLASSES && *reg_parm_seen)
|| optimize))
args[i].value = copy_to_mode_reg (args[i].mode, args[i].value);


@ -610,12 +610,12 @@ fel_init (loop_iterator *li, loop_p *loop, unsigned flags)
extern unsigned target_avail_regs;
extern unsigned target_res_regs;
extern unsigned target_reg_cost;
extern unsigned target_spill_cost;
extern unsigned target_reg_cost [2];
extern unsigned target_spill_cost [2];
/* Register pressure estimation for induction variable optimizations & loop
invariant motion. */
extern unsigned estimate_reg_pressure_cost (unsigned, unsigned);
extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool);
extern void init_set_costs (void);
/* Loop optimizer initialization. */


@ -295,7 +295,7 @@ get_loop_level (const struct loop *loop)
/* Returns estimate on cost of computing SEQ. */
static unsigned
seq_cost (const_rtx seq)
seq_cost (const_rtx seq, bool speed)
{
unsigned cost = 0;
rtx set;
@ -304,7 +304,7 @@ seq_cost (const_rtx seq)
{
set = single_set (seq);
if (set)
cost += rtx_cost (set, SET);
cost += rtx_cost (set, SET, speed);
else
cost++;
}
@ -317,10 +317,10 @@ seq_cost (const_rtx seq)
unsigned target_avail_regs; /* Number of available registers. */
unsigned target_res_regs; /* Number of registers reserved for temporary
expressions. */
unsigned target_reg_cost; /* The cost for register when there still
unsigned target_reg_cost[2]; /* The cost for register when there still
is some reserve, but we are approaching
the number of available registers. */
unsigned target_spill_cost; /* The cost for register when we need
unsigned target_spill_cost[2]; /* The cost for register when we need
to spill. */
/* Initialize the constants for computing set costs. */
@ -328,6 +328,7 @@ unsigned target_spill_cost; /* The cost for register when we need
void
init_set_costs (void)
{
int speed;
rtx seq;
rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
@ -343,27 +344,32 @@ init_set_costs (void)
target_res_regs = 3;
/* Set up the costs for using extra registers:
for (speed = 0; speed < 2; speed++)
{
crtl->maybe_hot_insn_p = speed;
/* Set up the costs for using extra registers:
1) If not many free registers remain, we should prefer having an
additional move to decreasing the number of available registers.
(TARGET_REG_COST).
2) If no registers are available, we need to spill, which may require
storing the old value to memory and loading it back
(TARGET_SPILL_COST). */
1) If not many free registers remain, we should prefer having an
additional move to decreasing the number of available registers.
(TARGET_REG_COST).
2) If no registers are available, we need to spill, which may require
storing the old value to memory and loading it back
(TARGET_SPILL_COST). */
start_sequence ();
emit_move_insn (reg1, reg2);
seq = get_insns ();
end_sequence ();
target_reg_cost = seq_cost (seq);
start_sequence ();
emit_move_insn (reg1, reg2);
seq = get_insns ();
end_sequence ();
target_reg_cost [speed] = seq_cost (seq, speed);
start_sequence ();
emit_move_insn (mem, reg1);
emit_move_insn (reg2, mem);
seq = get_insns ();
end_sequence ();
target_spill_cost = seq_cost (seq);
start_sequence ();
emit_move_insn (mem, reg1);
emit_move_insn (reg2, mem);
seq = get_insns ();
end_sequence ();
target_spill_cost [speed] = seq_cost (seq, speed);
}
default_rtl_profile ();
}
/* Estimates cost of increased register pressure caused by making N_NEW new
@ -371,7 +377,7 @@ init_set_costs (void)
around the loop. */
unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old)
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
{
unsigned cost;
unsigned regs_needed = n_new + n_old;
@ -384,11 +390,11 @@ estimate_reg_pressure_cost (unsigned n_new, unsigned n_old)
if (regs_needed <= target_avail_regs)
/* If we are close to running out of registers, try to preserve
them. */
cost = target_reg_cost * n_new;
cost = target_reg_cost [speed] * n_new;
else
/* If we run out of registers, it is very expensive to add another
one. */
cost = target_spill_cost * n_new;
cost = target_spill_cost [speed] * n_new;
if (optimize && flag_ira && (flag_ira_algorithm == IRA_ALGORITHM_REGIONAL
|| flag_ira_algorithm == IRA_ALGORITHM_MIXED)

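The loop optimizers consume the per-speed tables above through the extended estimate_reg_pressure_cost interface. A sketch of a hypothetical caller, assuming the loop-level predicate from the same optimize_*_for_speed_p family (the real callers are the tree-ssa-loop-ivopts.c and loop-invariant.c changes listed in the ChangeLog):

/* Hypothetical helper: fix the speed setting once per loop and let it
   select between the size-tuned and speed-tuned tables that
   init_set_costs now fills in.  */
static unsigned
loop_pressure_cost (struct loop *loop, unsigned n_new, unsigned n_old)
{
  bool speed = optimize_loop_for_speed_p (loop);
  return estimate_reg_pressure_cost (n_new, n_old, speed);
}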

@ -297,6 +297,7 @@ static rtx added_links_insn;
/* Basic block in which we are performing combines. */
static basic_block this_basic_block;
static bool optimize_this_for_speed_p;
/* Length of the currently allocated uid_insn_cost array. */
@ -793,10 +794,10 @@ combine_validate_cost (rtx i1, rtx i2, rtx i3, rtx newpat, rtx newi2pat,
}
/* Calculate the replacement insn_rtx_costs. */
new_i3_cost = insn_rtx_cost (newpat);
new_i3_cost = insn_rtx_cost (newpat, optimize_this_for_speed_p);
if (newi2pat)
{
new_i2_cost = insn_rtx_cost (newi2pat);
new_i2_cost = insn_rtx_cost (newi2pat, optimize_this_for_speed_p);
new_cost = (new_i2_cost > 0 && new_i3_cost > 0)
? new_i2_cost + new_i3_cost : 0;
}
@ -811,7 +812,7 @@ combine_validate_cost (rtx i1, rtx i2, rtx i3, rtx newpat, rtx newi2pat,
int old_other_cost, new_other_cost;
old_other_cost = INSN_COST (undobuf.other_insn);
new_other_cost = insn_rtx_cost (newotherpat);
new_other_cost = insn_rtx_cost (newotherpat, optimize_this_for_speed_p);
if (old_other_cost > 0 && new_other_cost > 0)
{
old_cost += old_other_cost;
@ -1068,6 +1069,7 @@ combine_instructions (rtx f, unsigned int nregs)
create_log_links ();
FOR_EACH_BB (this_basic_block)
{
optimize_this_for_speed_p = optimize_bb_for_speed_p (this_basic_block);
last_call_luid = 0;
mem_last_set = -1;
label_tick++;
@ -1090,7 +1092,8 @@ combine_instructions (rtx f, unsigned int nregs)
/* Record the current insn_rtx_cost of this instruction. */
if (NONJUMP_INSN_P (insn))
INSN_COST (insn) = insn_rtx_cost (PATTERN (insn));
INSN_COST (insn) = insn_rtx_cost (PATTERN (insn),
optimize_this_for_speed_p);
if (dump_file)
fprintf(dump_file, "insn_cost %d: %d\n",
INSN_UID (insn), INSN_COST (insn));
@ -6107,9 +6110,11 @@ expand_compound_operation (rtx x)
rtx temp2 = expand_compound_operation (temp);
/* Make sure this is a profitable operation. */
if (rtx_cost (x, SET) > rtx_cost (temp2, SET))
if (rtx_cost (x, SET, optimize_this_for_speed_p)
> rtx_cost (temp2, SET, optimize_this_for_speed_p))
return temp2;
else if (rtx_cost (x, SET) > rtx_cost (temp, SET))
else if (rtx_cost (x, SET, optimize_this_for_speed_p)
> rtx_cost (temp, SET, optimize_this_for_speed_p))
return temp;
else
return x;
@ -6534,7 +6539,8 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
/* Prefer ZERO_EXTENSION, since it gives more information to
backends. */
if (rtx_cost (temp, SET) <= rtx_cost (temp1, SET))
if (rtx_cost (temp, SET, optimize_this_for_speed_p)
<= rtx_cost (temp1, SET, optimize_this_for_speed_p))
return temp;
return temp1;
}
@ -6728,7 +6734,8 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
/* Prefer ZERO_EXTENSION, since it gives more information to
backends. */
if (rtx_cost (temp1, SET) < rtx_cost (temp, SET))
if (rtx_cost (temp1, SET, optimize_this_for_speed_p)
< rtx_cost (temp, SET, optimize_this_for_speed_p))
temp = temp1;
}
pos_rtx = temp;
@ -7377,7 +7384,8 @@ force_to_mode (rtx x, enum machine_mode mode, unsigned HOST_WIDE_INT mask,
y = simplify_gen_binary (AND, GET_MODE (x),
XEXP (x, 0), GEN_INT (cval));
if (rtx_cost (y, SET) < rtx_cost (x, SET))
if (rtx_cost (y, SET, optimize_this_for_speed_p)
< rtx_cost (x, SET, optimize_this_for_speed_p))
x = y;
}
@ -8521,7 +8529,8 @@ distribute_and_simplify_rtx (rtx x, int n)
tmp = apply_distributive_law (simplify_gen_binary (inner_code, mode,
new_op0, new_op1));
if (GET_CODE (tmp) != outer_code
&& rtx_cost (tmp, SET) < rtx_cost (x, SET))
&& rtx_cost (tmp, SET, optimize_this_for_speed_p)
< rtx_cost (x, SET, optimize_this_for_speed_p))
return tmp;
return NULL_RTX;

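The combine.c changes above follow the same pattern cse.c adopts in this commit: the per-block setting is cached once in a file-level flag, and every later cost comparison in the pass uses the cached value, so both sides of a comparison are always costed under the same profile. Condensed into a sketch (the two helpers are hypothetical; the flag and its initialization match the hunk above):

static bool optimize_this_for_speed_p;

/* Set when the pass enters a block, as combine_instructions now does.  */
static void
note_block (basic_block bb)
{
  optimize_this_for_speed_p = optimize_bb_for_speed_p (bb);
}

/* Reused for every comparison inside the block, e.g. in force_to_mode.  */
static rtx
cheaper_form (rtx x, rtx y)
{
  return (rtx_cost (y, SET, optimize_this_for_speed_p)
          < rtx_cost (x, SET, optimize_this_for_speed_p)) ? y : x;
}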

@ -1260,13 +1260,14 @@ alpha_legitimize_reload_address (rtx x,
scanned. In either case, *TOTAL contains the cost result. */
static bool
alpha_rtx_costs (rtx x, int code, int outer_code, int *total)
alpha_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed)
{
enum machine_mode mode = GET_MODE (x);
bool float_mode_p = FLOAT_MODE_P (mode);
const struct alpha_rtx_cost_data *cost_data;
if (optimize_size)
if (!speed)
cost_data = &alpha_rtx_cost_size;
else
cost_data = &alpha_rtx_cost_data[alpha_tune];
@ -1311,7 +1312,7 @@ alpha_rtx_costs (rtx x, int code, int outer_code, int *total)
*total = COSTS_N_INSNS (15);
else
/* Otherwise we do a load from the GOT. */
*total = COSTS_N_INSNS (optimize_size ? 1 : alpha_memory_latency);
*total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
return true;
case HIGH:
@ -1326,8 +1327,8 @@ alpha_rtx_costs (rtx x, int code, int outer_code, int *total)
else if (GET_CODE (XEXP (x, 0)) == MULT
&& const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
{
*total = (rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
+ rtx_cost (XEXP (x, 1), outer_code) + COSTS_N_INSNS (1));
*total = (rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
+ rtx_cost (XEXP (x, 1), outer_code, speed) + COSTS_N_INSNS (1));
return true;
}
return false;
@ -1375,7 +1376,7 @@ alpha_rtx_costs (rtx x, int code, int outer_code, int *total)
return false;
case MEM:
*total = COSTS_N_INSNS (optimize_size ? 1 : alpha_memory_latency);
*total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
return true;
case NEG:
@ -10657,7 +10658,7 @@ alpha_init_libfuncs (void)
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS alpha_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg


@ -92,8 +92,8 @@ static void arc_internal_label (FILE *, const char *, unsigned long);
static void arc_va_start (tree, rtx);
static void arc_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
tree, int *, int);
static bool arc_rtx_costs (rtx, int, int, int *);
static int arc_address_cost (rtx);
static bool arc_rtx_costs (rtx, int, int, int *, bool);
static int arc_address_cost (rtx, bool);
static void arc_external_libcall (rtx);
static bool arc_return_in_memory (const_tree, const_tree);
static bool arc_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
@ -830,7 +830,8 @@ arc_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
scanned. In either case, *TOTAL contains the cost result. */
static bool
arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
@ -884,7 +885,7 @@ arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
If ADDR is not a valid address, its cost is irrelevant. */
static int
arc_address_cost (rtx addr)
arc_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
switch (GET_CODE (addr))
{


@ -131,7 +131,8 @@ static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
static bool arm_xscale_rtx_costs (rtx, int, int, int *);
static bool arm_9e_rtx_costs (rtx, int, int, int *);
static int arm_address_cost (rtx);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
@ -256,9 +257,8 @@ static bool arm_allocate_stack_slots_for_args (void);
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
/* This will be overridden in arm_override_options. */
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_slowmul_rtx_costs
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
@ -1185,10 +1185,6 @@ arm_override_options (void)
gcc_assert (arm_tune != arm_none);
tune_flags = all_cores[(int)arm_tune].flags;
if (optimize_size)
targetm.rtx_costs = arm_size_rtx_costs;
else
targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
/* Make sure that the processor choice does not conflict with any of the
other command line choices. */
@ -4920,7 +4916,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
case MINUS:
if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
{
extra_cost = rtx_cost (XEXP (x, 1), code);
extra_cost = rtx_cost (XEXP (x, 1), code, true);
if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
extra_cost += 4 * ARM_NUM_REGS (mode);
return extra_cost;
@ -4969,7 +4965,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
if (GET_CODE (XEXP (x, 0)) == MULT)
{
extra_cost = rtx_cost (XEXP (x, 0), code);
extra_cost = rtx_cost (XEXP (x, 0), code, true);
if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
extra_cost += 4 * ARM_NUM_REGS (mode);
return extra_cost;
@ -5168,7 +5164,7 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
case ROTATE:
if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
{
*total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
*total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
return true;
}
/* Fall through */
@ -5178,15 +5174,15 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
case ASHIFTRT:
if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
{
*total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
*total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
return true;
}
else if (mode == SImode)
{
*total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
*total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
/* Slightly disparage register shifts, but not by much. */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
*total += 1 + rtx_cost (XEXP (x, 1), code);
*total += 1 + rtx_cost (XEXP (x, 1), code, false);
return true;
}
@ -5354,6 +5350,16 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
}
}
/* RTX costs when optimizing for size. */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
{
if (!speed)
return arm_size_rtx_costs (x, code, outer_code, total);
else
return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total);
}
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
supported on any "slowmul" cores, so it can be ignored. */
@ -5546,7 +5552,7 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
/* A COMPARE of a MULT is slow on XScale; the muls instruction
will stall until the multiplication is complete. */
if (GET_CODE (XEXP (x, 0)) == MULT)
*total = 4 + rtx_cost (XEXP (x, 0), code);
*total = 4 + rtx_cost (XEXP (x, 0), code, true);
else
*total = arm_rtx_costs_1 (x, code, outer_code);
return true;
@ -5666,7 +5672,7 @@ arm_thumb_address_cost (rtx x)
}
static int
arm_address_cost (rtx x)
arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}


@ -81,8 +81,8 @@ static void avr_reorg (void);
static void avr_asm_out_ctor (rtx, int);
static void avr_asm_out_dtor (rtx, int);
static int avr_operand_rtx_cost (rtx, enum machine_mode, enum rtx_code);
static bool avr_rtx_costs (rtx, int, int, int *);
static int avr_address_cost (rtx);
static bool avr_rtx_costs (rtx, int, int, int *, bool);
static int avr_address_cost (rtx, bool);
static bool avr_return_in_memory (const_tree, const_tree);
static struct machine_function * avr_init_machine_status (void);
static rtx avr_builtin_setjmp_frame_value (void);
@ -1562,7 +1562,7 @@ final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED,
fprintf (asm_out_file, "/*DEBUG: 0x%x\t\t%d\t%d */\n",
INSN_ADDRESSES (uid),
INSN_ADDRESSES (uid) - last_insn_address,
rtx_cost (PATTERN (insn), INSN));
rtx_cost (PATTERN (insn), INSN, !optimize_size));
}
last_insn_address = INSN_ADDRESSES (uid);
}
@ -4991,7 +4991,8 @@ order_regs_for_local_alloc (void)
operand's parent operator. */
static int
avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer)
avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer,
bool speed ATTRIBUTE_UNUSED)
{
enum rtx_code code = GET_CODE (x);
int total;
@ -5021,7 +5022,8 @@ avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer)
case, *TOTAL contains the cost result. */
static bool
avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
bool speed)
{
enum machine_mode mode = GET_MODE (x);
HOST_WIDE_INT val;
@ -5153,8 +5155,8 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
{
case QImode:
if (AVR_HAVE_MUL)
*total = COSTS_N_INSNS (optimize_size ? 3 : 4);
else if (optimize_size)
*total = COSTS_N_INSNS (!speed ? 3 : 4);
else if (!speed)
*total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
else
return false;
@ -5162,8 +5164,8 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case HImode:
if (AVR_HAVE_MUL)
*total = COSTS_N_INSNS (optimize_size ? 7 : 10);
else if (optimize_size)
*total = COSTS_N_INSNS (!speed ? 7 : 10);
else if (!speed)
*total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
else
return false;
@ -5180,7 +5182,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case MOD:
case UDIV:
case UMOD:
if (optimize_size)
if (!speed)
*total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
else
return false;
@ -5194,7 +5196,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case QImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (optimize_size ? 4 : 17);
*total = COSTS_N_INSNS (!speed ? 4 : 17);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
@ -5212,7 +5214,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case HImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (optimize_size ? 5 : 41);
*total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
@ -5240,16 +5242,16 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
*total = COSTS_N_INSNS (5);
break;
case 4:
*total = COSTS_N_INSNS (optimize_size ? 5 : 8);
*total = COSTS_N_INSNS (!speed ? 5 : 8);
break;
case 6:
*total = COSTS_N_INSNS (optimize_size ? 5 : 9);
*total = COSTS_N_INSNS (!speed ? 5 : 9);
break;
case 5:
*total = COSTS_N_INSNS (optimize_size ? 5 : 10);
*total = COSTS_N_INSNS (!speed ? 5 : 10);
break;
default:
*total = COSTS_N_INSNS (optimize_size ? 5 : 41);
*total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
@ -5257,7 +5259,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case SImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (optimize_size ? 7 : 113);
*total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
@ -5278,10 +5280,10 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
*total = COSTS_N_INSNS (6);
break;
case 2:
*total = COSTS_N_INSNS (optimize_size ? 7 : 8);
*total = COSTS_N_INSNS (!speed ? 7 : 8);
break;
default:
*total = COSTS_N_INSNS (optimize_size ? 7 : 113);
*total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
@ -5298,7 +5300,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case QImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (optimize_size ? 4 : 17);
*total = COSTS_N_INSNS (!speed ? 4 : 17);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
@ -5318,7 +5320,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case HImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (optimize_size ? 5 : 41);
*total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
@ -5344,17 +5346,17 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
*total = COSTS_N_INSNS (5);
break;
case 11:
*total = COSTS_N_INSNS (optimize_size ? 5 : 6);
*total = COSTS_N_INSNS (!speed ? 5 : 6);
break;
case 12:
*total = COSTS_N_INSNS (optimize_size ? 5 : 7);
*total = COSTS_N_INSNS (!speed ? 5 : 7);
break;
case 6:
case 13:
*total = COSTS_N_INSNS (optimize_size ? 5 : 8);
*total = COSTS_N_INSNS (!speed ? 5 : 8);
break;
default:
*total = COSTS_N_INSNS (optimize_size ? 5 : 41);
*total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
@ -5362,7 +5364,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case SImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (optimize_size ? 7 : 113);
*total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
@ -5380,13 +5382,13 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
*total = COSTS_N_INSNS (6);
break;
case 2:
*total = COSTS_N_INSNS (optimize_size ? 7 : 8);
*total = COSTS_N_INSNS (!speed ? 7 : 8);
break;
case 31:
*total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 5);
break;
default:
*total = COSTS_N_INSNS (optimize_size ? 7 : 113);
*total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
@ -5403,7 +5405,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case QImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (optimize_size ? 4 : 17);
*total = COSTS_N_INSNS (!speed ? 4 : 17);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
@ -5421,7 +5423,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case HImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (optimize_size ? 5 : 41);
*total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
@ -5450,17 +5452,17 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case 12:
case 13:
case 14:
*total = COSTS_N_INSNS (optimize_size ? 5 : 6);
*total = COSTS_N_INSNS (!speed ? 5 : 6);
break;
case 4:
*total = COSTS_N_INSNS (optimize_size ? 5 : 7);
*total = COSTS_N_INSNS (!speed ? 5 : 7);
break;
case 5:
case 6:
*total = COSTS_N_INSNS (optimize_size ? 5 : 9);
*total = COSTS_N_INSNS (!speed ? 5 : 9);
break;
default:
*total = COSTS_N_INSNS (optimize_size ? 5 : 41);
*total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
@ -5468,7 +5470,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
case SImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (optimize_size ? 7 : 113);
*total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
@ -5481,7 +5483,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
*total = COSTS_N_INSNS (4);
break;
case 2:
*total = COSTS_N_INSNS (optimize_size ? 7 : 8);
*total = COSTS_N_INSNS (!speed ? 7 : 8);
break;
case 8:
case 16:
@ -5492,7 +5494,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
*total = COSTS_N_INSNS (6);
break;
default:
*total = COSTS_N_INSNS (optimize_size ? 7 : 113);
*total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
@ -5543,7 +5545,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
/* Calculate the cost of a memory address. */
static int
avr_address_cost (rtx x)
avr_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x,1)) == CONST_INT


@ -1378,7 +1378,7 @@ bfin_dsp_memref_p (rtx x)
All addressing modes are equally cheap on the Blackfin. */
static int
bfin_address_cost (rtx addr ATTRIBUTE_UNUSED)
bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
return 1;
}
@ -2871,7 +2871,7 @@ bfin_legitimate_constant_p (rtx x)
}
static bool
bfin_rtx_costs (rtx x, int code, int outer_code, int *total)
bfin_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
{
int cost2 = COSTS_N_INSNS (1);
rtx op0, op1;
@ -2919,19 +2919,19 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total)
if (val == 2 || val == 4)
{
*total = cost2;
*total += rtx_cost (XEXP (op0, 0), outer_code);
*total += rtx_cost (op1, outer_code);
*total += rtx_cost (XEXP (op0, 0), outer_code, speed);
*total += rtx_cost (op1, outer_code, speed);
return true;
}
}
*total = cost2;
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
*total += rtx_cost (op0, SET);
*total += rtx_cost (op0, SET, speed);
#if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer
towards creating too many induction variables. */
if (!reg_or_7bit_operand (op1, SImode))
*total += rtx_cost (op1, SET);
*total += rtx_cost (op1, SET, speed);
#endif
}
else if (GET_MODE (x) == DImode)
@ -2939,10 +2939,10 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total)
*total = 6 * cost2;
if (GET_CODE (op1) != CONST_INT
|| !satisfies_constraint_Ks7 (op1))
*total += rtx_cost (op1, PLUS);
*total += rtx_cost (op1, PLUS, speed);
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
*total += rtx_cost (op0, PLUS);
*total += rtx_cost (op0, PLUS, speed);
}
return true;
@ -2965,7 +2965,7 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total)
op1 = XEXP (x, 1);
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
*total += rtx_cost (op0, code);
*total += rtx_cost (op0, code, speed);
return true;
@ -2990,7 +2990,7 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total)
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
*total += rtx_cost (op0, code);
*total += rtx_cost (op0, code, speed);
if (GET_MODE (x) == DImode)
{
@ -3004,12 +3004,12 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total)
if (code == AND)
{
if (! rhs_andsi3_operand (XEXP (x, 1), SImode))
*total += rtx_cost (XEXP (x, 1), code);
*total += rtx_cost (XEXP (x, 1), code, speed);
}
else
{
if (! regorlog2_operand (XEXP (x, 1), SImode))
*total += rtx_cost (XEXP (x, 1), code);
*total += rtx_cost (XEXP (x, 1), code, speed);
}
return true;
@ -3042,17 +3042,17 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total)
op0 = XEXP (op0, 0);
op1 = XEXP (op1, 0);
}
else if (optimize_size)
else if (!speed)
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (3);
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
*total += rtx_cost (op0, MULT);
*total += rtx_cost (op0, MULT, speed);
if (GET_CODE (op1) != REG
&& (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
*total += rtx_cost (op1, MULT);
*total += rtx_cost (op1, MULT, speed);
}
return true;


@ -112,8 +112,8 @@ static void cris_asm_output_mi_thunk
static void cris_file_start (void);
static void cris_init_libfuncs (void);
static bool cris_rtx_costs (rtx, int, int, int *);
static int cris_address_cost (rtx);
static bool cris_rtx_costs (rtx, int, int, int *, bool);
static int cris_address_cost (rtx, bool);
static bool cris_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
const_tree, bool);
static int cris_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
@ -1756,7 +1756,8 @@ cris_expand_return (bool on_stack)
scanned. In either case, *TOTAL contains the cost result. */
static bool
cris_rtx_costs (rtx x, int code, int outer_code, int *total)
cris_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed)
{
switch (code)
{
@ -1840,7 +1841,7 @@ cris_rtx_costs (rtx x, int code, int outer_code, int *total)
return false;
case ZERO_EXTEND: case SIGN_EXTEND:
*total = rtx_cost (XEXP (x, 0), outer_code);
*total = rtx_cost (XEXP (x, 0), outer_code, speed);
return true;
default:
@ -1851,7 +1852,7 @@ cris_rtx_costs (rtx x, int code, int outer_code, int *total)
/* The ADDRESS_COST worker. */
static int
cris_address_cost (rtx x)
cris_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
/* The metric to use for the cost-macros is unclear.
The metric used here is (the number of cycles needed) / 2,


@ -136,7 +136,7 @@ static bool crx_fixed_condition_code_regs (unsigned int *, unsigned int *);
static rtx crx_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
int incoming ATTRIBUTE_UNUSED);
static bool crx_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED);
static int crx_address_cost (rtx);
static int crx_address_cost (rtx, bool);
/*****************************************************************************/
/* STACK LAYOUT AND CALLING CONVENTIONS */
@ -800,7 +800,7 @@ crx_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
/* Return cost of the memory address x. */
static int
crx_address_cost (rtx addr)
crx_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
enum crx_addrtype addrtype;
struct crx_address address;


@ -9471,7 +9471,8 @@ static bool
frv_rtx_costs (rtx x,
int code ATTRIBUTE_UNUSED,
int outer_code ATTRIBUTE_UNUSED,
int *total)
int *total,
bool speed ATTRIBUTE_UNUSED)
{
if (outer_code == MEM)
{


@ -1152,7 +1152,7 @@ h8300_shift_costs (rtx x)
/* Worker function for TARGET_RTX_COSTS. */
static bool
h8300_rtx_costs (rtx x, int code, int outer_code, int *total)
h8300_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
{
if (TARGET_H8300SX && outer_code == MEM)
{
@ -1178,7 +1178,7 @@ h8300_rtx_costs (rtx x, int code, int outer_code, int *total)
{
/* Constant operands need the same number of processor
states as register operands. Although we could try to
use a size-based cost for optimize_size, the lack of
use a size-based cost for !speed, the lack of
of a mode makes the results very unpredictable. */
*total = 0;
return true;
@ -1243,11 +1243,11 @@ h8300_rtx_costs (rtx x, int code, int outer_code, int *total)
{
case QImode:
case HImode:
*total = COSTS_N_INSNS (optimize_size ? 4 : 10);
*total = COSTS_N_INSNS (!speed ? 4 : 10);
return false;
case SImode:
*total = COSTS_N_INSNS (optimize_size ? 4 : 18);
*total = COSTS_N_INSNS (!speed ? 4 : 18);
return false;
default:


@ -8491,7 +8491,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
requires to two regs - that would mean more pseudos with longer
lifetimes. */
static int
ix86_address_cost (rtx x)
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
struct ix86_address parts;
int cost = 1;
@ -25244,10 +25244,11 @@ ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
scanned. In either case, *TOTAL contains the cost result. */
static bool
ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
{
enum rtx_code outer_code = (enum rtx_code) outer_code_i;
enum machine_mode mode = GET_MODE (x);
const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
switch (code)
{
@ -25299,13 +25300,13 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
&& GET_MODE (XEXP (x, 0)) == SImode)
*total = 1;
else if (TARGET_ZERO_EXTEND_WITH_AND)
*total = ix86_cost->add;
*total = cost->add;
else
*total = ix86_cost->movzx;
*total = cost->movzx;
return false;
case SIGN_EXTEND:
*total = ix86_cost->movsx;
*total = cost->movsx;
return false;
case ASHIFT:
@ -25315,13 +25316,13 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
if (value == 1)
{
*total = ix86_cost->add;
*total = cost->add;
return false;
}
if ((value == 2 || value == 3)
&& ix86_cost->lea <= ix86_cost->shift_const)
&& cost->lea <= cost->shift_const)
{
*total = ix86_cost->lea;
*total = cost->lea;
return false;
}
}
@ -25336,24 +25337,24 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
if (CONST_INT_P (XEXP (x, 1)))
{
if (INTVAL (XEXP (x, 1)) > 32)
*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
*total = cost->shift_const + COSTS_N_INSNS (2);
else
*total = ix86_cost->shift_const * 2;
*total = cost->shift_const * 2;
}
else
{
if (GET_CODE (XEXP (x, 1)) == AND)
*total = ix86_cost->shift_var * 2;
*total = cost->shift_var * 2;
else
*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
}
}
else
{
if (CONST_INT_P (XEXP (x, 1)))
*total = ix86_cost->shift_const;
*total = cost->shift_const;
else
*total = ix86_cost->shift_var;
*total = cost->shift_var;
}
return false;
@ -25361,18 +25362,18 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE scalar cost should be used here. */
*total = ix86_cost->fmul;
*total = cost->fmul;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
*total = ix86_cost->fmul;
*total = cost->fmul;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
*total = ix86_cost->fmul;
*total = cost->fmul;
return false;
}
else
@ -25413,9 +25414,9 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
op0 = XEXP (op0, 0), mode = GET_MODE (op0);
}
*total = (ix86_cost->mult_init[MODE_INDEX (mode)]
+ nbits * ix86_cost->mult_bit
+ rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
*total = (cost->mult_init[MODE_INDEX (mode)]
+ nbits * cost->mult_bit
+ rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
return true;
}
@ -25426,14 +25427,14 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
case UMOD:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
*total = ix86_cost->fdiv;
*total = cost->fdiv;
else if (X87_FLOAT_MODE_P (mode))
*total = ix86_cost->fdiv;
*total = cost->fdiv;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
*total = ix86_cost->fdiv;
*total = cost->fdiv;
else
*total = ix86_cost->divide[MODE_INDEX (mode)];
*total = cost->divide[MODE_INDEX (mode)];
return false;
case PLUS:
@ -25448,11 +25449,11 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
if (val == 2 || val == 4 || val == 8)
{
*total = ix86_cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
*total = cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
*total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
outer_code);
*total += rtx_cost (XEXP (x, 1), outer_code);
outer_code, speed);
*total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
@ -25462,18 +25463,18 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
if (val == 2 || val == 4 || val == 8)
{
*total = ix86_cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
*total += rtx_cost (XEXP (x, 1), outer_code);
*total = cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
*total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
else if (GET_CODE (XEXP (x, 0)) == PLUS)
{
*total = ix86_cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
*total += rtx_cost (XEXP (x, 1), outer_code);
*total = cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
*total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
@ -25483,18 +25484,18 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE cost should be used here. */
*total = ix86_cost->fadd;
*total = cost->fadd;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
*total = ix86_cost->fadd;
*total = cost->fadd;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
*total = ix86_cost->fadd;
*total = cost->fadd;
return false;
}
/* FALLTHRU */
@ -25504,10 +25505,10 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
case XOR:
if (!TARGET_64BIT && mode == DImode)
{
*total = (ix86_cost->add * 2
+ (rtx_cost (XEXP (x, 0), outer_code)
*total = (cost->add * 2
+ (rtx_cost (XEXP (x, 0), outer_code, speed)
<< (GET_MODE (XEXP (x, 0)) != DImode))
+ (rtx_cost (XEXP (x, 1), outer_code)
+ (rtx_cost (XEXP (x, 1), outer_code, speed)
<< (GET_MODE (XEXP (x, 1)) != DImode)));
return true;
}
@ -25517,27 +25518,27 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE cost should be used here. */
*total = ix86_cost->fchs;
*total = cost->fchs;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
*total = ix86_cost->fchs;
*total = cost->fchs;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
*total = ix86_cost->fchs;
*total = cost->fchs;
return false;
}
/* FALLTHRU */
case NOT:
if (!TARGET_64BIT && mode == DImode)
*total = ix86_cost->add * 2;
*total = cost->add * 2;
else
*total = ix86_cost->add;
*total = cost->add;
return false;
case COMPARE:
@ -25548,9 +25549,9 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
{
/* This kind of construct is implemented using test[bwl].
Treat it as if we had an AND. */
*total = (ix86_cost->add
+ rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
+ rtx_cost (const1_rtx, outer_code));
*total = (cost->add
+ rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
+ rtx_cost (const1_rtx, outer_code, speed));
return true;
}
return false;
@ -25563,23 +25564,23 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
case ABS:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
*total = ix86_cost->fabs;
*total = cost->fabs;
else if (X87_FLOAT_MODE_P (mode))
*total = ix86_cost->fabs;
*total = cost->fabs;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
*total = ix86_cost->fabs;
*total = cost->fabs;
return false;
case SQRT:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
*total = ix86_cost->fsqrt;
*total = cost->fsqrt;
else if (X87_FLOAT_MODE_P (mode))
*total = ix86_cost->fsqrt;
*total = cost->fsqrt;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
*total = ix86_cost->fsqrt;
*total = cost->fsqrt;
return false;
case UNSPEC:


@ -202,7 +202,7 @@ static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static bool ia64_rtx_costs (rtx, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
@ -4810,7 +4810,8 @@ ia64_print_operand (FILE * file, rtx x, int code)
/* ??? This is incomplete. */
static bool
ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{


@ -161,8 +161,8 @@ static bool iq2000_return_in_memory (const_tree, const_tree);
static void iq2000_setup_incoming_varargs (CUMULATIVE_ARGS *,
enum machine_mode, tree, int *,
int);
static bool iq2000_rtx_costs (rtx, int, int, int *);
static int iq2000_address_cost (rtx);
static bool iq2000_rtx_costs (rtx, int, int, int *, bool);
static int iq2000_address_cost (rtx, bool);
static section *iq2000_select_section (tree, int, unsigned HOST_WIDE_INT);
static bool iq2000_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
const_tree, bool);
@ -744,7 +744,7 @@ iq2000_move_1word (rtx operands[], rtx insn, int unsignedp)
/* Provide the costs of an addressing mode that contains ADDR. */
static int
iq2000_address_cost (rtx addr)
iq2000_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
switch (GET_CODE (addr))
{
@ -3203,7 +3203,7 @@ print_operand (FILE *file, rtx op, int letter)
}
static bool
iq2000_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int * total)
iq2000_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int * total, bool speed)
{
enum machine_mode mode = GET_MODE (x);


@ -2119,7 +2119,8 @@ m32c_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS m32c_rtx_costs
static bool
m32c_rtx_costs (rtx x, int code, int outer_code, int *total)
m32c_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
@ -2198,7 +2199,7 @@ m32c_rtx_costs (rtx x, int code, int outer_code, int *total)
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST m32c_address_cost
static int
m32c_address_cost (rtx addr)
m32c_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
int i;
/* fprintf(stderr, "\naddress_cost\n");


@ -85,7 +85,7 @@ static bool m32r_return_in_memory (const_tree, const_tree);
static void m32r_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
tree, int *, int);
static void init_idents (void);
static bool m32r_rtx_costs (rtx, int, int, int *);
static bool m32r_rtx_costs (rtx, int, int, int *, bool speed);
static bool m32r_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
const_tree, bool);
static int m32r_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
@ -126,7 +126,7 @@ static int m32r_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS m32r_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
@ -1092,7 +1092,8 @@ m32r_issue_rate (void)
/* Cost functions. */
static bool
m32r_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
m32r_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{


@ -67,10 +67,10 @@ static void m68hc11_reorg (void);
static int go_if_legitimate_address_internal (rtx, enum machine_mode, int);
static rtx m68hc11_expand_compare (enum rtx_code, rtx, rtx);
static int must_parenthesize (rtx);
static int m68hc11_address_cost (rtx);
static int m68hc11_address_cost (rtx, bool);
static int m68hc11_shift_cost (enum machine_mode, rtx, int);
static int m68hc11_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
static bool m68hc11_rtx_costs (rtx, int, int, int *);
static bool m68hc11_rtx_costs (rtx, int, int, int *, bool);
static tree m68hc11_handle_fntype_attribute (tree *, tree, tree, int, bool *);
const struct attribute_spec m68hc11_attribute_table[];
@ -5145,7 +5145,7 @@ m68hc11_register_move_cost (enum machine_mode mode, enum reg_class from,
If ADDR is not a valid address, its cost is irrelevant. */
static int
m68hc11_address_cost (rtx addr)
m68hc11_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
int cost = 4;
@ -5230,7 +5230,7 @@ m68hc11_shift_cost (enum machine_mode mode, rtx x, int shift)
{
int total;
total = rtx_cost (x, SET);
total = rtx_cost (x, SET, !optimize_size);
if (mode == QImode)
total += m68hc11_cost->shiftQI_const[shift % 8];
else if (mode == HImode)
@ -5274,14 +5274,14 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code,
return m68hc11_shift_cost (mode, XEXP (x, 0), INTVAL (XEXP (x, 1)));
}
total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code);
total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
total += m68hc11_cost->shift_var;
return total;
case AND:
case XOR:
case IOR:
total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code);
total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
total += m68hc11_cost->logical;
/* Logical instructions are byte instructions only. */
@ -5290,7 +5290,7 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code,
case MINUS:
case PLUS:
total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code);
total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
total += m68hc11_cost->add;
if (GET_MODE_SIZE (mode) > 2)
{
@ -5301,7 +5301,7 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code,
case UDIV:
case DIV:
case MOD:
total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code);
total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
switch (mode)
{
case QImode:
@ -5324,16 +5324,16 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code,
if (mode == HImode && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
&& GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
return m68hc11_cost->multQI
+ rtx_cost (XEXP (XEXP (x, 0), 0), code)
+ rtx_cost (XEXP (XEXP (x, 1), 0), code);
+ rtx_cost (XEXP (XEXP (x, 0), 0), code, !optimize_size)
+ rtx_cost (XEXP (XEXP (x, 1), 0), code, !optimize_size);
/* emul instruction produces 32-bit result for 68HC12. */
if (TARGET_M6812 && mode == SImode
&& GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
&& GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
return m68hc11_cost->multHI
+ rtx_cost (XEXP (XEXP (x, 0), 0), code)
+ rtx_cost (XEXP (XEXP (x, 1), 0), code);
+ rtx_cost (XEXP (XEXP (x, 0), 0), code, !optimize_size)
+ rtx_cost (XEXP (XEXP (x, 1), 0), code, !optimize_size);
total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code);
switch (mode)
@ -5362,7 +5362,7 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code,
case COMPARE:
case ABS:
case ZERO_EXTEND:
total = extra_cost + rtx_cost (XEXP (x, 0), code);
total = extra_cost + rtx_cost (XEXP (x, 0), code, !optimize_size);
if (mode == QImode)
{
return total + COSTS_N_INSNS (1);
@ -5389,7 +5389,8 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code,
}
static bool
m68hc11_rtx_costs (rtx x, int code, int outer_code, int *total)
m68hc11_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{


@ -146,7 +146,7 @@ static tree m68k_handle_fndecl_attribute (tree *node, tree name,
static void m68k_compute_frame_layout (void);
static bool m68k_save_reg (unsigned int regno, bool interrupt_handler);
static bool m68k_ok_for_sibcall_p (tree, tree);
static bool m68k_rtx_costs (rtx, int, int, int *);
static bool m68k_rtx_costs (rtx, int, int, int *, bool);
#if M68K_HONOR_TARGET_STRICT_ALIGNMENT
static bool m68k_return_in_memory (const_tree, const_tree);
#endif
@ -2205,7 +2205,8 @@ const_int_cost (HOST_WIDE_INT i)
}
static bool
m68k_rtx_costs (rtx x, int code, int outer_code, int *total)
m68k_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{


@ -182,7 +182,7 @@ static int mcore_arg_partial_bytes (CUMULATIVE_ARGS *,
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS mcore_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG mcore_reorg
@ -479,7 +479,8 @@ mcore_ior_cost (rtx x)
}
static bool
mcore_rtx_costs (rtx x, int code, int outer_code, int * total)
mcore_rtx_costs (rtx x, int code, int outer_code, int * total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{


@ -3225,8 +3225,8 @@ mips_binary_cost (rtx x, int single_cost, int double_cost)
else
cost = single_cost;
return (cost
+ rtx_cost (XEXP (x, 0), 0)
+ rtx_cost (XEXP (x, 1), GET_CODE (x)));
+ rtx_cost (XEXP (x, 0), 0, !optimize_size)
+ rtx_cost (XEXP (x, 1), GET_CODE (x), !optimize_size));
}
/* Return the cost of floating-point multiplications of mode MODE. */
@ -3296,7 +3296,8 @@ mips_zero_extend_cost (enum machine_mode mode, rtx op)
/* Implement TARGET_RTX_COSTS. */
static bool
mips_rtx_costs (rtx x, int code, int outer_code, int *total)
mips_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed)
{
enum machine_mode mode = GET_MODE (x);
bool float_mode_p = FLOAT_MODE_P (mode);
@ -3443,7 +3444,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total)
&& UINTVAL (XEXP (x, 1)) == 0xffffffff)
{
*total = (mips_zero_extend_cost (mode, XEXP (x, 0))
+ rtx_cost (XEXP (x, 0), 0));
+ rtx_cost (XEXP (x, 0), 0, speed));
return true;
}
/* Fall through. */
@ -3475,7 +3476,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total)
case LO_SUM:
/* Low-part immediates need an extended MIPS16 instruction. */
*total = (COSTS_N_INSNS (TARGET_MIPS16 ? 2 : 1)
+ rtx_cost (XEXP (x, 0), 0));
+ rtx_cost (XEXP (x, 0), 0, speed));
return true;
case LT:
@ -3515,17 +3516,17 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total)
if (GET_CODE (op0) == MULT && GET_CODE (XEXP (op0, 0)) == NEG)
{
*total = (mips_fp_mult_cost (mode)
+ rtx_cost (XEXP (XEXP (op0, 0), 0), 0)
+ rtx_cost (XEXP (op0, 1), 0)
+ rtx_cost (op1, 0));
+ rtx_cost (XEXP (XEXP (op0, 0), 0), 0, speed)
+ rtx_cost (XEXP (op0, 1), 0, speed)
+ rtx_cost (op1, 0, speed));
return true;
}
if (GET_CODE (op1) == MULT)
{
*total = (mips_fp_mult_cost (mode)
+ rtx_cost (op0, 0)
+ rtx_cost (XEXP (op1, 0), 0)
+ rtx_cost (XEXP (op1, 1), 0));
+ rtx_cost (op0, 0, speed)
+ rtx_cost (XEXP (op1, 0), 0, speed)
+ rtx_cost (XEXP (op1, 1), 0, speed));
return true;
}
}
@ -3566,9 +3567,9 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total)
&& GET_CODE (XEXP (op, 0)) == MULT)
{
*total = (mips_fp_mult_cost (mode)
+ rtx_cost (XEXP (XEXP (op, 0), 0), 0)
+ rtx_cost (XEXP (XEXP (op, 0), 1), 0)
+ rtx_cost (XEXP (op, 1), 0));
+ rtx_cost (XEXP (XEXP (op, 0), 0), 0, speed)
+ rtx_cost (XEXP (XEXP (op, 0), 1), 0, speed)
+ rtx_cost (XEXP (op, 1), 0, speed));
return true;
}
}
@ -3606,9 +3607,9 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total)
if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT)
/* An rsqrt<mode>a or rsqrt<mode>b pattern. Count the
division as being free. */
*total = rtx_cost (XEXP (x, 1), 0);
*total = rtx_cost (XEXP (x, 1), 0, speed);
else
*total = mips_fp_div_cost (mode) + rtx_cost (XEXP (x, 1), 0);
*total = mips_fp_div_cost (mode) + rtx_cost (XEXP (x, 1), 0, speed);
return true;
}
/* Fall through. */
@ -3636,7 +3637,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total)
&& CONST_INT_P (XEXP (x, 1))
&& exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
{
*total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), 0);
*total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), 0, speed);
return true;
}
*total = COSTS_N_INSNS (mips_idiv_insns ());
@ -3671,7 +3672,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total)
/* Implement TARGET_ADDRESS_COST. */
static int
mips_address_cost (rtx addr)
mips_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
return mips_address_insns (addr, SImode, false);
}


@ -132,7 +132,7 @@ static void mmix_setup_incoming_varargs
(CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static void mmix_file_start (void);
static void mmix_file_end (void);
static bool mmix_rtx_costs (rtx, int, int, int *);
static bool mmix_rtx_costs (rtx, int, int, int *, bool);
static rtx mmix_struct_value_rtx (tree, int);
static bool mmix_pass_by_reference (CUMULATIVE_ARGS *,
enum machine_mode, const_tree, bool);
@ -181,7 +181,7 @@ static bool mmix_pass_by_reference (CUMULATIVE_ARGS *,
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS mmix_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG mmix_reorg
@ -1106,7 +1106,8 @@ static bool
mmix_rtx_costs (rtx x ATTRIBUTE_UNUSED,
int code ATTRIBUTE_UNUSED,
int outer_code ATTRIBUTE_UNUSED,
int *total ATTRIBUTE_UNUSED)
int *total ATTRIBUTE_UNUSED,
bool speed ATTRIBUTE_UNUSED)
{
/* For the time being, this is just a stub and we'll accept the
generic calculations, until we can do measurements, at least.


@ -70,8 +70,8 @@ enum processor_type mn10300_processor = PROCESSOR_DEFAULT;
static bool mn10300_handle_option (size_t, const char *, int);
static int mn10300_address_cost_1 (rtx, int *);
static int mn10300_address_cost (rtx);
static bool mn10300_rtx_costs (rtx, int, int, int *);
static int mn10300_address_cost (rtx, bool);
static bool mn10300_rtx_costs (rtx, int, int, int *, bool);
static void mn10300_file_start (void);
static bool mn10300_return_in_memory (const_tree, const_tree);
static rtx mn10300_builtin_saveregs (void);
@ -1942,7 +1942,7 @@ legitimate_address_p (enum machine_mode mode, rtx x, int strict)
}
static int
mn10300_address_cost_1 (rtx x, int *unsig)
mn10300_address_cost_1 (rtx x, int *unsig, bool speed ATTRIBUTE_UNUSED)
{
switch (GET_CODE (x))
{
@ -2008,14 +2008,14 @@ mn10300_address_cost_1 (rtx x, int *unsig)
}
static int
mn10300_address_cost (rtx x)
mn10300_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
int s = 0;
return mn10300_address_cost_1 (x, &s);
}
static bool
mn10300_rtx_costs (rtx x, int code, int outer_code, int *total)
mn10300_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{


@ -86,8 +86,8 @@ hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx);
static bool hppa_rtx_costs (rtx, int, int, int *);
static int hppa_address_cost (rtx, bool);
static bool hppa_rtx_costs (rtx, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
@ -1279,7 +1279,8 @@ hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
as GO_IF_LEGITIMATE_ADDRESS. */
static int
hppa_address_cost (rtx X)
hppa_address_cost (rtx X,
bool speed ATTRIBUTE_UNUSED)
{
switch (GET_CODE (X))
{
@ -1299,7 +1300,8 @@ hppa_address_cost (rtx X)
scanned. In either case, *TOTAL contains the cost result. */
static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{


@ -149,7 +149,7 @@ static const char *singlemove_string (rtx *);
static bool pdp11_assemble_integer (rtx, unsigned int, int);
static void pdp11_output_function_prologue (FILE *, HOST_WIDE_INT);
static void pdp11_output_function_epilogue (FILE *, HOST_WIDE_INT);
static bool pdp11_rtx_costs (rtx, int, int, int *);
static bool pdp11_rtx_costs (rtx, int, int, int *, bool);
static bool pdp11_return_in_memory (const_tree, const_tree);
/* Initialize the GCC target structure. */
@ -1094,7 +1094,8 @@ register_move_cost(enum reg_class c1, enum reg_class c2)
}
static bool
pdp11_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
pdp11_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{


@ -829,7 +829,7 @@ static void rs6000_xcoff_file_start (void);
static void rs6000_xcoff_file_end (void);
#endif
static int rs6000_variable_issue (FILE *, int, rtx, int);
static bool rs6000_rtx_costs (rtx, int, int, int *);
static bool rs6000_rtx_costs (rtx, int, int, int *, bool);
static int rs6000_adjust_cost (rtx, rtx, rtx, int);
static void rs6000_sched_init (FILE *, int, int);
static bool is_microcoded_insn (rtx);
@ -1180,7 +1180,7 @@ static const char alt_reg_names[][8] =
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_VECTOR_OPAQUE_P
#define TARGET_VECTOR_OPAQUE_P rs6000_is_opaque_type
@ -21456,7 +21456,8 @@ rs6000_xcoff_file_end (void)
scanned. In either case, *TOTAL contains the cost result. */
static bool
rs6000_rtx_costs (rtx x, int code, int outer_code, int *total)
rs6000_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed)
{
enum machine_mode mode = GET_MODE (x);
@ -21555,7 +21556,7 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int *total)
/* When optimizing for size, MEM should be slightly more expensive
than generating address, e.g., (plus (reg) (const)).
L1 cache latency is about two instructions. */
*total = optimize_size ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
*total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
return true;
case LABEL_REF:
@ -21766,7 +21767,7 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int *total)
case CALL:
case IF_THEN_ELSE:
if (optimize_size)
if (!speed)
{
*total = COSTS_N_INSNS (1);
return true;


@ -2298,7 +2298,8 @@ s390_float_const_zero_p (rtx value)
of the superexpression of x. */
static bool
s390_rtx_costs (rtx x, int code, int outer_code, int *total)
s390_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
@ -2336,9 +2337,9 @@ s390_rtx_costs (rtx x, int code, int outer_code, int *total)
*total = s390_cost->madbr;
else
*total = s390_cost->maebr;
*total += rtx_cost (XEXP (XEXP (x, 0), 0), MULT)
+ rtx_cost (XEXP (XEXP (x, 0), 1), MULT)
+ rtx_cost (XEXP (x, 1), code);
*total += rtx_cost (XEXP (XEXP (x, 0), 0), MULT, speed)
+ rtx_cost (XEXP (XEXP (x, 0), 1), MULT, speed)
+ rtx_cost (XEXP (x, 1), code, speed);
return true; /* Do not do an additional recursive descent. */
}
*total = COSTS_N_INSNS (1);
@ -2492,7 +2493,7 @@ s390_rtx_costs (rtx x, int code, int outer_code, int *total)
/* Return the cost of an address rtx ADDR. */
static int
s390_address_cost (rtx addr)
s390_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
struct s390_address ad;
if (!s390_decompose_address (addr, &ad))


@ -85,7 +85,7 @@ extern const char * score_select (rtx *ops, const char *inst_pre, bool commu,
extern const char * score_output_casesi (rtx *operands);
extern const char * score_rpush (rtx *ops);
extern const char * score_rpop (rtx *ops);
extern bool score_rtx_costs (rtx x, int code, int outer_code, int *total);
extern bool score_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed);
#ifdef RTX_CODE
extern enum machine_mode score_select_cc_mode (enum rtx_code op, rtx x, rtx y);


@ -569,7 +569,8 @@ score_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
/* Implement TARGET_RTX_COSTS macro. */
bool
score_rtx_costs (rtx x, int code, int outer_code, int *total)
score_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
if (TARGET_SCORE5 || TARGET_SCORE5U || TARGET_SCORE7 || TARGET_SCORE7D)
return score7_rtx_costs (x, code, outer_code, total);
@ -581,7 +582,8 @@ score_rtx_costs (rtx x, int code, int outer_code, int *total)
/* Implement TARGET_ADDRESS_COST macro. */
int
score_address_cost (rtx addr)
score_address_cost (rtx addr,
bool speed ATTRIBUTE_UNUSED)
{
if (TARGET_SCORE5 || TARGET_SCORE5U || TARGET_SCORE7 || TARGET_SCORE7D)
return score7_address_cost (addr);


@ -1000,7 +1000,8 @@ score3_address_insns (rtx x, enum machine_mode mode)
/* Implement TARGET_RTX_COSTS macro. */
bool
score3_rtx_costs (rtx x, int code, int outer_code, int *total)
score3_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
@ -1154,7 +1155,7 @@ score3_rtx_costs (rtx x, int code, int outer_code, int *total)
/* Implement TARGET_ADDRESS_COST macro. */
int
score3_address_cost (rtx addr)
score3_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
return score3_address_insns (addr, SImode);
}


@ -126,7 +126,7 @@ extern int score3_address_p (enum machine_mode mode, rtx x, int strict);
extern int score3_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
enum reg_class from,
enum reg_class to);
extern bool score3_rtx_costs (rtx x, int code, int outer_code, int *total);
extern bool score3_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed);
extern int score3_address_cost (rtx addr);
extern int score3_output_external (FILE *file ATTRIBUTE_UNUSED,
tree decl,


@ -991,7 +991,8 @@ score7_address_insns (rtx x, enum machine_mode mode)
/* Implement TARGET_RTX_COSTS macro. */
bool
score7_rtx_costs (rtx x, int code, int outer_code, int *total)
score7_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
@ -1145,7 +1146,8 @@ score7_rtx_costs (rtx x, int code, int outer_code, int *total)
/* Implement TARGET_ADDRESS_COST macro. */
int
score7_address_cost (rtx addr)
score7_address_cost (rtx addr,
bool speed ATTRIBUTE_UNUSED)
{
return score7_address_insns (addr, SImode);
}


@ -126,7 +126,7 @@ extern int score7_address_p (enum machine_mode mode, rtx x, int strict);
extern int score7_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
enum reg_class from,
enum reg_class to);
extern bool score7_rtx_costs (rtx x, int code, int outer_code, int *total);
extern bool score7_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed);
extern int score7_address_cost (rtx addr);
extern int score7_output_external (FILE *file ATTRIBUTE_UNUSED,
tree decl,


@ -241,8 +241,8 @@ static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static bool sh_rtx_costs (rtx, int, int, int *, bool);
static int sh_address_cost (rtx, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
@ -2361,7 +2361,7 @@ andcosts (rtx x)
|| satisfies_constraint_J16 (XEXP (x, 1)))
return 1;
else
return 1 + rtx_cost (XEXP (x, 1), AND);
return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
}
/* These constants are single cycle extu.[bw] instructions. */
@ -2461,7 +2461,8 @@ multcosts (rtx x ATTRIBUTE_UNUSED)
scanned. In either case, *TOTAL contains the cost result. */
static bool
sh_rtx_costs (rtx x, int code, int outer_code, int *total)
sh_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
@ -2587,7 +2588,8 @@ sh_rtx_costs (rtx x, int code, int outer_code, int *total)
since it increases pressure on r0. */
static int
sh_address_cost (rtx X)
sh_address_cost (rtx X,
bool speed ATTRIBUTE_UNUSED)
{
return (GET_CODE (X) == PLUS
&& ! CONSTANT_P (XEXP (X, 1))


@ -404,7 +404,7 @@ static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static bool sparc_rtx_costs (rtx, int, int, int *);
static bool sparc_rtx_costs (rtx, int, int, int *, bool);
static bool sparc_promote_prototypes (const_tree);
static rtx sparc_struct_value_rtx (tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
@ -513,7 +513,7 @@ static bool fpu_option_set = false;
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
/* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a
no-op for TARGET_ARCH32 this is ok. Otherwise we'd need to add a runtime
@ -8401,7 +8401,8 @@ sparc_extra_constraint_check (rtx op, int c, int strict)
??? the latencies and then CSE will just use that. */
static bool
sparc_rtx_costs (rtx x, int code, int outer_code, int *total)
sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
bool float_mode_p = FLOAT_MODE_P (mode);


@ -208,7 +208,7 @@ tree spu_builtin_types[SPU_BTI_MAX];
#define TARGET_RTX_COSTS spu_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
@ -4209,7 +4209,8 @@ spu_asm_globalize_label (FILE * file, const char *name)
}
static bool
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
int cost = COSTS_N_INSNS (2);


@ -57,8 +57,8 @@ static void xstormy16_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
static void xstormy16_init_builtins (void);
static rtx xstormy16_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static bool xstormy16_rtx_costs (rtx, int, int, int *);
static int xstormy16_address_cost (rtx);
static bool xstormy16_rtx_costs (rtx, int, int, int *, bool);
static int xstormy16_address_cost (rtx, bool);
static bool xstormy16_return_in_memory (const_tree, const_tree);
/* Define the information needed to generate branch and scc insns. This is
@ -74,7 +74,7 @@ static GTY(()) section *bss100_section;
static bool
xstormy16_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
int *total)
int *total, bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
@ -107,7 +107,7 @@ xstormy16_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
}
static int
xstormy16_address_cost (rtx x)
xstormy16_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
return (GET_CODE (x) == CONST_INT ? 2
: GET_CODE (x) == PLUS ? 7


@ -137,7 +137,7 @@ static GTY(()) section *zbss_section;
#define TARGET_RTX_COSTS v850_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG v850_reorg
@ -422,7 +422,7 @@ static bool
v850_rtx_costs (rtx x,
int code,
int outer_code ATTRIBUTE_UNUSED,
int * total)
int * total, bool speed)
{
switch (code)
{
@ -438,7 +438,7 @@ v850_rtx_costs (rtx x,
case DIV:
case UMOD:
case UDIV:
if (TARGET_V850E && optimize_size)
if (TARGET_V850E && !speed)
*total = 6;
else
*total = 60;

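The backend hunks above all take the same shape: every test of the global optimize_size inside a cost hook becomes a test of the new per-insn speed argument, exactly as in the v850 UDIV/UMOD case just shown. Below is a minimal self-contained sketch of that shape; example_rtx_costs and its constants are illustrative only and belong to no port in this patch.

#include <stdbool.h>

/* Sketch: a cost hook keyed on the new SPEED argument instead of the
   global optimize_size, mirroring the v850 division case above.  */
static bool
example_rtx_costs (int code, int *total, bool speed)
{
  switch (code)
    {
    case 1:                      /* stand-in for DIV/UDIV/MOD/UMOD */
      *total = speed ? 60 : 6;   /* full latency when hot, small cost when cold */
      return true;
    default:
      return false;              /* fall back to generic costing */
    }
}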

@ -49,8 +49,8 @@ static void vax_init_libfuncs (void);
static void vax_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree);
static int vax_address_cost_1 (rtx);
static int vax_address_cost (rtx);
static bool vax_rtx_costs (rtx, int, int, int *);
static int vax_address_cost (rtx, bool);
static bool vax_rtx_costs (rtx, int, int, int *, bool);
static rtx vax_struct_value_rtx (tree, int);
/* Initialize the GCC target structure. */
@ -520,7 +520,7 @@ vax_address_cost_1 (rtx addr)
}
static int
vax_address_cost (rtx x)
vax_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
return (1 + (REG_P (x) ? 0 : vax_address_cost_1 (x)));
}
@ -534,7 +534,8 @@ vax_address_cost (rtx x)
costs on a per cpu basis. */
static bool
vax_rtx_costs (rtx x, int code, int outer_code, int *total)
vax_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
int i = 0; /* may be modified in switch */


@ -139,7 +139,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *,
int) ATTRIBUTE_UNUSED;
static section *xtensa_select_rtx_section (enum machine_mode, rtx,
unsigned HOST_WIDE_INT);
static bool xtensa_rtx_costs (rtx, int, int, int *);
static bool xtensa_rtx_costs (rtx, int, int, int *, bool);
static tree xtensa_build_builtin_va_list (void);
static bool xtensa_return_in_memory (const_tree, const_tree);
static tree xtensa_gimplify_va_arg_expr (tree, tree, gimple_seq *,
@ -177,7 +177,7 @@ static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] =
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS xtensa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST xtensa_build_builtin_va_list
@ -2947,7 +2947,8 @@ xtensa_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
scanned. In either case, *TOTAL contains the cost result. */
static bool
xtensa_rtx_costs (rtx x, int code, int outer_code, int *total)
xtensa_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{


@ -283,6 +283,7 @@ static enum machine_mode this_insn_cc0_mode, prev_insn_cc0_mode;
/* Insn being scanned. */
static rtx this_insn;
static bool optimize_this_for_speed_p;
/* Index by register number, gives the number of the next (or
previous) register in the chain of registers sharing the same
@ -752,7 +753,7 @@ notreg_cost (rtx x, enum rtx_code outer)
&& TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (x)),
GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x)))))
? 0
: rtx_cost (x, outer) * 2);
: rtx_cost (x, outer, optimize_this_for_speed_p) * 2);
}
@ -5970,6 +5971,7 @@ cse_extended_basic_block (struct cse_basic_block_data *ebb_data)
FOR_BB_INSNS (bb, insn)
{
optimize_this_for_speed_p = optimize_bb_for_speed_p (bb);
/* If we have processed 1,000 insns, flush the hash table to
avoid extreme quadratic behavior. We must not include NOTEs
in the count since there may be more of them when generating

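cse.c cannot hand the flag to every helper (notreg_cost, for instance, has no insn in scope), so the pass caches the per-block decision in the new file-static optimize_this_for_speed_p before scanning each block, as the two hunks above show. A reduced, self-contained sketch of that caching pattern follows; scan_block and leaf_cost are illustrative names, not cse.c functions.

#include <stdbool.h>
#include <stddef.h>

static bool optimize_this_for_speed_p;   /* decided once per block */

/* Deep helpers read the cached flag instead of growing a parameter.  */
static int
leaf_cost (int x)
{
  return optimize_this_for_speed_p ? 2 * x : x;
}

static int
scan_block (const int *insn_costs, size_t n, bool block_is_hot)
{
  int total = 0;
  /* cse.c computes BLOCK_IS_HOT as optimize_bb_for_speed_p (bb).  */
  optimize_this_for_speed_p = block_is_hot;
  for (size_t i = 0; i < n; i++)
    total += leaf_cost (insn_costs[i]);
  return total;
}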

@ -144,8 +144,8 @@ prefer_and_bit_test (enum machine_mode mode, int bitnum)
XEXP (and_test, 1) = GEN_INT ((unsigned HOST_WIDE_INT) 1 << bitnum);
XEXP (XEXP (shift_test, 0), 1) = GEN_INT (bitnum);
return (rtx_cost (and_test, IF_THEN_ELSE)
<= rtx_cost (shift_test, IF_THEN_ELSE));
return (rtx_cost (and_test, IF_THEN_ELSE, optimize_insn_for_speed_p ())
<= rtx_cost (shift_test, IF_THEN_ELSE, optimize_insn_for_speed_p ()));
}
/* Generate code to evaluate EXP and jump to IF_FALSE_LABEL if


@ -1424,7 +1424,8 @@ static rtx
find_shift_sequence (int access_size,
store_info_t store_info,
read_info_t read_info,
int shift)
int shift,
bool speed)
{
enum machine_mode store_mode = GET_MODE (store_info->mem);
enum machine_mode read_mode = GET_MODE (read_info->mem);
@ -1483,7 +1484,7 @@ find_shift_sequence (int access_size,
cost = 0;
for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
if (INSN_P (insn))
cost += insn_rtx_cost (PATTERN (insn));
cost += insn_rtx_cost (PATTERN (insn), speed);
/* The computation up to here is essentially independent
of the arguments and could be precomputed. It may
@ -1582,7 +1583,8 @@ replace_read (store_info_t store_info, insn_info_t store_insn,
GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
start_sequence ();
if (shift)
read_reg = find_shift_sequence (access_size, store_info, read_info, shift);
read_reg = find_shift_sequence (access_size, store_info, read_info, shift,
optimize_bb_for_speed_p (BLOCK_FOR_INSN (read_insn->insn)));
else
read_reg = extract_low_bits (read_mode, store_mode,
copy_rtx (store_info->rhs));


@ -64,8 +64,8 @@ static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
Usually, this will mean that the MD file will emit non-branch
sequences. */
static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
static bool smod_pow2_cheap[NUM_MACHINE_MODES];
static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
@ -98,17 +98,17 @@ static bool smod_pow2_cheap[NUM_MACHINE_MODES];
/* Cost of various pieces of RTL. Note that some of these are indexed by
shift count and some by mode. */
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int sdiv_cost[NUM_MACHINE_MODES];
static int udiv_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];
static int zero_cost[2];
static int add_cost[2][NUM_MACHINE_MODES];
static int neg_cost[2][NUM_MACHINE_MODES];
static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[2][NUM_MACHINE_MODES];
static int sdiv_cost[2][NUM_MACHINE_MODES];
static int udiv_cost[2][NUM_MACHINE_MODES];
static int mul_widen_cost[2][NUM_MACHINE_MODES];
static int mul_highpart_cost[2][NUM_MACHINE_MODES];
void
init_expmed (void)
@ -137,15 +137,14 @@ init_expmed (void)
rtx cint[MAX_BITS_PER_WORD];
int m, n;
enum machine_mode mode, wider_mode;
int speed;
zero_cost = rtx_cost (const0_rtx, 0);
for (m = 1; m < MAX_BITS_PER_WORD; m++)
{
pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
cint[m] = GEN_INT (m);
}
memset (&all, 0, sizeof all);
PUT_CODE (&all.reg, REG);
@ -206,61 +205,71 @@ init_expmed (void)
XEXP (&all.shift_sub, 0) = &all.shift_mult;
XEXP (&all.shift_sub, 1) = &all.reg;
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
mode = GET_MODE_WIDER_MODE (mode))
for (speed = 0; speed < 2; speed++)
{
PUT_MODE (&all.reg, mode);
PUT_MODE (&all.plus, mode);
PUT_MODE (&all.neg, mode);
PUT_MODE (&all.mult, mode);
PUT_MODE (&all.sdiv, mode);
PUT_MODE (&all.udiv, mode);
PUT_MODE (&all.sdiv_32, mode);
PUT_MODE (&all.smod_32, mode);
PUT_MODE (&all.wide_trunc, mode);
PUT_MODE (&all.shift, mode);
PUT_MODE (&all.shift_mult, mode);
PUT_MODE (&all.shift_add, mode);
PUT_MODE (&all.shift_sub, mode);
crtl->maybe_hot_insn_p = speed;
zero_cost[speed] = rtx_cost (const0_rtx, 0, speed);
add_cost[mode] = rtx_cost (&all.plus, SET);
neg_cost[mode] = rtx_cost (&all.neg, SET);
mul_cost[mode] = rtx_cost (&all.mult, SET);
sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
udiv_cost[mode] = rtx_cost (&all.udiv, SET);
sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
<= 2 * add_cost[mode]);
smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
<= 4 * add_cost[mode]);
wider_mode = GET_MODE_WIDER_MODE (mode);
if (wider_mode != VOIDmode)
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
mode = GET_MODE_WIDER_MODE (mode))
{
PUT_MODE (&all.zext, wider_mode);
PUT_MODE (&all.wide_mult, wider_mode);
PUT_MODE (&all.wide_lshr, wider_mode);
XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
PUT_MODE (&all.reg, mode);
PUT_MODE (&all.plus, mode);
PUT_MODE (&all.neg, mode);
PUT_MODE (&all.mult, mode);
PUT_MODE (&all.sdiv, mode);
PUT_MODE (&all.udiv, mode);
PUT_MODE (&all.sdiv_32, mode);
PUT_MODE (&all.smod_32, mode);
PUT_MODE (&all.wide_trunc, mode);
PUT_MODE (&all.shift, mode);
PUT_MODE (&all.shift_mult, mode);
PUT_MODE (&all.shift_add, mode);
PUT_MODE (&all.shift_sub, mode);
mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
}
add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
shift_cost[mode][0] = 0;
shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
<= 2 * add_cost[speed][mode]);
smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
<= 4 * add_cost[speed][mode]);
n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
for (m = 1; m < n; m++)
{
XEXP (&all.shift, 1) = cint[m];
XEXP (&all.shift_mult, 1) = pow2[m];
wider_mode = GET_MODE_WIDER_MODE (mode);
if (wider_mode != VOIDmode)
{
PUT_MODE (&all.zext, wider_mode);
PUT_MODE (&all.wide_mult, wider_mode);
PUT_MODE (&all.wide_lshr, wider_mode);
XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
shift_cost[mode][m] = rtx_cost (&all.shift, SET);
shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
mul_widen_cost[speed][wider_mode]
= rtx_cost (&all.wide_mult, SET, speed);
mul_highpart_cost[speed][mode]
= rtx_cost (&all.wide_trunc, SET, speed);
}
shift_cost[speed][mode][0] = 0;
shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
= add_cost[speed][mode];
n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
for (m = 1; m < n; m++)
{
XEXP (&all.shift, 1) = cint[m];
XEXP (&all.shift_mult, 1) = pow2[m];
shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
}
}
}
default_rtl_profile ();
}
/* Return an rtx representing minus the value of X.
@ -2057,6 +2066,7 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
optab rrotate_optab = rotr_optab;
enum machine_mode op1_mode;
int attempt;
bool speed = optimize_insn_for_speed_p ();
op1 = expand_normal (amount);
op1_mode = GET_MODE (op1);
@ -2098,8 +2108,8 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
&& INTVAL (op1) > 0
&& INTVAL (op1) < GET_MODE_BITSIZE (mode)
&& INTVAL (op1) < MAX_BITS_PER_WORD
&& shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
&& shift_cost[mode][INTVAL (op1)] != MAX_COST)
&& shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
&& shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
{
int i;
for (i = 0; i < INTVAL (op1); i++)
@ -2293,6 +2303,9 @@ struct alg_hash_entry {
Otherwise, the cost within which multiplication by T is
impossible. */
struct mult_cost cost;
/* Optimized for speed? */
bool speed;
};
/* The number of cache/hash entries. */
@ -2346,6 +2359,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
int hash_index;
bool cache_hit = false;
enum alg_code cache_alg = alg_zero;
bool speed = optimize_insn_for_speed_p ();
/* Indicate that no algorithm is yet found. If no algorithm
is found, this value will be returned and indicate failure. */
@ -2373,13 +2387,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
fail now. */
if (t == 0)
{
if (MULT_COST_LESS (cost_limit, zero_cost))
if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
return;
else
{
alg_out->ops = 1;
alg_out->cost.cost = zero_cost;
alg_out->cost.latency = zero_cost;
alg_out->cost.cost = zero_cost[speed];
alg_out->cost.latency = zero_cost[speed];
alg_out->op[0] = alg_zero;
return;
}
@ -2392,11 +2406,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
best_cost = *cost_limit;
/* Compute the hash index. */
hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
/* See if we already know what to do for T. */
if (alg_hash[hash_index].t == t
&& alg_hash[hash_index].mode == mode
&& alg_hash[hash_index].mode == mode
&& alg_hash[hash_index].speed == speed
&& alg_hash[hash_index].alg != alg_unknown)
{
cache_alg = alg_hash[hash_index].alg;
@ -2465,10 +2481,10 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
q = t >> m;
/* The function expand_shift will choose between a shift and
a sequence of additions, so the observed cost is given as
MIN (m * add_cost[mode], shift_cost[mode][m]). */
op_cost = m * add_cost[mode];
if (shift_cost[mode][m] < op_cost)
op_cost = shift_cost[mode][m];
MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
op_cost = m * add_cost[speed][mode];
if (shift_cost[speed][mode][m] < op_cost)
op_cost = shift_cost[speed][mode][m];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, q, &new_limit, mode);
@ -2509,7 +2525,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
{
/* T ends with ...111. Multiply by (T + 1) and subtract 1. */
op_cost = add_cost[mode];
op_cost = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, t + 1, &new_limit, mode);
@ -2529,7 +2545,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
{
/* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
op_cost = add_cost[mode];
op_cost = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, t - 1, &new_limit, mode);
@ -2574,14 +2590,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
equal to its cost, otherwise assume that on superscalar
hardware the shift may be executed concurrently with the
earlier steps in the algorithm. */
op_cost = add_cost[mode] + shift_cost[mode][m];
if (shiftadd_cost[mode][m] < op_cost)
op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
if (shiftadd_cost[speed][mode][m] < op_cost)
{
op_cost = shiftadd_cost[mode][m];
op_cost = shiftadd_cost[speed][mode][m];
op_latency = op_cost;
}
else
op_latency = add_cost[mode];
op_latency = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_latency;
@ -2613,14 +2629,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
equal to it's cost, otherwise assume that on superscalar
hardware the shift may be executed concurrently with the
earlier steps in the algorithm. */
op_cost = add_cost[mode] + shift_cost[mode][m];
if (shiftsub_cost[mode][m] < op_cost)
op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
if (shiftsub_cost[speed][mode][m] < op_cost)
{
op_cost = shiftsub_cost[mode][m];
op_cost = shiftsub_cost[speed][mode][m];
op_latency = op_cost;
}
else
op_latency = add_cost[mode];
op_latency = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_latency;
@ -2654,7 +2670,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
m = exact_log2 (q);
if (m >= 0 && m < maxm)
{
op_cost = shiftadd_cost[mode][m];
op_cost = shiftadd_cost[speed][mode][m];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
@ -2679,7 +2695,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
m = exact_log2 (q);
if (m >= 0 && m < maxm)
{
op_cost = shiftsub_cost[mode][m];
op_cost = shiftsub_cost[speed][mode][m];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
@ -2710,6 +2726,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
caller. */
alg_hash[hash_index].t = t;
alg_hash[hash_index].mode = mode;
alg_hash[hash_index].speed = speed;
alg_hash[hash_index].alg = alg_impossible;
alg_hash[hash_index].cost = *cost_limit;
return;
@ -2720,6 +2737,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
{
alg_hash[hash_index].t = t;
alg_hash[hash_index].mode = mode;
alg_hash[hash_index].speed = speed;
alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
alg_hash[hash_index].cost.cost = best_cost.cost;
alg_hash[hash_index].cost.latency = best_cost.latency;
@ -2759,6 +2777,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
struct algorithm alg2;
struct mult_cost limit;
int op_cost;
bool speed = optimize_insn_for_speed_p ();
/* Fail quickly for impossible bounds. */
if (mult_cost < 0)
@ -2767,7 +2786,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
/* Ensure that mult_cost provides a reasonable upper bound.
Any constant multiplication can be performed with less
than 2 * bits additions. */
op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
if (mult_cost > op_cost)
mult_cost = op_cost;
@ -2780,7 +2799,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
`unsigned int' */
if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
{
op_cost = neg_cost[mode];
op_cost = neg_cost[speed][mode];
if (MULT_COST_LESS (&alg->cost, mult_cost))
{
limit.cost = alg->cost.cost - op_cost;
@ -2800,7 +2819,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
}
/* This proves very useful for division-by-constant. */
op_cost = add_cost[mode];
op_cost = add_cost[speed][mode];
if (MULT_COST_LESS (&alg->cost, mult_cost))
{
limit.cost = alg->cost.cost - op_cost;
@ -2988,6 +3007,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
enum mult_variant variant;
struct algorithm algorithm;
int max_cost;
bool speed = optimize_insn_for_speed_p ();
/* Handling const0_rtx here allows us to use zero as a rogue value for
coeff below. */
@ -3029,8 +3049,8 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
result is interpreted as an unsigned coefficient.
Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult. */
max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
- neg_cost[mode];
max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
- neg_cost[speed][mode];
if (max_cost > 0
&& choose_mult_variant (mode, -INTVAL (op1), &algorithm,
&variant, max_cost))
@ -3074,7 +3094,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
/* Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult. */
max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
if (choose_mult_variant (mode, coeff, &algorithm, &variant,
max_cost))
return expand_mult_const (mode, op0, coeff, target,
@ -3317,6 +3337,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
optab moptab;
rtx tem;
int size;
bool speed = optimize_insn_for_speed_p ();
gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
@ -3325,7 +3346,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
if (mul_highpart_cost[mode] < max_cost)
if (mul_highpart_cost[speed][mode] < max_cost)
{
moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
@ -3337,8 +3358,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
if (size - 1 < BITS_PER_WORD
&& (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
+ 4 * add_cost[mode] < max_cost))
&& (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
+ 4 * add_cost[speed][mode] < max_cost))
{
moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
@ -3352,7 +3373,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Try widening multiplication. */
moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
&& mul_widen_cost[wider_mode] < max_cost)
&& mul_widen_cost[speed][wider_mode] < max_cost)
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
unsignedp, OPTAB_WIDEN);
@ -3363,7 +3384,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Try widening the mode and perform a non-widening multiplication. */
if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
&& mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
&& mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
{
rtx insns, wop0, wop1;
@ -3390,8 +3411,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
&& (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
+ 4 * add_cost[mode] < max_cost))
&& (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
+ 4 * add_cost[speed][mode] < max_cost))
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
@ -3429,6 +3450,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
enum mult_variant variant;
struct algorithm alg;
rtx tem;
bool speed = optimize_insn_for_speed_p ();
gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
/* We can't support modes wider than HOST_BITS_PER_INT. */
@ -3444,13 +3466,13 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
return expand_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);
extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
/* Check whether we try to multiply by a negative constant. */
if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
{
sign_adjust = true;
extra_cost += add_cost[mode];
extra_cost += add_cost[speed][mode];
}
/* See whether shift/add multiplication is cheap enough. */
@ -3510,7 +3532,7 @@ expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
temp = gen_rtx_LSHIFTRT (mode, result, shift);
if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
|| rtx_cost (temp, SET) > COSTS_N_INSNS (2))
|| rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
{
temp = expand_binop (mode, xor_optab, op0, signmask,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
@ -3641,7 +3663,7 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
temp = gen_reg_rtx (mode);
temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
NULL_RTX, 0, OPTAB_LIB_WIDEN);
else
@ -3714,6 +3736,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
int max_cost, extra_cost;
static HOST_WIDE_INT last_div_const = 0;
static HOST_WIDE_INT ext_op1;
bool speed = optimize_insn_for_speed_p ();
op1_is_constant = GET_CODE (op1) == CONST_INT;
if (op1_is_constant)
@ -3844,10 +3867,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
/* Only deduct something for a REM if the last divide done was
for a different constant. Then set the constant of the last
divide. */
max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
if (rem_flag && ! (last_div_const != 0 && op1_is_constant
&& INTVAL (op1) == last_div_const))
max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
@ -3966,9 +3989,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
goto fail1;
extra_cost
= (shift_cost[compute_mode][post_shift - 1]
+ shift_cost[compute_mode][1]
+ 2 * add_cost[compute_mode]);
= (shift_cost[speed][compute_mode][post_shift - 1]
+ shift_cost[speed][compute_mode][1]
+ 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 1,
max_cost - extra_cost);
@ -4002,8 +4025,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
build_int_cst (NULL_TREE, pre_shift),
NULL_RTX, 1);
extra_cost
= (shift_cost[compute_mode][pre_shift]
+ shift_cost[compute_mode][post_shift]);
= (shift_cost[speed][compute_mode][pre_shift]
+ shift_cost[speed][compute_mode][post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, ml,
NULL_RTX, 1,
max_cost - extra_cost);
@ -4133,9 +4156,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
|| size - 1 >= BITS_PER_WORD)
goto fail1;
extra_cost = (shift_cost[compute_mode][post_shift]
+ shift_cost[compute_mode][size - 1]
+ add_cost[compute_mode]);
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0,
max_cost - extra_cost);
@ -4170,9 +4193,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
mlr = gen_int_mode (ml, compute_mode);
extra_cost = (shift_cost[compute_mode][post_shift]
+ shift_cost[compute_mode][size - 1]
+ 2 * add_cost[compute_mode]);
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0,
max_cost - extra_cost);
@ -4265,9 +4288,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
NULL_RTX, 0);
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
extra_cost = (shift_cost[compute_mode][post_shift]
+ shift_cost[compute_mode][size - 1]
+ 2 * add_cost[compute_mode]);
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, ml,
NULL_RTX, 1,
max_cost - extra_cost);

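The expmed.c rework gives every cached cost table a leading dimension indexed by the speed flag: init_expmed now loops over speed = 0 and 1, temporarily setting crtl->maybe_hot_insn_p and restoring default_rtl_profile () afterwards, and every lookup becomes table[speed][mode]. synth_mult likewise folds the flag into its hash key so a recipe computed for hot code is not reused for cold code. A reduced, self-contained sketch of the table shape; NUM_MODES, init_costs, and lookup_add_cost are illustrative stand-ins.

#include <stdbool.h>

enum { NUM_MODES = 8 };               /* stand-in for NUM_MACHINE_MODES */

static int add_cost[2][NUM_MODES];    /* [speed][mode] */

/* Fill both the size-oriented (0) and speed-oriented (1) rows once.  */
static void
init_costs (int (*query) (int mode, bool speed))
{
  for (int speed = 0; speed < 2; speed++)
    for (int mode = 0; mode < NUM_MODES; mode++)
      add_cost[speed][mode] = query (mode, speed);
}

/* Lookups take the per-insn decision instead of the global optimize_size.  */
static int
lookup_add_cost (int mode, bool speed)
{
  return add_cost[speed][mode];
}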

@ -3458,13 +3458,14 @@ compress_float_constant (rtx x, rtx y)
enum machine_mode srcmode;
REAL_VALUE_TYPE r;
int oldcost, newcost;
bool speed = optimize_insn_for_speed_p ();
REAL_VALUE_FROM_CONST_DOUBLE (r, y);
if (LEGITIMATE_CONSTANT_P (y))
oldcost = rtx_cost (y, SET);
oldcost = rtx_cost (y, SET, speed);
else
oldcost = rtx_cost (force_const_mem (dstmode, y), SET);
oldcost = rtx_cost (force_const_mem (dstmode, y), SET, speed);
for (srcmode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (orig_srcmode));
srcmode != orig_srcmode;
@ -3491,7 +3492,7 @@ compress_float_constant (rtx x, rtx y)
if (! (*insn_data[ic].operand[1].predicate) (trunc_y, srcmode))
continue;
/* This is valid, but may not be cheaper than the original. */
newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET);
newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET, speed);
if (oldcost < newcost)
continue;
}
@ -3499,7 +3500,7 @@ compress_float_constant (rtx x, rtx y)
{
trunc_y = force_const_mem (srcmode, trunc_y);
/* This is valid, but may not be cheaper than the original. */
newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET);
newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET, speed);
if (oldcost < newcost)
continue;
trunc_y = validize_mem (trunc_y);


@ -184,7 +184,8 @@ canonicalize_address (rtx x)
for a memory access in the given MODE. */
static bool
should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode)
should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
bool speed)
{
int gain;
@ -196,14 +197,14 @@ should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode)
return true;
/* Prefer the new address if it is less expensive. */
gain = address_cost (old_rtx, mode) - address_cost (new_rtx, mode);
gain = address_cost (old_rtx, mode, speed) - address_cost (new_rtx, mode, speed);
/* If the addresses have equivalent cost, prefer the new address
if it has the highest `rtx_cost'. That has the potential of
eliminating the most insns without additional costs, and it
is the same that cse.c used to do. */
if (gain == 0)
gain = rtx_cost (new_rtx, SET) - rtx_cost (old_rtx, SET);
gain = rtx_cost (new_rtx, SET, speed) - rtx_cost (old_rtx, SET, speed);
return (gain > 0);
}
@ -231,7 +232,10 @@ enum {
PR_HANDLE_MEM is set when the source of the propagation was not
another MEM. Then, it is safe not to treat non-read-only MEMs as
``opaque'' objects. */
PR_HANDLE_MEM = 2
PR_HANDLE_MEM = 2,
/* Set when costs should be optimized for speed. */
PR_OPTIMIZE_FOR_SPEED = 4
};
@ -360,7 +364,8 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
/* Copy propagations are always ok. Otherwise check the costs. */
if (!(REG_P (old_rtx) && REG_P (new_rtx))
&& !should_replace_address (op0, new_op0, GET_MODE (x)))
&& !should_replace_address (op0, new_op0, GET_MODE (x),
flags & PR_OPTIMIZE_FOR_SPEED))
return true;
tem = replace_equiv_address_nv (x, new_op0);
@ -438,7 +443,8 @@ varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
Otherwise, we accept simplifications that have a lower or equal cost. */
static rtx
propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx)
propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
bool speed)
{
rtx tem;
bool collapsed;
@ -453,6 +459,9 @@ propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx)
if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
flags |= PR_HANDLE_MEM;
if (speed)
flags |= PR_OPTIMIZE_FOR_SPEED;
tem = x;
collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
if (tem == x || !collapsed)
@ -728,7 +737,8 @@ try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new_rtx, rtx def_insn, bool
enum df_ref_type type = DF_REF_TYPE (use);
int flags = DF_REF_FLAGS (use);
rtx set = single_set (insn);
int old_cost = rtx_cost (SET_SRC (set), SET);
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
int old_cost = rtx_cost (SET_SRC (set), SET, speed);
bool ok;
if (dump_file)
@ -750,7 +760,7 @@ try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new_rtx, rtx def_insn, bool
}
else if (DF_REF_TYPE (use) == DF_REF_REG_USE
&& rtx_cost (SET_SRC (set), SET) > old_cost)
&& rtx_cost (SET_SRC (set), SET, speed) > old_cost)
{
if (dump_file)
fprintf (dump_file, "Changes to insn %d not profitable\n",
@ -928,7 +938,8 @@ forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set)
else
mode = GET_MODE (*loc);
new_rtx = propagate_rtx (*loc, mode, reg, src);
new_rtx = propagate_rtx (*loc, mode, reg, src,
optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
if (!new_rtx)
return false;

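fwprop.c threads the decision through its existing flags word instead of adding a parameter to every recursive frame: propagate_rtx sets the new PR_OPTIMIZE_FOR_SPEED bit when the use sits in a hot block, and propagate_rtx_1 hands it on to should_replace_address. A self-contained sketch of that flag-threading pattern follows; driver and worker_cost are illustrative names only.

#include <stdbool.h>

enum
{
  PR_CAN_APPEAR         = 1,
  PR_HANDLE_MEM         = 2,
  PR_OPTIMIZE_FOR_SPEED = 4   /* the new bit */
};

/* Sketch of a recursive worker: the speed decision travels in FLAGS.  */
static int
worker_cost (int x, int flags)
{
  bool speed = (flags & PR_OPTIMIZE_FOR_SPEED) != 0;
  return speed ? x : x / 2;
}

/* Sketch of the driver: fold the caller's boolean into the flags word.  */
static int
driver (int x, bool speed)
{
  int flags = PR_CAN_APPEAR | PR_HANDLE_MEM;
  if (speed)
    flags |= PR_OPTIMIZE_FOR_SPEED;
  return worker_cost (x, flags);
}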

@ -153,6 +153,12 @@ hook_int_rtx_0 (rtx a ATTRIBUTE_UNUSED)
return 0;
}
int
hook_int_rtx_bool_0 (rtx a ATTRIBUTE_UNUSED, bool b ATTRIBUTE_UNUSED)
{
return 0;
}
int
hook_int_size_t_constcharptr_int_0 (size_t a ATTRIBUTE_UNUSED,
const char *b ATTRIBUTE_UNUSED,
@ -233,10 +239,11 @@ hook_bool_uintp_uintp_false (unsigned int *a ATTRIBUTE_UNUSED,
}
bool
hook_bool_rtx_int_int_intp_false (rtx a ATTRIBUTE_UNUSED,
int b ATTRIBUTE_UNUSED,
int c ATTRIBUTE_UNUSED,
int *d ATTRIBUTE_UNUSED)
hook_bool_rtx_int_int_intp_bool_false (rtx a ATTRIBUTE_UNUSED,
int b ATTRIBUTE_UNUSED,
int c ATTRIBUTE_UNUSED,
int *d ATTRIBUTE_UNUSED,
bool speed_p ATTRIBUTE_UNUSED)
{
return false;
}


@ -44,7 +44,7 @@ extern bool hook_bool_const_tree_hwi_hwi_const_tree_true (const_tree,
const_tree);
extern bool hook_bool_rtx_false (rtx);
extern bool hook_bool_uintp_uintp_false (unsigned int *, unsigned int *);
extern bool hook_bool_rtx_int_int_intp_false (rtx, int, int, int *);
extern bool hook_bool_rtx_int_int_intp_bool_false (rtx, int, int, int *, bool);
extern bool hook_bool_constcharptr_size_t_false (const char *, size_t);
extern bool hook_bool_size_t_constcharptr_int_true (size_t, const char *, int);
extern bool hook_bool_tree_tree_false (tree, tree);
@ -59,6 +59,7 @@ extern void hook_void_tree_treeptr (tree, tree *);
extern int hook_int_const_tree_0 (const_tree);
extern int hook_int_const_tree_const_tree_1 (const_tree, const_tree);
extern int hook_int_rtx_0 (rtx);
extern int hook_int_rtx_bool_0 (rtx, bool);
extern int hook_int_size_t_constcharptr_int_0 (size_t, const char *, int);
extern int hook_int_void_no_regs (void);


@ -143,12 +143,13 @@ cheap_bb_rtx_cost_p (const_basic_block bb, int max_cost)
{
int count = 0;
rtx insn = BB_HEAD (bb);
bool speed = optimize_bb_for_speed_p (bb);
while (1)
{
if (NONJUMP_INSN_P (insn))
{
int cost = insn_rtx_cost (PATTERN (insn));
int cost = insn_rtx_cost (PATTERN (insn), speed);
if (cost == 0)
return false;
@ -1351,7 +1352,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
if insn_rtx_cost can't be estimated. */
if (insn_a)
{
insn_cost = insn_rtx_cost (PATTERN (insn_a));
insn_cost = insn_rtx_cost (PATTERN (insn_a),
optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn_a)));
if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost))
return FALSE;
}
@ -1360,7 +1362,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
if (insn_b)
{
insn_cost += insn_rtx_cost (PATTERN (insn_b));
insn_cost += insn_rtx_cost (PATTERN (insn_b),
optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn_b)));
if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost))
return FALSE;
}
@ -1901,7 +1904,8 @@ noce_try_sign_mask (struct noce_if_info *if_info)
INSN_B which can happen for e.g. conditional stores to memory. */
b_unconditional = (if_info->insn_b == NULL_RTX
|| BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb);
if (rtx_cost (t, SET) >= COSTS_N_INSNS (2)
if (rtx_cost (t, SET, optimize_bb_for_speed_p (BLOCK_FOR_INSN (if_info->insn_b)))
>= COSTS_N_INSNS (2)
&& (!b_unconditional
|| t != if_info->b))
return FALSE;


@ -586,7 +586,8 @@ doloop_optimize (struct loop *loop)
max_cost
= COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
if (rtx_cost (desc->niter_expr, SET) > max_cost)
if (rtx_cost (desc->niter_expr, SET, optimize_loop_for_speed_p (loop))
> max_cost)
{
if (dump_file)
fprintf (dump_file,


@ -668,6 +668,7 @@ create_new_invariant (struct def *def, rtx insn, bitmap depends_on,
{
struct invariant *inv = XNEW (struct invariant);
rtx set = single_set (insn);
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
inv->def = def;
inv->always_executed = always_executed;
@ -676,9 +677,9 @@ create_new_invariant (struct def *def, rtx insn, bitmap depends_on,
/* If the set is simple, usually by moving it we move the whole store out of
the loop. Otherwise we save only cost of the computation. */
if (def)
inv->cost = rtx_cost (set, SET);
inv->cost = rtx_cost (set, SET, speed);
else
inv->cost = rtx_cost (SET_SRC (set), SET);
inv->cost = rtx_cost (SET_SRC (set), SET, speed);
inv->move = false;
inv->reg = NULL_RTX;
@ -1048,15 +1049,15 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed)
static int
gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
unsigned new_regs, unsigned regs_used)
unsigned new_regs, unsigned regs_used, bool speed)
{
int comp_cost, size_cost;
get_inv_cost (inv, &comp_cost, regs_needed);
actual_stamp++;
size_cost = (estimate_reg_pressure_cost (new_regs + *regs_needed, regs_used)
- estimate_reg_pressure_cost (new_regs, regs_used));
size_cost = (estimate_reg_pressure_cost (new_regs + *regs_needed, regs_used, speed)
- estimate_reg_pressure_cost (new_regs, regs_used, speed));
return comp_cost - size_cost;
}
@ -1069,7 +1070,7 @@ gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
static int
best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
unsigned new_regs, unsigned regs_used)
unsigned new_regs, unsigned regs_used, bool speed)
{
struct invariant *inv;
int gain = 0, again;
@ -1084,7 +1085,8 @@ best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
if (inv->eqto != inv->invno)
continue;
again = gain_for_invariant (inv, &aregs_needed, new_regs, regs_used);
again = gain_for_invariant (inv, &aregs_needed, new_regs, regs_used,
speed);
if (again > gain)
{
gain = again;
@ -1123,7 +1125,7 @@ set_move_mark (unsigned invno)
/* Determines which invariants to move. */
static void
find_invariants_to_move (void)
find_invariants_to_move (bool speed)
{
unsigned i, regs_used, regs_needed = 0, new_regs;
struct invariant *inv = NULL;
@ -1147,7 +1149,7 @@ find_invariants_to_move (void)
}
new_regs = 0;
while (best_gain_for_invariant (&inv, &regs_needed, new_regs, regs_used) > 0)
while (best_gain_for_invariant (&inv, &regs_needed, new_regs, regs_used, speed) > 0)
{
set_move_mark (inv->invno);
new_regs += regs_needed;
@ -1314,7 +1316,7 @@ move_single_loop_invariants (struct loop *loop)
init_inv_motion_data ();
find_invariants (loop);
find_invariants_to_move ();
find_invariants_to_move (optimize_loop_for_speed_p (loop));
move_invariants (loop);
free_inv_motion_data ();
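(Reading aid, not part of the patch: with the flag threaded through, the move decision weighs the computation cost saved against the register-pressure cost, both evaluated under the same speed/size preference. Condensed sketch; the helper name worth_moving_p is invented, comp_cost and the register counts stand for the values produced by get_inv_cost and find_invariants_to_move, and the fragment is not compilable outside GCC.)

static bool
worth_moving_p (struct invariant *inv, int comp_cost, unsigned regs_needed,
                unsigned new_regs, unsigned regs_used, bool speed)
{
  int size_cost
    = (estimate_reg_pressure_cost (new_regs + regs_needed, regs_used, speed)
       - estimate_reg_pressure_cost (new_regs, regs_used, speed));

  /* Move only while the saved computation outweighs the added pressure.  */
  if (comp_cost - size_cost > 0)
    {
      set_move_mark (inv->invno);
      return true;
    }
  return false;
}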


@ -1414,7 +1414,8 @@ avoid_expensive_constant (enum machine_mode mode, optab binoptab,
if (mode != VOIDmode
&& optimize
&& CONSTANT_P (x)
&& rtx_cost (x, binoptab->code) > COSTS_N_INSNS (1))
&& rtx_cost (x, binoptab->code, optimize_insn_for_speed_p ())
> COSTS_N_INSNS (1))
{
if (GET_CODE (x) == CONST_INT)
{
@ -4037,11 +4038,13 @@ prepare_cmp_insn (rtx *px, rtx *py, enum rtx_code *pcomparison, rtx size,
/* If we are inside an appropriately-short loop and we are optimizing,
force expensive constants into a register. */
if (CONSTANT_P (x) && optimize
&& rtx_cost (x, COMPARE) > COSTS_N_INSNS (1))
&& (rtx_cost (x, COMPARE, optimize_insn_for_speed_p ())
> COSTS_N_INSNS (1)))
x = force_reg (mode, x);
if (CONSTANT_P (y) && optimize
&& rtx_cost (y, COMPARE) > COSTS_N_INSNS (1))
&& (rtx_cost (y, COMPARE, optimize_insn_for_speed_p ())
> COSTS_N_INSNS (1)))
y = force_reg (mode, y);
#ifdef HAVE_cc0


@ -621,7 +621,7 @@ extern void default_elf_asm_output_external (FILE *file, tree,
const char *);
extern int maybe_assemble_visibility (tree);
extern int default_address_cost (rtx);
extern int default_address_cost (rtx, bool);
/* dbxout helper functions */
#if defined DBX_DEBUGGING_INFO || defined XCOFF_DEBUGGING_INFO


@ -233,6 +233,7 @@ reload_cse_simplify_set (rtx set, rtx insn)
#ifdef LOAD_EXTEND_OP
enum rtx_code extend_op = UNKNOWN;
#endif
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
dreg = true_regnum (SET_DEST (set));
if (dreg < 0)
@ -267,7 +268,7 @@ reload_cse_simplify_set (rtx set, rtx insn)
old_cost = REGISTER_MOVE_COST (GET_MODE (src),
REGNO_REG_CLASS (REGNO (src)), dclass);
else
old_cost = rtx_cost (src, SET);
old_cost = rtx_cost (src, SET, speed);
for (l = val->locs; l; l = l->next)
{
@ -302,7 +303,7 @@ reload_cse_simplify_set (rtx set, rtx insn)
this_rtx = GEN_INT (this_val);
}
#endif
this_cost = rtx_cost (this_rtx, SET);
this_cost = rtx_cost (this_rtx, SET, speed);
}
else if (REG_P (this_rtx))
{
@ -310,7 +311,7 @@ reload_cse_simplify_set (rtx set, rtx insn)
if (extend_op != UNKNOWN)
{
this_rtx = gen_rtx_fmt_e (extend_op, word_mode, this_rtx);
this_cost = rtx_cost (this_rtx, SET);
this_cost = rtx_cost (this_rtx, SET, speed);
}
else
#endif
@ -570,8 +571,10 @@ reload_cse_simplify_operands (rtx insn, rtx testreg)
if (op_alt_regno[i][j] == -1
&& reg_fits_class_p (testreg, rclass, 0, mode)
&& (GET_CODE (recog_data.operand[i]) != CONST_INT
|| (rtx_cost (recog_data.operand[i], SET)
> rtx_cost (testreg, SET))))
|| (rtx_cost (recog_data.operand[i], SET,
optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)))
> rtx_cost (testreg, SET,
optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn))))))
{
alternative_nregs[j]++;
op_alt_regno[i][j] = regno;
@ -1240,6 +1243,8 @@ reload_cse_move2add (rtx first)
{
rtx new_src = gen_int_mode (INTVAL (src) - reg_offset[regno],
GET_MODE (reg));
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
/* (set (reg) (plus (reg) (const_int 0))) is not canonical;
use (set (reg) (reg)) instead.
We don't delete this insn, nor do we convert it into a
@ -1255,7 +1260,7 @@ reload_cse_move2add (rtx first)
if (INTVAL (src) == reg_offset [regno])
validate_change (insn, &SET_SRC (pat), reg, 0);
}
else if (rtx_cost (new_src, PLUS) < rtx_cost (src, SET)
else if (rtx_cost (new_src, PLUS, speed) < rtx_cost (src, SET, speed)
&& have_add2_insn (reg, new_src))
{
rtx tem = gen_rtx_PLUS (GET_MODE (reg), reg, new_src);
@ -1331,14 +1336,15 @@ reload_cse_move2add (rtx first)
+ base_offset
- regno_offset,
GET_MODE (reg));
int success = 0;
bool success = false;
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
if (new_src == const0_rtx)
/* See above why we create (set (reg) (reg)) here. */
success
= validate_change (next, &SET_SRC (set), reg, 0);
else if ((rtx_cost (new_src, PLUS)
< COSTS_N_INSNS (1) + rtx_cost (src3, SET))
else if ((rtx_cost (new_src, PLUS, speed)
< COSTS_N_INSNS (1) + rtx_cost (src3, SET, speed))
&& have_add2_insn (reg, new_src))
{
rtx newpat = gen_rtx_SET (VOIDmode,


@ -1074,8 +1074,8 @@ rhs_regno (const_rtx x)
#define MAX_COST INT_MAX
extern void init_rtlanal (void);
extern int rtx_cost (rtx, enum rtx_code);
extern int address_cost (rtx, enum machine_mode);
extern int rtx_cost (rtx, enum rtx_code, bool);
extern int address_cost (rtx, enum machine_mode, bool);
extern unsigned int subreg_lsb (const_rtx);
extern unsigned int subreg_lsb_1 (enum machine_mode, enum machine_mode,
unsigned int);
@ -1784,7 +1784,7 @@ extern int loc_mentioned_in_p (rtx *, const_rtx);
extern rtx find_first_parameter_load (rtx, rtx);
extern bool keep_with_call_p (const_rtx);
extern bool label_is_jump_target_p (const_rtx, const_rtx);
extern int insn_rtx_cost (rtx);
extern int insn_rtx_cost (rtx, bool);
/* Given an insn and condition, return a canonical description of
the test being made. */
@ -2339,5 +2339,7 @@ extern void insn_locators_finalize (void);
extern void set_curr_insn_source_location (location_t);
extern void set_curr_insn_block (tree);
extern int curr_insn_locator (void);
extern bool optimize_insn_for_size_p (void);
extern bool optimize_insn_for_speed_p (void);
#endif /* ! GCC_RTL_H */


@ -3501,10 +3501,13 @@ label_is_jump_target_p (const_rtx label, const_rtx jump_insn)
/* Return an estimate of the cost of computing rtx X.
One use is in cse, to decide which expression to keep in the hash table.
Another is in rtl generation, to pick the cheapest way to multiply.
Other uses like the latter are expected in the future. */
Other uses like the latter are expected in the future.
The SPEED parameter specifies whether costs optimized for speed or size should
be returned. */
int
rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED)
rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED, bool speed)
{
int i, j;
enum rtx_code code;
@ -3552,7 +3555,7 @@ rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED)
break;
default:
if (targetm.rtx_costs (x, code, outer_code, &total))
if (targetm.rtx_costs (x, code, outer_code, &total, speed))
return total;
break;
}
@ -3563,19 +3566,22 @@ rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED)
fmt = GET_RTX_FORMAT (code);
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
if (fmt[i] == 'e')
total += rtx_cost (XEXP (x, i), code);
total += rtx_cost (XEXP (x, i), code, speed);
else if (fmt[i] == 'E')
for (j = 0; j < XVECLEN (x, i); j++)
total += rtx_cost (XVECEXP (x, i, j), code);
total += rtx_cost (XVECEXP (x, i, j), code, speed);
return total;
}
/* Return cost of address expression X.
Expect that X is properly formed address reference. */
Expect that X is properly formed address reference.
The SPEED parameter specifies whether costs optimized for speed or size should
be returned. */
int
address_cost (rtx x, enum machine_mode mode)
address_cost (rtx x, enum machine_mode mode, bool speed)
{
/* We may be asked for cost of various unusual addresses, such as operands
of push instruction. It is not worthwhile to complicate writing
@ -3584,15 +3590,15 @@ address_cost (rtx x, enum machine_mode mode)
if (!memory_address_p (mode, x))
return 1000;
return targetm.address_cost (x);
return targetm.address_cost (x, speed);
}
/* If the target doesn't override, compute the cost as with arithmetic. */
int
default_address_cost (rtx x)
default_address_cost (rtx x, bool speed)
{
return rtx_cost (x, MEM);
return rtx_cost (x, MEM, speed);
}
@ -4563,7 +4569,7 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
zero indicates an instruction pattern without a known cost. */
int
insn_rtx_cost (rtx pat)
insn_rtx_cost (rtx pat, bool speed)
{
int i, cost;
rtx set;
@ -4591,7 +4597,7 @@ insn_rtx_cost (rtx pat)
else
return 0;
cost = rtx_cost (SET_SRC (set), SET);
cost = rtx_cost (SET_SRC (set), SET, speed);
return cost > 0 ? cost : COSTS_N_INSNS (1);
}
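(Usage sketch, not part of the patch: after this interface change every rtx_cost, address_cost and insn_rtx_cost caller states whose profile it cares about; COSTS_N_INSNS (1) remains the scale for one average instruction. The helper name replacement_cheap_enough_p is invented and the fragment is not compilable outside GCC.)

static bool
replacement_cheap_enough_p (rtx insn, rtx old_src, rtx new_src)
{
  /* The block's profile decides whether we cost for speed or for size.  */
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));

  return rtx_cost (new_src, SET, speed) <= rtx_cost (old_src, SET, speed);
}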


@ -1665,12 +1665,13 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
rtx coeff;
unsigned HOST_WIDE_INT l;
HOST_WIDE_INT h;
bool speed = optimize_function_for_speed_p (cfun);
add_double (coeff0l, coeff0h, coeff1l, coeff1h, &l, &h);
coeff = immed_double_const (l, h, mode);
tem = simplify_gen_binary (MULT, mode, lhs, coeff);
return rtx_cost (tem, SET) <= rtx_cost (orig, SET)
return rtx_cost (tem, SET, speed) <= rtx_cost (orig, SET, speed)
? tem : 0;
}
}
@ -1859,12 +1860,13 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
rtx coeff;
unsigned HOST_WIDE_INT l;
HOST_WIDE_INT h;
bool speed = optimize_function_for_speed_p (cfun);
add_double (coeff0l, coeff0h, negcoeff1l, negcoeff1h, &l, &h);
coeff = immed_double_const (l, h, mode);
tem = simplify_gen_binary (MULT, mode, lhs, coeff);
return rtx_cost (tem, SET) <= rtx_cost (orig, SET)
return rtx_cost (tem, SET, speed) <= rtx_cost (orig, SET, speed)
? tem : 0;
}
}


@ -2125,7 +2125,8 @@ bool lshift_cheap_p (void)
if (!init)
{
rtx reg = gen_rtx_REG (word_mode, 10000);
int cost = rtx_cost (gen_rtx_ASHIFT (word_mode, const1_rtx, reg), SET);
int cost = rtx_cost (gen_rtx_ASHIFT (word_mode, const1_rtx, reg), SET,
optimize_insn_for_speed_p ());
cheap = cost < COSTS_N_INSNS (3);
init = true;
}


@ -481,7 +481,7 @@
#define TARGET_MS_BITFIELD_LAYOUT_P hook_bool_const_tree_false
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_false
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
#define TARGET_RTX_COSTS hook_bool_rtx_int_int_intp_false
#define TARGET_RTX_COSTS hook_bool_rtx_int_int_intp_bool_false
#define TARGET_MANGLE_TYPE hook_constcharptr_const_tree_null
#define TARGET_ALLOCATE_INITIAL_VALUE NULL


@ -648,11 +648,11 @@ struct gcc_target
scanned. In either case, *TOTAL contains the cost result. */
/* Note that CODE and OUTER_CODE ought to be RTX_CODE, but that's
not necessarily defined at this point. */
bool (* rtx_costs) (rtx x, int code, int outer_code, int *total);
bool (* rtx_costs) (rtx x, int code, int outer_code, int *total, bool speed);
/* Compute the cost of X, used as an address. Never called with
invalid addresses. */
int (* address_cost) (rtx x);
int (* address_cost) (rtx x, bool speed);
/* Return where to allocate pseudo for a given hard register initial
value. */

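(Hypothetical backend code, not part of this patch; the opcode choice and the cost numbers are invented. It only illustrates the new hook signatures: a target can now report one figure when an insn is costed for speed and another when it is costed for size.)

static bool
example_rtx_costs (rtx x ATTRIBUTE_UNUSED, int code,
                   int outer_code ATTRIBUTE_UNUSED, int *total, bool speed)
{
  if (code == MULT)
    {
      /* Say a multiply takes four insn-times but only one insn of space.  */
      *total = speed ? COSTS_N_INSNS (4) : COSTS_N_INSNS (1);
      return true;
    }
  return false;  /* Let rtx_cost fall back to its generic walk.  */
}

static int
example_address_cost (rtx addr, bool speed)
{
  /* Costing an address like ordinary arithmetic mirrors default_address_cost.  */
  return rtx_cost (addr, MEM, speed);
}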

@ -1124,7 +1124,7 @@ extern void tree_check_data_deps (void);
bool expr_invariant_in_loop_p (struct loop *, tree);
bool stmt_invariant_in_loop_p (struct loop *, gimple);
bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode);
unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode);
unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode, bool);
/* In tree-ssa-threadupdate.c. */
extern bool thread_through_all_blocks (bool);
@ -1155,7 +1155,7 @@ struct mem_address
struct affine_tree_combination;
tree create_mem_ref (gimple_stmt_iterator *, tree,
struct affine_tree_combination *);
struct affine_tree_combination *, bool);
rtx addr_for_mem_ref (struct mem_address *, bool);
void get_address_description (tree, struct mem_address *);
tree maybe_fold_tmr (tree);


@ -437,7 +437,8 @@ add_to_parts (struct mem_address *parts, tree elt)
element(s) to PARTS. */
static void
most_expensive_mult_to_index (struct mem_address *parts, aff_tree *addr)
most_expensive_mult_to_index (struct mem_address *parts, aff_tree *addr,
bool speed)
{
HOST_WIDE_INT coef;
double_int best_mult, amult, amult_neg;
@ -459,7 +460,7 @@ most_expensive_mult_to_index (struct mem_address *parts, aff_tree *addr)
|| !multiplier_allowed_in_address_p (coef, Pmode))
continue;
acost = multiply_by_cost (coef, Pmode);
acost = multiply_by_cost (coef, Pmode, speed);
if (acost > best_mult_cost)
{
@ -512,7 +513,7 @@ most_expensive_mult_to_index (struct mem_address *parts, aff_tree *addr)
addressing modes is useless. */
static void
addr_to_parts (aff_tree *addr, struct mem_address *parts)
addr_to_parts (aff_tree *addr, struct mem_address *parts, bool speed)
{
tree part;
unsigned i;
@ -532,7 +533,7 @@ addr_to_parts (aff_tree *addr, struct mem_address *parts)
/* First move the most expensive feasible multiplication
to index. */
most_expensive_mult_to_index (parts, addr);
most_expensive_mult_to_index (parts, addr, speed);
/* Try to find a base of the reference. Since at the moment
there is no reliable way how to distinguish between pointer and its
@ -573,13 +574,14 @@ gimplify_mem_ref_parts (gimple_stmt_iterator *gsi, struct mem_address *parts)
of created memory reference. */
tree
create_mem_ref (gimple_stmt_iterator *gsi, tree type, aff_tree *addr)
create_mem_ref (gimple_stmt_iterator *gsi, tree type, aff_tree *addr,
bool speed)
{
tree mem_ref, tmp;
tree atype;
struct mem_address parts;
addr_to_parts (addr, &parts);
addr_to_parts (addr, &parts, speed);
gimplify_mem_ref_parts (gsi, &parts);
mem_ref = create_mem_ref_raw (type, &parts);
if (mem_ref)


@ -219,6 +219,9 @@ struct ivopts_data
/* The currently optimized loop. */
struct loop *current_loop;
/* Are we optimizing for speed? */
bool speed;
/* Number of registers used in it. */
unsigned regs_used;
@ -2539,7 +2542,7 @@ get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
/* Returns estimate on cost of computing SEQ. */
static unsigned
seq_cost (rtx seq)
seq_cost (rtx seq, bool speed)
{
unsigned cost = 0;
rtx set;
@ -2548,7 +2551,7 @@ seq_cost (rtx seq)
{
set = single_set (seq);
if (set)
cost += rtx_cost (set, SET);
cost += rtx_cost (set, SET, speed);
else
cost++;
}
@ -2641,23 +2644,28 @@ prepare_decl_rtl (tree *expr_p, int *ws, void *data)
/* Determines cost of the computation of EXPR. */
static unsigned
computation_cost (tree expr)
computation_cost (tree expr, bool speed)
{
rtx seq, rslt;
tree type = TREE_TYPE (expr);
unsigned cost;
/* Avoid using hard regs in ways which may be unsupported. */
int regno = LAST_VIRTUAL_REGISTER + 1;
enum function_frequency real_frequency = cfun->function_frequency;
cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
crtl->maybe_hot_insn_p = speed;
walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
start_sequence ();
rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
seq = get_insns ();
end_sequence ();
default_rtl_profile ();
cfun->function_frequency = real_frequency;
cost = seq_cost (seq);
cost = seq_cost (seq, speed);
if (MEM_P (rslt))
cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
return cost;
}
@ -2833,7 +2841,7 @@ get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
/* Returns cost of addition in MODE. */
static unsigned
add_cost (enum machine_mode mode)
add_cost (enum machine_mode mode, bool speed)
{
static unsigned costs[NUM_MACHINE_MODES];
rtx seq;
@ -2850,7 +2858,7 @@ add_cost (enum machine_mode mode)
seq = get_insns ();
end_sequence ();
cost = seq_cost (seq);
cost = seq_cost (seq, speed);
if (!cost)
cost = 1;
@ -2895,7 +2903,7 @@ mbc_entry_eq (const void *entry1, const void *entry2)
/* Returns cost of multiplication by constant CST in MODE. */
unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
static htab_t costs;
struct mbc_entry **cached, act;
@ -2921,7 +2929,7 @@ multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
seq = get_insns ();
end_sequence ();
cost = seq_cost (seq);
cost = seq_cost (seq, speed);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
@ -2984,7 +2992,8 @@ multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
static comp_cost
get_address_cost (bool symbol_present, bool var_present,
unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
enum machine_mode mem_mode)
enum machine_mode mem_mode,
bool speed)
{
static bool initialized[MAX_MACHINE_MODE];
static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
@ -3100,8 +3109,8 @@ get_address_cost (bool symbol_present, bool var_present,
seq = get_insns ();
end_sequence ();
acost = seq_cost (seq);
acost += address_cost (addr, mem_mode);
acost = seq_cost (seq, speed);
acost += address_cost (addr, mem_mode, speed);
if (!acost)
acost = 1;
@ -3120,7 +3129,7 @@ get_address_cost (bool symbol_present, bool var_present,
If VAR_PRESENT is true, try whether the mode with
SYMBOL_PRESENT = false is cheaper even with cost of addition, and
if this is the case, use it. */
add_c = add_cost (Pmode);
add_c = add_cost (Pmode, speed);
for (i = 0; i < 8; i++)
{
var_p = i & 1;
@ -3178,10 +3187,10 @@ get_address_cost (bool symbol_present, bool var_present,
&& multiplier_allowed_in_address_p (ratio, mem_mode));
if (ratio != 1 && !ratio_p)
cost += multiply_by_cost (ratio, Pmode);
cost += multiply_by_cost (ratio, Pmode, speed);
if (s_offset && !offset_p && !symbol_present)
cost += add_cost (Pmode);
cost += add_cost (Pmode, speed);
acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
@ -3191,12 +3200,12 @@ get_address_cost (bool symbol_present, bool var_present,
/* Estimates cost of forcing expression EXPR into a variable. */
static comp_cost
force_expr_to_var_cost (tree expr)
force_expr_to_var_cost (tree expr, bool speed)
{
static bool costs_initialized = false;
static unsigned integer_cost;
static unsigned symbol_cost;
static unsigned address_cost;
static unsigned integer_cost [2];
static unsigned symbol_cost [2];
static unsigned address_cost [2];
tree op0, op1;
comp_cost cost0, cost1, cost;
enum machine_mode mode;
@ -3206,30 +3215,36 @@ force_expr_to_var_cost (tree expr)
tree type = build_pointer_type (integer_type_node);
tree var, addr;
rtx x;
int i;
var = create_tmp_var_raw (integer_type_node, "test_var");
TREE_STATIC (var) = 1;
x = produce_memory_decl_rtl (var, NULL);
SET_DECL_RTL (var, x);
integer_cost = computation_cost (build_int_cst (integer_type_node,
2000));
addr = build1 (ADDR_EXPR, type, var);
symbol_cost = computation_cost (addr) + 1;
address_cost
= computation_cost (build2 (POINTER_PLUS_EXPR, type,
addr,
build_int_cst (sizetype, 2000))) + 1;
if (dump_file && (dump_flags & TDF_DETAILS))
for (i = 0; i < 2; i++)
{
fprintf (dump_file, "force_expr_to_var_cost:\n");
fprintf (dump_file, " integer %d\n", (int) integer_cost);
fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
fprintf (dump_file, " address %d\n", (int) address_cost);
fprintf (dump_file, " other %d\n", (int) target_spill_cost);
fprintf (dump_file, "\n");
integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
2000), i);
symbol_cost[i] = computation_cost (addr, i) + 1;
address_cost[i]
= computation_cost (build2 (POINTER_PLUS_EXPR, type,
addr,
build_int_cst (sizetype, 2000)), i) + 1;
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
fprintf (dump_file, " address %d\n", (int) address_cost[i]);
fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
fprintf (dump_file, "\n");
}
}
costs_initialized = true;
@ -3243,7 +3258,7 @@ force_expr_to_var_cost (tree expr)
if (is_gimple_min_invariant (expr))
{
if (TREE_CODE (expr) == INTEGER_CST)
return new_cost (integer_cost, 0);
return new_cost (integer_cost [speed], 0);
if (TREE_CODE (expr) == ADDR_EXPR)
{
@ -3252,10 +3267,10 @@ force_expr_to_var_cost (tree expr)
if (TREE_CODE (obj) == VAR_DECL
|| TREE_CODE (obj) == PARM_DECL
|| TREE_CODE (obj) == RESULT_DECL)
return new_cost (symbol_cost, 0);
return new_cost (symbol_cost [speed], 0);
}
return new_cost (address_cost, 0);
return new_cost (address_cost [speed], 0);
}
switch (TREE_CODE (expr))
@ -3272,18 +3287,18 @@ force_expr_to_var_cost (tree expr)
if (is_gimple_val (op0))
cost0 = zero_cost;
else
cost0 = force_expr_to_var_cost (op0);
cost0 = force_expr_to_var_cost (op0, speed);
if (is_gimple_val (op1))
cost1 = zero_cost;
else
cost1 = force_expr_to_var_cost (op1);
cost1 = force_expr_to_var_cost (op1, speed);
break;
default:
/* Just an arbitrary value, FIXME. */
return new_cost (target_spill_cost, 0);
return new_cost (target_spill_cost[speed], 0);
}
mode = TYPE_MODE (TREE_TYPE (expr));
@ -3292,16 +3307,16 @@ force_expr_to_var_cost (tree expr)
case POINTER_PLUS_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
cost = new_cost (add_cost (mode), 0);
cost = new_cost (add_cost (mode, speed), 0);
break;
case MULT_EXPR:
if (cst_and_fits_in_hwi (op0))
cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
else if (cst_and_fits_in_hwi (op1))
cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
else if (cst_and_fits_in_hwi (op1))
cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
else
return new_cost (target_spill_cost, 0);
return new_cost (target_spill_cost [speed], 0);
break;
default:
@ -3315,8 +3330,8 @@ force_expr_to_var_cost (tree expr)
computations often are either loop invariant or at least can
be shared between several iv uses, so letting this grow without
limits would not give reasonable results. */
if (cost.cost > target_spill_cost)
cost.cost = target_spill_cost;
if (cost.cost > target_spill_cost [speed])
cost.cost = target_spill_cost [speed];
return cost;
}
@ -3334,7 +3349,7 @@ force_var_cost (struct ivopts_data *data,
walk_tree (&expr, find_depends, depends_on, NULL);
}
return force_expr_to_var_cost (expr);
return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset. The
@ -3365,7 +3380,7 @@ split_address_cost (struct ivopts_data *data,
*var_present = true;
fd_ivopts_data = data;
walk_tree (&addr, find_depends, depends_on, NULL);
return new_cost (target_spill_cost, 0);
return new_cost (target_spill_cost[data->speed], 0);
}
*offset += bitpos / BITS_PER_UNIT;
@ -3395,6 +3410,7 @@ ptr_difference_cost (struct ivopts_data *data,
{
HOST_WIDE_INT diff = 0;
comp_cost cost;
bool speed = optimize_loop_for_speed_p (data->current_loop);
gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
@ -3415,7 +3431,7 @@ ptr_difference_cost (struct ivopts_data *data,
cost = force_var_cost (data, e1, depends_on);
cost = add_costs (cost, force_var_cost (data, e2, depends_on));
cost.cost += add_cost (Pmode);
cost.cost += add_cost (Pmode, speed);
return cost;
}
@ -3459,14 +3475,14 @@ difference_cost (struct ivopts_data *data,
if (integer_zerop (e1))
{
cost = force_var_cost (data, e2, depends_on);
cost.cost += multiply_by_cost (-1, mode);
cost.cost += multiply_by_cost (-1, mode, data->speed);
return cost;
}
cost = force_var_cost (data, e1, depends_on);
cost = add_costs (cost, force_var_cost (data, e2, depends_on));
cost.cost += add_cost (mode);
cost.cost += add_cost (mode, data->speed);
return cost;
}
@ -3491,6 +3507,7 @@ get_computation_cost_at (struct ivopts_data *data,
comp_cost cost;
unsigned n_sums;
double_int rat;
bool speed = optimize_bb_for_speed_p (gimple_bb (at));
*depends_on = NULL;
@ -3571,7 +3588,7 @@ get_computation_cost_at (struct ivopts_data *data,
else
{
cost = force_var_cost (data, cbase, depends_on);
cost.cost += add_cost (TYPE_MODE (ctype));
cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
cost = add_costs (cost,
difference_cost (data,
ubase, build_int_cst (utype, 0),
@ -3590,20 +3607,20 @@ get_computation_cost_at (struct ivopts_data *data,
if (address_p)
return add_costs (cost, get_address_cost (symbol_present, var_present,
offset, ratio,
TYPE_MODE (TREE_TYPE (*use->op_p))));
TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
/* Otherwise estimate the costs for computing the expression. */
aratio = ratio > 0 ? ratio : -ratio;
if (!symbol_present && !var_present && !offset)
{
if (ratio != 1)
cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
return cost;
}
if (aratio != 1)
cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
n_sums = 1;
if (var_present
@ -3616,7 +3633,7 @@ get_computation_cost_at (struct ivopts_data *data,
if (offset)
cost.complexity++;
cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
return cost;
fallback:
@ -3630,7 +3647,7 @@ fallback:
if (address_p)
comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
return new_cost (computation_cost (comp), 0);
return new_cost (computation_cost (comp, speed), 0);
}
}
@ -4008,7 +4025,7 @@ determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
base = cand->iv->base;
cost_base = force_var_cost (data, base, NULL);
cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
@ -4062,7 +4079,7 @@ ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
/* We add size to the cost, so that we prefer eliminating ivs
if possible. */
return size + estimate_reg_pressure_cost (size, data->regs_used);
return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
}
/* For each size of the induction variable set determine the penalty. */
@ -4101,8 +4118,8 @@ determine_set_costs (struct ivopts_data *data)
{
fprintf (dump_file, "Global costs:\n");
fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
}
n = 0;
@ -5255,7 +5272,7 @@ rewrite_use_address (struct ivopts_data *data,
gcc_assert (ok);
unshare_aff_combination (&aff);
ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
copy_ref_info (ref, *use->op_p);
*use->op_p = ref;
}
@ -5469,6 +5486,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
gcc_assert (!data->niters);
data->current_loop = loop;
data->speed = optimize_loop_for_speed_p (loop);
if (dump_file && (dump_flags & TDF_DETAILS))
{