From f40751dd3417bc2b10d85a8f6afa3771c6de7101 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sun, 31 Aug 2008 11:44:25 +0200 Subject: [PATCH] fwprop.c (should_replace_address): Add speed attribute. * fwprop.c (should_replace_address): Add speed attribute. (PR_OPTIMIZE_FOR_SPEED): New flag. (propagate_rtx_1): Use it. (propagate_rtx): Set it. (try_fwprop_subst): Update call of rtx_costs. (forward_propagate_and_simplify): Likewise. * hooks.c (hook_int_rtx_bool_0): New. (hook_bool_rtx_int_int_intp_false): Replace by ... (hook_bool_rtx_int_int_intp_bool_false): ... this one. * hooks.h (hook_int_rtx_bool_0): New. (hook_bool_rtx_int_int_intp_false): Replace by ... (hook_bool_rtx_int_int_intp_bool_false): ... this one. * optabs.c (avoid_expensive_constant): Update call of rtx_cost. (prepare_cmp_insn): Update call of rtx_cost. * postreload.c (reload_cse_simplify_set): Update call of rtx_cost. (reload_cse_simplify_operands): Update call of rtx_cost. (reload_cse_move2add): Update call of rtx_cost. * target.h (struct gcc_target): Update rtx_costs and address_costs. * rtlanal.c (rtx_cost): Add speed argument. (address_cost): Add speed argument. (default_address_cost): Likewise. (insn_rtx_cost): Likewise. * cfgloopanal.c (seq_cost): Add speed argument. (target_reg_cost, target_spill_cost): Turn to array. (init_set_costs): Update for speed. (estimate_reg_pressure_cost): Add speed argument. * auto-inc-dec.c (attempt_change): Update call of rtx_cost. * dojump.c (prefer_and_bit_test): Update call of rtx_cost. * tree-ssa-loop-ivopts.c (struct ivopts_data): New field speed. (seq_cost): Add speed argument. (computation_cost): Add speed argument. (add_cost, multiply_by_const, get_address_cost): Add speed argument. (force_expr_to_var_cost): Update for profile info. (force_var_cost): Likewise. (split_address_cost): Likewise. (ptr_difference_cost): Likewise. (difference_cost): Likewise. (get_computation_cost_at): Likewise. (determine_iv_cost): Likewise. (ivopts_global_cost_for_size): Likewise. 
(rewrite_use_address): Likewise. (tree_ssa_iv_optimize_loop): Initialize speed field. * cse.c (optimize_this_for_speed_p): New static var. (notreg_cost): Update call of rtx_cost. (cse_extended_basic_block): Set optimize_this_for_speed_p. * ifcvt.c (cheap_bb_rtx_cost_p): Update call of rtx_cost. (noce_try_cmove_arith): Likewise. (noce_try_sign_mask): Likewise. * expr.c (compress_float_constant): Update rtx_cost calls. * tree-ssa-address.c (most_expensive_mult_to_index): Add speed argument. (addr_to_parts): Likewise. (create_mem_ref): Likewise. * dse.c (find_shift_sequence): Add speed argument. (replace_read): Update call. * calls.c (precompute_register_parameters): Update call of rtx_cost. * expmed.c (sdiv_pow2_cheap, smod_pow2_cheap, zero_cost, add_cost, neg_cost, shift_cost, shiftadd_cost, shiftsub_cost, mul_cost, sdiv_cost, udiv_cost, mul_widen_cost, mul_highpart_cost): Increase dimension. (init_expmed): Initialize for both size and speed. (expand_shift): Use profile. (synth_mult): Use profile. (choose_mult_variant): Use profile. (expand_mult): Use profile. (expand_mult_highpart_optab): Use profile. (expand_mult_highpart): Use profile. (expand_smod_pow2): Use profile. (expand_divmod): Use profile. * simplify-rtx.c (simplify_binary_operation_1): Update call of rtx_cost. * loop-invariant.c (create_new_invariant): Use profile. (gain_for_invariant): Add speed parameter. (best_gain_for_invariant): Likewise. (find_invariants_to_move): Likewise. (move_single_loop_invariants): Set it. * target-def.h (TARGET_RTX_COSTS): Use hook. * rtl.h (rtx_cost, address_cost, insn_rtx_cost): Update prototype. (optimize_insn_for_size_p, optimize_insn_for_speed_p): Declare. * output.h (default_address_cost): Update prototype. * combine.c (optimize_this_for_speed_p): New static var. (combine_validate_cost): Update call of rtx_cost. (combine_instructions): Set optimize_this_for_speed_p. (expand_compound_operation): Update call of rtx_cost. (make_extraction): Update call of rtx_cost. 
(force_to_mode): Update call of rtx_cost. (distribute_and_simplify_rtx): Update call of rtx_cost. * cfgloop.h (target_reg_cost, target_spill_cost): Turn to array. (estimate_reg_pressure_cost): Update prototype. * tree-flow.h (multiply_by_cost, create_mem_ref): Update prototype. * basic-block.h (optimize_insn_for_size_p, optimize_insn_for_speed_p): Remove. * config/alpha/alpha.c (alpha_rtx_costs): Update. (alpha_rtx_costs): Update. * config/frv/frv.c (frv_rtx_costs): Update. * config/s390/s390.c (s390_rtx_costs): Update. * config/m32c/m32c.c (m32c_memory_move_cost): Update. (m32c_rtx_costs): Update. * config/spu/spu.c (TARGET_ADDRESS_COST): Update. (spu_rtx_costs): Update. * config/sparc/sparc.c (sparc_rtx_costs): Update. * config/m32r/m32r.c (m32r_rtx_costs): Update. * config/i386/i386.c (ix86_address_cost): Update. (ix86_rtx_costs): Update. * config/sh/sh.c (sh_rtx_costs, sh_address_cost): Update. * config/pdp11/pdp11.c (pdp11_rtx_costs): Update. * config/avr/avr.c (avr_rtx_costs, avr_address_cost): Update. * config/crx/crx.c (crx_address_cost): Update. * config/xtensa/xtensa.c (xtensa_rtx_costs): Update. * config/stormy16/stormy16.c (xstormy16_address_cost, xstormy16_rtx_costs): Update. * config/m68hc11/m68hc11.c (m68hc11_address_cost, m68hc11_rtx_costs): Update. * config/cris/cris.c (cris_rtx_costs, cris_address_cost): Update. * config/iq2000/iq2000.c (iq2000_rtx_costs, iq2000_address_cost): Update. * config/mn10300/mn10300.c (mn10300_address_cost, mn10300_rtx_costs): Update. * config/ia64/ia64.c (ia64_rtx_costs): Update. * config/m68k/m68k.c (m68k_rtx_costs): Update. * config/rs6000/rs6000.c (rs6000_rtx_costs): Update. * config/arc/arc.c (arc_rtx_costs, arc_address_cost): Update. * config/mcore/mcore.c (TARGET_ADDRESS_COST): Update. (mcore_rtx_costs): Update. * config/score/score3.c (score3_rtx_costs): Update. * config/score/score7.c (score7_rtx_costs): Update. * config/score/score3.h (score3_rtx_costs): Update. * config/score/score7.h (score7_rtx_costs): Update. 
* config/score/score.c (score_rtx_costs): Update. * config/arm/arm.c (arm_address_cost): Update. (arm_rtx_costs_1): Update. (arm_rtx_costs_1): Update. (arm_size_rtx_costs): Update. (arm_size_rtx_costs): Update. (arm_size_rtx_costs): Update. (arm_xscale_rtx_costs): Update. (arm_thumb_address_cost): Update. * config/pa/pa.c (hppa_address_cost): Update. * config/mips/mips.c (mips_rtx_costs): Update. * config/vax/vax.c (vax_address_cost): Update. * config/h8300/h8300.c (h8300_shift_costs): Update. (h8300_rtx_costs): Update. * config/v850/v850.c (TARGET_ADDRESS_COST): Update. (v850_rtx_costs): Update. * config/mmix/mmix.c (mmix_rtx_costs, mmix_rtx_costs): Update. * config/bfin/bfin.c (bfin_address_cost): Update. (bfin_rtx_costs): Update. * stmt.c (lshift_cheap_p): Update. From-SVN: r139821 --- gcc/ChangeLog | 148 ++++++++++++++++++ gcc/auto-inc-dec.c | 7 +- gcc/basic-block.h | 2 - gcc/calls.c | 3 +- gcc/cfgloop.h | 6 +- gcc/cfgloopanal.c | 56 +++---- gcc/combine.c | 29 ++-- gcc/config/alpha/alpha.c | 15 +- gcc/config/arc/arc.c | 9 +- gcc/config/arm/arm.c | 36 +++-- gcc/config/avr/avr.c | 78 +++++----- gcc/config/bfin/bfin.c | 30 ++-- gcc/config/cris/cris.c | 11 +- gcc/config/crx/crx.c | 4 +- gcc/config/frv/frv.c | 3 +- gcc/config/h8300/h8300.c | 8 +- gcc/config/i386/i386.c | 111 +++++++------- gcc/config/ia64/ia64.c | 5 +- gcc/config/iq2000/iq2000.c | 8 +- gcc/config/m32c/m32c.c | 5 +- gcc/config/m32r/m32r.c | 7 +- gcc/config/m68hc11/m68hc11.c | 29 ++-- gcc/config/m68k/m68k.c | 5 +- gcc/config/mcore/mcore.c | 5 +- gcc/config/mips/mips.c | 37 ++--- gcc/config/mmix/mmix.c | 7 +- gcc/config/mn10300/mn10300.c | 10 +- gcc/config/pa/pa.c | 10 +- gcc/config/pdp11/pdp11.c | 5 +- gcc/config/rs6000/rs6000.c | 11 +- gcc/config/s390/s390.c | 11 +- gcc/config/score/score-protos.h | 2 +- gcc/config/score/score.c | 6 +- gcc/config/score/score3.c | 5 +- gcc/config/score/score3.h | 2 +- gcc/config/score/score7.c | 6 +- gcc/config/score/score7.h | 2 +- gcc/config/sh/sh.c | 12 +- 
gcc/config/sparc/sparc.c | 7 +- gcc/config/spu/spu.c | 5 +- gcc/config/stormy16/stormy16.c | 8 +- gcc/config/v850/v850.c | 6 +- gcc/config/vax/vax.c | 9 +- gcc/config/xtensa/xtensa.c | 7 +- gcc/cse.c | 4 +- gcc/dojump.c | 4 +- gcc/dse.c | 8 +- gcc/expmed.c | 255 +++++++++++++++++--------------- gcc/expr.c | 9 +- gcc/fwprop.c | 29 ++-- gcc/hooks.c | 15 +- gcc/hooks.h | 3 +- gcc/ifcvt.c | 12 +- gcc/loop-doloop.c | 3 +- gcc/loop-invariant.c | 22 +-- gcc/optabs.c | 9 +- gcc/output.h | 2 +- gcc/postreload.c | 24 +-- gcc/rtl.h | 8 +- gcc/rtlanal.c | 30 ++-- gcc/simplify-rtx.c | 6 +- gcc/stmt.c | 3 +- gcc/target-def.h | 2 +- gcc/target.h | 4 +- gcc/tree-flow.h | 4 +- gcc/tree-ssa-address.c | 14 +- gcc/tree-ssa-loop-ivopts.c | 144 ++++++++++-------- 67 files changed, 848 insertions(+), 554 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9d386fc0d9e..742e28abe1f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,151 @@ +2008-08-30 Jan Hubicka + + * fwprop.c (should_replace_address): Add speed attribute. + (PR_OPTIMIZE_FOR_SPEED): New flag. + (propagate_rtx_1): Use it. + (propagate_rtx): Set it. + (try_fwprop_subst): Update call of rtx_costs. + (forward_propagate_and_simplify): LIkewise. + * hooks.c (hook_int_rtx_bool_0): New + (hook_bool_rtx_int_int_intp_false): Replace by ... + (hook_bool_rtx_int_int_intp_bool_false): .. thisone. + * hooks.h (hook_int_rtx_bool_0): New + (hook_bool_rtx_int_int_intp_false): Replace by ... + (hook_bool_rtx_int_int_intp_bool_false): .. thisone. + * optabs.c (avoid_expensive_constant): UPdate call of rtx_cost. + (prepare_cmp_insn): UPdate call of rtx_cost. + * postreload.c (reload_cse_simplify_set): Update call of rtx_cost. + (reload_cse_simplify_operands): Update call of rtx_cost. + (reload_cse_move2add): call of rtx_cost. + * target.h (struct gcc_target): Update rtx_costs and address_costs. + * rtlanal.c (rtx_cost): Add speed argument. + (address_cost): Add speed argument + (default_address_cost): Likewise. 
+ (insn_rtx_cost): Likewise. + * cfgloopanal.c (seq_cost): Add speed argument. + (target_reg_cost, target_spill_cost): Turn to array. + (init_set_costs): Update for speed. + (estimate_reg_pressure_cost): Add speed argument. + * auto-inc-dec.c (attempt_change): Update call of rtx_cost. + * dojump.c (prefer_and_bit_test): UPdate call of rtx_cost. + * tree-ssa-loop-ivopts.c (struct ivopts_data): New field speed. + (seq_cost): Add speed argument. + (computation_cost): Add speed arugment. + (add_cost, multiply_by_const, get_address_cost): add speed argument. + (force_expr_to_var_cost): Update for profile info. + (force_var_cost): Likewise. + (split_address_cost): Likewise. + (ptr_difference_cost): Likewise. + (difference_cost): Likewise. + (get_computation_cost_at): Likewise. + (determine_iv_cost): Likewise. + (ivopts_global_cost_for_size): Likewise. + (rewrite_use_address): Likewise. + (tree_ssa_iv_optimize_loop): Initialize speed field. + * cse.c (optimize_this_for_speed_p): New static var. + (notreg_cost): Update call of rtx_cost. + (cse_extended_basic_block): set optimize_this_for_speed_p. + * ifcvt.c (cheap_bb_rtx_cost_p): Update call of rtx_cost. + (noce_try_cmove_arith): Likewise. + (noce_try_sign_mask): LIkewise. + * expr.c (compress_float_constant): Update rtx_cost calls. + * tree-ssa-address.c (most_expensive_mult_to_index): Add speed argument. + (addr_to_parts): Likewise. + (create_mem_ref): Likewise. + * dse.c (find_shift_sequence): Add speed argument. + (replace_read): Update call. + * calls.c (precompute_register_parameters): Update call of rtx_cost. + * expmed.c (sdiv_pow2_cheap, smod_pow2_cheap, zero_cost, add_cost, + * neg_cost, shift_cost, shiftadd_cost, + shiftsub_cost, mul_cost, sdiv_cost, udiv_cost ,mul_widen_cost, + mul_highpart_cost): Increase dimension. + (init_expmed): Initialize for both size and speed. + (expand_shift): Use profile. + (synth_mult): Use profile. + (choose_mult_variant): Use profile. + (expand_mult): Use profile. 
+ (expand_mult_highpart_optab): Use profile. + (expand_mult_highpart): Use profile. + (expand_smod_pow2): Use profile. + (expand_divmod): Use profile. + * simplify-rtx.c (simplify_binary_operation_1): Update call of rtx_cost. + * loop-invariant.c (create_new_invariant): Use profile. + (gain_for_invariant): Add speed parameter. + (best_gain_for_invariant): Likewise. + (find_invariants_to_move): Likewise. + (move_single_loop_invariants): Set it. + * target-def.h (TARGET_RTX_COSTS): Use hook. + * rtl.h (rtx_cost, address_cost, insn_rtx_cost): Update prototpe. + (optimize_insn_for_size_p, optimize_insn_for_speed_p): Declare. + * output.h (default_address_cost): Update prototype. + * combine.c (optimize_this_for_speed_p): New static var. + (combine_validate_cost): Update call of rtx_cost. + (combine_instructions): Set optimize_this_for_speed_p. + (expand_compound_operation): Update call of rtx_cost. + (make_extraction):Update call of rtx_cost. + (force_to_mode):Update call of rtx_cost. + (distribute_and_simplify_rtx):Update call of rtx_cost. + * cfgloop.h (target_reg_cost, target_spill_cost): Turn to array. + (estimate_reg_pressure_cost): Update prototype. + * tree-flow.h (multiply_by_cost, create_mem_ref): Update prototype. + * basic-block.h (optimize_insn_for_size_p, optimize_insn_for_speed_p): + Remove. + * config/alpha/alpha.c (alpha_rtx_costs): Update. + (alpha_rtx_costs): Update. + * config/frv/frv.c (frv_rtx_costs): Update. + * config/s390/s390.c (s390_rtx_costs): Update. + * config/m32c/m32c.c (m32c_memory_move_cost): Update. + (m32c_rtx_costs): Update. + * config/spu/spu.c (TARGET_ADDRESS_COST): Upate. + (spu_rtx_costs): Update. + * config/sparc/sparc.c (sparc_rtx_costs): Update. + * config/m32r/m32r.c (m32r_rtx_costs): Update. + * config/i386/i386.c (:ix86_address_cost): Update. + (ix86_rtx_costs): Update. + * config/sh/sh.c (sh_rtx_costs, sh_address_cost): Update. + * config/pdp11/pdp11.c (pdp11_rtx_costs): Update. 
+ * config/avr/avr.c (avr_rtx_costs, avr_address_cost): Update. + * config/crx/crx.c (crx_address_cost): Update. + * config/xtensa/xtensa.c (xtensa_rtx_costs): Update. + * config/stormy16/stormy16.c + (xstormy16_address_cost, xstormy16_rtx_costs): Update. + * config/m68hc11/m68hc11.c + (m68hc11_address_cost, m68hc11_rtx_costs): Update. + * config/cris/cris.c (cris_rtx_costs, cris_address_cost): Update. + * config/iq2000/iq2000.c (iq2000_rtx_costs, iq2000_address_cost): Update. + * config/mn10300/mn10300.c (mn10300_address_cost, mn10300_rtx_costs): Update + * config/ia64/ia64.c (ia64_rtx_costs): Update. + * config/m68k/m68k.c (m68k_rtx_costs): Update. + * config/rs6000/rs6000.c (rs6000_rtx_costs): Update. + * config/arc/arc.c (arc_rtx_costs, arc_address_cost): Update. + * config/mcore/mcore.c (TARGET_ADDRESS_COST): Update. + (mcore_rtx_costs): update. + * config/score/score3.c (score3_rtx_costs): Update. + * config/score/score7.c (score7_rtx_costs): Update. + * config/score/score3.h (score3_rtx_costs):Update. + * config/score/score7.h (score7_rtx_costs): Update. + * config/score/score.c (score_rtx_costs): Update. + * config/arm/arm.c (arm_address_cost): Update. + (arm_rtx_costs_1): Update. + (arm_rtx_costs_1): Update. + (arm_size_rtx_costs): Update. + (arm_size_rtx_costs): Update. + (arm_size_rtx_costs): Update. + (arm_xscale_rtx_costs): Update. + (arm_thumb_address_cost): Update. + * config/pa/pa.c (hppa_address_cost): Update. + * config/mips/mips.c (mips_rtx_costs): Update. + * config/vax/vax.c (vax_address_cost): Update. + * config/h8300/h8300.c (h8300_shift_costs): Update. + (h8300_rtx_costs): Update. + * config/v850/v850.c (TARGET_ADDRESS_COST): Update. + (v850_rtx_costs): Update. + * config/mmix/mmix.c (mmix_rtx_costs, mmix_rtx_costs): Update. + * config/bfin/bfin.c + (bfin_address_cost): Update. + (bfin_rtx_costs): Update. + * stmt.c (lshift_cheap_p): Update. 
+ 2008-08-30 Andrew Pinski PR middle-end/36444 diff --git a/gcc/auto-inc-dec.c b/gcc/auto-inc-dec.c index 35d6c87a696..6db9c7ef1f4 100644 --- a/gcc/auto-inc-dec.c +++ b/gcc/auto-inc-dec.c @@ -515,13 +515,14 @@ attempt_change (rtx new_addr, rtx inc_reg) rtx new_mem; int old_cost = 0; int new_cost = 0; + bool speed = optimize_bb_for_speed_p (bb); PUT_MODE (mem_tmp, mode); XEXP (mem_tmp, 0) = new_addr; - old_cost = rtx_cost (mem, 0) - + rtx_cost (PATTERN (inc_insn.insn), 0); - new_cost = rtx_cost (mem_tmp, 0); + old_cost = rtx_cost (mem, 0, speed) + + rtx_cost (PATTERN (inc_insn.insn), 0, speed); + new_cost = rtx_cost (mem_tmp, 0, speed); /* The first item of business is to see if this is profitable. */ if (old_cost < new_cost) diff --git a/gcc/basic-block.h b/gcc/basic-block.h index e1d13ea27c6..59a6f4aca9c 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -835,8 +835,6 @@ extern bool optimize_bb_for_size_p (const_basic_block); extern bool optimize_bb_for_speed_p (const_basic_block); extern bool optimize_edge_for_size_p (edge); extern bool optimize_edge_for_speed_p (edge); -extern bool optimize_insn_for_size_p (void); -extern bool optimize_insn_for_speed_p (void); extern bool optimize_function_for_size_p (struct function *); extern bool optimize_function_for_speed_p (struct function *); extern bool optimize_loop_for_size_p (struct loop *); diff --git a/gcc/calls.c b/gcc/calls.c index 3f322445560..96d05eaf502 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -713,7 +713,8 @@ precompute_register_parameters (int num_actuals, struct arg_data *args, || (GET_CODE (args[i].value) == SUBREG && REG_P (SUBREG_REG (args[i].value))))) && args[i].mode != BLKmode - && rtx_cost (args[i].value, SET) > COSTS_N_INSNS (1) + && rtx_cost (args[i].value, SET, optimize_insn_for_speed_p ()) + > COSTS_N_INSNS (1) && ((SMALL_REGISTER_CLASSES && *reg_parm_seen) || optimize)) args[i].value = copy_to_mode_reg (args[i].mode, args[i].value); diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 
d21d50bebdd..842ebb5d5a4 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -610,12 +610,12 @@ fel_init (loop_iterator *li, loop_p *loop, unsigned flags) extern unsigned target_avail_regs; extern unsigned target_res_regs; -extern unsigned target_reg_cost; -extern unsigned target_spill_cost; +extern unsigned target_reg_cost [2]; +extern unsigned target_spill_cost [2]; /* Register pressure estimation for induction variable optimizations & loop invariant motion. */ -extern unsigned estimate_reg_pressure_cost (unsigned, unsigned); +extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool); extern void init_set_costs (void); /* Loop optimizer initialization. */ diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c index db5bd2a62b9..d59fa2fb505 100644 --- a/gcc/cfgloopanal.c +++ b/gcc/cfgloopanal.c @@ -295,7 +295,7 @@ get_loop_level (const struct loop *loop) /* Returns estimate on cost of computing SEQ. */ static unsigned -seq_cost (const_rtx seq) +seq_cost (const_rtx seq, bool speed) { unsigned cost = 0; rtx set; @@ -304,7 +304,7 @@ seq_cost (const_rtx seq) { set = single_set (seq); if (set) - cost += rtx_cost (set, SET); + cost += rtx_cost (set, SET, speed); else cost++; } @@ -317,10 +317,10 @@ seq_cost (const_rtx seq) unsigned target_avail_regs; /* Number of available registers. */ unsigned target_res_regs; /* Number of registers reserved for temporary expressions. */ -unsigned target_reg_cost; /* The cost for register when there still +unsigned target_reg_cost[2]; /* The cost for register when there still is some reserve, but we are approaching the number of available registers. */ -unsigned target_spill_cost; /* The cost for register when we need +unsigned target_spill_cost[2]; /* The cost for register when we need to spill. */ /* Initialize the constants for computing set costs. 
*/ @@ -328,6 +328,7 @@ unsigned target_spill_cost; /* The cost for register when we need void init_set_costs (void) { + int speed; rtx seq; rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER); rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1); @@ -343,27 +344,32 @@ init_set_costs (void) target_res_regs = 3; - /* Set up the costs for using extra registers: + for (speed = 0; speed < 2; speed++) + { + crtl->maybe_hot_insn_p = speed; + /* Set up the costs for using extra registers: - 1) If not many free registers remain, we should prefer having an - additional move to decreasing the number of available registers. - (TARGET_REG_COST). - 2) If no registers are available, we need to spill, which may require - storing the old value to memory and loading it back - (TARGET_SPILL_COST). */ + 1) If not many free registers remain, we should prefer having an + additional move to decreasing the number of available registers. + (TARGET_REG_COST). + 2) If no registers are available, we need to spill, which may require + storing the old value to memory and loading it back + (TARGET_SPILL_COST). */ - start_sequence (); - emit_move_insn (reg1, reg2); - seq = get_insns (); - end_sequence (); - target_reg_cost = seq_cost (seq); + start_sequence (); + emit_move_insn (reg1, reg2); + seq = get_insns (); + end_sequence (); + target_reg_cost [speed] = seq_cost (seq, speed); - start_sequence (); - emit_move_insn (mem, reg1); - emit_move_insn (reg2, mem); - seq = get_insns (); - end_sequence (); - target_spill_cost = seq_cost (seq); + start_sequence (); + emit_move_insn (mem, reg1); + emit_move_insn (reg2, mem); + seq = get_insns (); + end_sequence (); + target_spill_cost [speed] = seq_cost (seq, speed); + } + default_rtl_profile (); } /* Estimates cost of increased register pressure caused by making N_NEW new @@ -371,7 +377,7 @@ init_set_costs (void) around the loop. 
*/ unsigned -estimate_reg_pressure_cost (unsigned n_new, unsigned n_old) +estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed) { unsigned cost; unsigned regs_needed = n_new + n_old; @@ -384,11 +390,11 @@ estimate_reg_pressure_cost (unsigned n_new, unsigned n_old) if (regs_needed <= target_avail_regs) /* If we are close to running out of registers, try to preserve them. */ - cost = target_reg_cost * n_new; + cost = target_reg_cost [speed] * n_new; else /* If we run out of registers, it is very expensive to add another one. */ - cost = target_spill_cost * n_new; + cost = target_spill_cost [speed] * n_new; if (optimize && flag_ira && (flag_ira_algorithm == IRA_ALGORITHM_REGIONAL || flag_ira_algorithm == IRA_ALGORITHM_MIXED) diff --git a/gcc/combine.c b/gcc/combine.c index a39649d7c8e..30ec0809625 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -297,6 +297,7 @@ static rtx added_links_insn; /* Basic block in which we are performing combines. */ static basic_block this_basic_block; +static bool optimize_this_for_speed_p; /* Length of the currently allocated uid_insn_cost array. */ @@ -793,10 +794,10 @@ combine_validate_cost (rtx i1, rtx i2, rtx i3, rtx newpat, rtx newi2pat, } /* Calculate the replacement insn_rtx_costs. */ - new_i3_cost = insn_rtx_cost (newpat); + new_i3_cost = insn_rtx_cost (newpat, optimize_this_for_speed_p); if (newi2pat) { - new_i2_cost = insn_rtx_cost (newi2pat); + new_i2_cost = insn_rtx_cost (newi2pat, optimize_this_for_speed_p); new_cost = (new_i2_cost > 0 && new_i3_cost > 0) ? 
new_i2_cost + new_i3_cost : 0; } @@ -811,7 +812,7 @@ combine_validate_cost (rtx i1, rtx i2, rtx i3, rtx newpat, rtx newi2pat, int old_other_cost, new_other_cost; old_other_cost = INSN_COST (undobuf.other_insn); - new_other_cost = insn_rtx_cost (newotherpat); + new_other_cost = insn_rtx_cost (newotherpat, optimize_this_for_speed_p); if (old_other_cost > 0 && new_other_cost > 0) { old_cost += old_other_cost; @@ -1068,6 +1069,7 @@ combine_instructions (rtx f, unsigned int nregs) create_log_links (); FOR_EACH_BB (this_basic_block) { + optimize_this_for_speed_p = optimize_bb_for_speed_p (this_basic_block); last_call_luid = 0; mem_last_set = -1; label_tick++; @@ -1090,7 +1092,8 @@ combine_instructions (rtx f, unsigned int nregs) /* Record the current insn_rtx_cost of this instruction. */ if (NONJUMP_INSN_P (insn)) - INSN_COST (insn) = insn_rtx_cost (PATTERN (insn)); + INSN_COST (insn) = insn_rtx_cost (PATTERN (insn), + optimize_this_for_speed_p); if (dump_file) fprintf(dump_file, "insn_cost %d: %d\n", INSN_UID (insn), INSN_COST (insn)); @@ -6107,9 +6110,11 @@ expand_compound_operation (rtx x) rtx temp2 = expand_compound_operation (temp); /* Make sure this is a profitable operation. */ - if (rtx_cost (x, SET) > rtx_cost (temp2, SET)) + if (rtx_cost (x, SET, optimize_this_for_speed_p) + > rtx_cost (temp2, SET, optimize_this_for_speed_p)) return temp2; - else if (rtx_cost (x, SET) > rtx_cost (temp, SET)) + else if (rtx_cost (x, SET, optimize_this_for_speed_p) + > rtx_cost (temp, SET, optimize_this_for_speed_p)) return temp; else return x; @@ -6534,7 +6539,8 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos, /* Prefer ZERO_EXTENSION, since it gives more information to backends. 
*/ - if (rtx_cost (temp, SET) <= rtx_cost (temp1, SET)) + if (rtx_cost (temp, SET, optimize_this_for_speed_p) + <= rtx_cost (temp1, SET, optimize_this_for_speed_p)) return temp; return temp1; } @@ -6728,7 +6734,8 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos, /* Prefer ZERO_EXTENSION, since it gives more information to backends. */ - if (rtx_cost (temp1, SET) < rtx_cost (temp, SET)) + if (rtx_cost (temp1, SET, optimize_this_for_speed_p) + < rtx_cost (temp, SET, optimize_this_for_speed_p)) temp = temp1; } pos_rtx = temp; @@ -7377,7 +7384,8 @@ force_to_mode (rtx x, enum machine_mode mode, unsigned HOST_WIDE_INT mask, y = simplify_gen_binary (AND, GET_MODE (x), XEXP (x, 0), GEN_INT (cval)); - if (rtx_cost (y, SET) < rtx_cost (x, SET)) + if (rtx_cost (y, SET, optimize_this_for_speed_p) + < rtx_cost (x, SET, optimize_this_for_speed_p)) x = y; } @@ -8521,7 +8529,8 @@ distribute_and_simplify_rtx (rtx x, int n) tmp = apply_distributive_law (simplify_gen_binary (inner_code, mode, new_op0, new_op1)); if (GET_CODE (tmp) != outer_code - && rtx_cost (tmp, SET) < rtx_cost (x, SET)) + && rtx_cost (tmp, SET, optimize_this_for_speed_p) + < rtx_cost (x, SET, optimize_this_for_speed_p)) return tmp; return NULL_RTX; diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index cba9370f629..cad90e1adce 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -1260,13 +1260,14 @@ alpha_legitimize_reload_address (rtx x, scanned. In either case, *TOTAL contains the cost result. 
*/ static bool -alpha_rtx_costs (rtx x, int code, int outer_code, int *total) +alpha_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed) { enum machine_mode mode = GET_MODE (x); bool float_mode_p = FLOAT_MODE_P (mode); const struct alpha_rtx_cost_data *cost_data; - if (optimize_size) + if (!speed) cost_data = &alpha_rtx_cost_size; else cost_data = &alpha_rtx_cost_data[alpha_tune]; @@ -1311,7 +1312,7 @@ alpha_rtx_costs (rtx x, int code, int outer_code, int *total) *total = COSTS_N_INSNS (15); else /* Otherwise we do a load from the GOT. */ - *total = COSTS_N_INSNS (optimize_size ? 1 : alpha_memory_latency); + *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency); return true; case HIGH: @@ -1326,8 +1327,8 @@ alpha_rtx_costs (rtx x, int code, int outer_code, int *total) else if (GET_CODE (XEXP (x, 0)) == MULT && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) { - *total = (rtx_cost (XEXP (XEXP (x, 0), 0), outer_code) - + rtx_cost (XEXP (x, 1), outer_code) + COSTS_N_INSNS (1)); + *total = (rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed) + + rtx_cost (XEXP (x, 1), outer_code, speed) + COSTS_N_INSNS (1)); return true; } return false; @@ -1375,7 +1376,7 @@ alpha_rtx_costs (rtx x, int code, int outer_code, int *total) return false; case MEM: - *total = COSTS_N_INSNS (optimize_size ? 1 : alpha_memory_latency); + *total = COSTS_N_INSNS (!speed ? 
1 : alpha_memory_latency); return true; case NEG: @@ -10657,7 +10658,7 @@ alpha_init_libfuncs (void) #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS alpha_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index 067d9f7ec79..891b8efefba 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -92,8 +92,8 @@ static void arc_internal_label (FILE *, const char *, unsigned long); static void arc_va_start (tree, rtx); static void arc_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int); -static bool arc_rtx_costs (rtx, int, int, int *); -static int arc_address_cost (rtx); +static bool arc_rtx_costs (rtx, int, int, int *, bool); +static int arc_address_cost (rtx, bool); static void arc_external_libcall (rtx); static bool arc_return_in_memory (const_tree, const_tree); static bool arc_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, @@ -830,7 +830,8 @@ arc_setup_incoming_varargs (CUMULATIVE_ARGS *cum, scanned. In either case, *TOTAL contains the cost result. */ static bool -arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) +arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { @@ -884,7 +885,7 @@ arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) If ADDR is not a valid address, its cost is irrelevant. 
*/ static int -arc_address_cost (rtx addr) +arc_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED) { switch (GET_CODE (addr)) { diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4de60bb67f1..6a4b3ef11cd 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -131,7 +131,8 @@ static bool arm_slowmul_rtx_costs (rtx, int, int, int *); static bool arm_fastmul_rtx_costs (rtx, int, int, int *); static bool arm_xscale_rtx_costs (rtx, int, int, int *); static bool arm_9e_rtx_costs (rtx, int, int, int *); -static int arm_address_cost (rtx); +static bool arm_rtx_costs (rtx, int, int, int *, bool); +static int arm_address_cost (rtx, bool); static bool arm_memory_load_p (rtx); static bool arm_cirrus_insn_p (rtx); static void cirrus_reorg (rtx); @@ -256,9 +257,8 @@ static bool arm_allocate_stack_slots_for_args (void); #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall -/* This will be overridden in arm_override_options. */ #undef TARGET_RTX_COSTS -#define TARGET_RTX_COSTS arm_slowmul_rtx_costs +#define TARGET_RTX_COSTS arm_rtx_costs #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST arm_address_cost @@ -1185,10 +1185,6 @@ arm_override_options (void) gcc_assert (arm_tune != arm_none); tune_flags = all_cores[(int)arm_tune].flags; - if (optimize_size) - targetm.rtx_costs = arm_size_rtx_costs; - else - targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs; /* Make sure that the processor choice does not conflict with any of the other command line choices. 
*/ @@ -4920,7 +4916,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) case MINUS: if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2) { - extra_cost = rtx_cost (XEXP (x, 1), code); + extra_cost = rtx_cost (XEXP (x, 1), code, true); if (!REG_OR_SUBREG_REG (XEXP (x, 0))) extra_cost += 4 * ARM_NUM_REGS (mode); return extra_cost; @@ -4969,7 +4965,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) if (GET_CODE (XEXP (x, 0)) == MULT) { - extra_cost = rtx_cost (XEXP (x, 0), code); + extra_cost = rtx_cost (XEXP (x, 0), code, true); if (!REG_OR_SUBREG_REG (XEXP (x, 1))) extra_cost += 4 * ARM_NUM_REGS (mode); return extra_cost; @@ -5168,7 +5164,7 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total) case ROTATE: if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG) { - *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code); + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false); return true; } /* Fall through */ @@ -5178,15 +5174,15 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total) case ASHIFTRT: if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT) { - *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code); + *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false); return true; } else if (mode == SImode) { - *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code); + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false); /* Slightly disparage register shifts, but not by much. */ if (GET_CODE (XEXP (x, 1)) != CONST_INT) - *total += 1 + rtx_cost (XEXP (x, 1), code); + *total += 1 + rtx_cost (XEXP (x, 1), code, false); return true; } @@ -5354,6 +5350,16 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total) } } +/* RTX costs when optimizing for size. 
*/ +static bool +arm_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed) +{ + if (!speed) + return arm_size_rtx_costs (x, code, outer_code, total); + else + return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total); +} + /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not supported on any "slowmul" cores, so it can be ignored. */ @@ -5546,7 +5552,7 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total) /* A COMPARE of a MULT is slow on XScale; the muls instruction will stall until the multiplication is complete. */ if (GET_CODE (XEXP (x, 0)) == MULT) - *total = 4 + rtx_cost (XEXP (x, 0), code); + *total = 4 + rtx_cost (XEXP (x, 0), code, true); else *total = arm_rtx_costs_1 (x, code, outer_code); return true; @@ -5666,7 +5672,7 @@ arm_thumb_address_cost (rtx x) } static int -arm_address_cost (rtx x) +arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) { return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x); } diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index dba224db886..ed8c97729a4 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -81,8 +81,8 @@ static void avr_reorg (void); static void avr_asm_out_ctor (rtx, int); static void avr_asm_out_dtor (rtx, int); static int avr_operand_rtx_cost (rtx, enum machine_mode, enum rtx_code); -static bool avr_rtx_costs (rtx, int, int, int *); -static int avr_address_cost (rtx); +static bool avr_rtx_costs (rtx, int, int, int *, bool); +static int avr_address_cost (rtx, bool); static bool avr_return_in_memory (const_tree, const_tree); static struct machine_function * avr_init_machine_status (void); static rtx avr_builtin_setjmp_frame_value (void); @@ -1562,7 +1562,7 @@ final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED, fprintf (asm_out_file, "/*DEBUG: 0x%x\t\t%d\t%d */\n", INSN_ADDRESSES (uid), INSN_ADDRESSES (uid) - last_insn_address, - rtx_cost (PATTERN (insn), INSN)); + rtx_cost (PATTERN (insn), INSN, !optimize_size)); }
last_insn_address = INSN_ADDRESSES (uid); } @@ -4991,7 +4991,8 @@ order_regs_for_local_alloc (void) operand's parent operator. */ static int -avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer) +avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer, + bool speed ATTRIBUTE_UNUSED) { enum rtx_code code = GET_CODE (x); int total; @@ -5021,7 +5022,8 @@ avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer) case, *TOTAL contains the cost result. */ static bool -avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) +avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total, + bool speed) { enum machine_mode mode = GET_MODE (x); HOST_WIDE_INT val; @@ -5153,8 +5155,8 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) { case QImode: if (AVR_HAVE_MUL) - *total = COSTS_N_INSNS (optimize_size ? 3 : 4); - else if (optimize_size) + *total = COSTS_N_INSNS (!speed ? 3 : 4); + else if (!speed) *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); else return false; @@ -5162,8 +5164,8 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case HImode: if (AVR_HAVE_MUL) - *total = COSTS_N_INSNS (optimize_size ? 7 : 10); - else if (optimize_size) + *total = COSTS_N_INSNS (!speed ? 7 : 10); + else if (!speed) *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); else return false; @@ -5180,7 +5182,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case MOD: case UDIV: case UMOD: - if (optimize_size) + if (!speed) *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); else return false; @@ -5194,7 +5196,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case QImode: if (GET_CODE (XEXP (x, 1)) != CONST_INT) { - *total = COSTS_N_INSNS (optimize_size ? 4 : 17); + *total = COSTS_N_INSNS (!speed ? 
4 : 17); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } else @@ -5212,7 +5214,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case HImode: if (GET_CODE (XEXP (x, 1)) != CONST_INT) { - *total = COSTS_N_INSNS (optimize_size ? 5 : 41); + *total = COSTS_N_INSNS (!speed ? 5 : 41); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } else @@ -5240,16 +5242,16 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) *total = COSTS_N_INSNS (5); break; case 4: - *total = COSTS_N_INSNS (optimize_size ? 5 : 8); + *total = COSTS_N_INSNS (!speed ? 5 : 8); break; case 6: - *total = COSTS_N_INSNS (optimize_size ? 5 : 9); + *total = COSTS_N_INSNS (!speed ? 5 : 9); break; case 5: - *total = COSTS_N_INSNS (optimize_size ? 5 : 10); + *total = COSTS_N_INSNS (!speed ? 5 : 10); break; default: - *total = COSTS_N_INSNS (optimize_size ? 5 : 41); + *total = COSTS_N_INSNS (!speed ? 5 : 41); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } break; @@ -5257,7 +5259,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case SImode: if (GET_CODE (XEXP (x, 1)) != CONST_INT) { - *total = COSTS_N_INSNS (optimize_size ? 7 : 113); + *total = COSTS_N_INSNS (!speed ? 7 : 113); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } else @@ -5278,10 +5280,10 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) *total = COSTS_N_INSNS (6); break; case 2: - *total = COSTS_N_INSNS (optimize_size ? 7 : 8); + *total = COSTS_N_INSNS (!speed ? 7 : 8); break; default: - *total = COSTS_N_INSNS (optimize_size ? 7 : 113); + *total = COSTS_N_INSNS (!speed ? 7 : 113); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } break; @@ -5298,7 +5300,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case QImode: if (GET_CODE (XEXP (x, 1)) != CONST_INT) { - *total = COSTS_N_INSNS (optimize_size ?
4 : 17); + *total = COSTS_N_INSNS (!speed ? 4 : 17); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } else @@ -5318,7 +5320,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case HImode: if (GET_CODE (XEXP (x, 1)) != CONST_INT) { - *total = COSTS_N_INSNS (optimize_size ? 5 : 41); + *total = COSTS_N_INSNS (!speed ? 5 : 41); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } else @@ -5344,17 +5346,17 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) *total = COSTS_N_INSNS (5); break; case 11: - *total = COSTS_N_INSNS (optimize_size ? 5 : 6); + *total = COSTS_N_INSNS (!speed ? 5 : 6); break; case 12: - *total = COSTS_N_INSNS (optimize_size ? 5 : 7); + *total = COSTS_N_INSNS (!speed ? 5 : 7); break; case 6: case 13: - *total = COSTS_N_INSNS (optimize_size ? 5 : 8); + *total = COSTS_N_INSNS (!speed ? 5 : 8); break; default: - *total = COSTS_N_INSNS (optimize_size ? 5 : 41); + *total = COSTS_N_INSNS (!speed ? 5 : 41); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } break; @@ -5362,7 +5364,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case SImode: if (GET_CODE (XEXP (x, 1)) != CONST_INT) { - *total = COSTS_N_INSNS (optimize_size ? 7 : 113); + *total = COSTS_N_INSNS (!speed ? 7 : 113); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } else @@ -5380,13 +5382,13 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) *total = COSTS_N_INSNS (6); break; case 2: - *total = COSTS_N_INSNS (optimize_size ? 7 : 8); + *total = COSTS_N_INSNS (!speed ? 7 : 8); break; case 31: *total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 5); break; default: - *total = COSTS_N_INSNS (optimize_size ? 7 : 113); + *total = COSTS_N_INSNS (!speed ? 
7 : 113); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } break; @@ -5403,7 +5405,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case QImode: if (GET_CODE (XEXP (x, 1)) != CONST_INT) { - *total = COSTS_N_INSNS (optimize_size ? 4 : 17); + *total = COSTS_N_INSNS (!speed ? 4 : 17); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } else @@ -5421,7 +5423,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case HImode: if (GET_CODE (XEXP (x, 1)) != CONST_INT) { - *total = COSTS_N_INSNS (optimize_size ? 5 : 41); + *total = COSTS_N_INSNS (!speed ? 5 : 41); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } else @@ -5450,17 +5452,17 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case 12: case 13: case 14: - *total = COSTS_N_INSNS (optimize_size ? 5 : 6); + *total = COSTS_N_INSNS (!speed ? 5 : 6); break; case 4: - *total = COSTS_N_INSNS (optimize_size ? 5 : 7); + *total = COSTS_N_INSNS (!speed ? 5 : 7); break; case 5: case 6: - *total = COSTS_N_INSNS (optimize_size ? 5 : 9); + *total = COSTS_N_INSNS (!speed ? 5 : 9); break; default: - *total = COSTS_N_INSNS (optimize_size ? 5 : 41); + *total = COSTS_N_INSNS (!speed ? 5 : 41); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } break; @@ -5468,7 +5470,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) case SImode: if (GET_CODE (XEXP (x, 1)) != CONST_INT) { - *total = COSTS_N_INSNS (optimize_size ? 7 : 113); + *total = COSTS_N_INSNS (!speed ? 7 : 113); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } else @@ -5481,7 +5483,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) *total = COSTS_N_INSNS (4); break; case 2: - *total = COSTS_N_INSNS (optimize_size ? 7 : 8); + *total = COSTS_N_INSNS (!speed ? 
7 : 8); break; case 8: case 16: @@ -5492,7 +5494,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) *total = COSTS_N_INSNS (6); break; default: - *total = COSTS_N_INSNS (optimize_size ? 7 : 113); + *total = COSTS_N_INSNS (!speed ? 7 : 113); *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code); } break; @@ -5543,7 +5545,7 @@ avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) /* Calculate the cost of a memory address. */ static int -avr_address_cost (rtx x) +avr_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) { if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x,1)) == CONST_INT diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c index 8fda5c01d8e..6ff1093de8c 100644 --- a/gcc/config/bfin/bfin.c +++ b/gcc/config/bfin/bfin.c @@ -1378,7 +1378,7 @@ bfin_dsp_memref_p (rtx x) All addressing modes are equally cheap on the Blackfin. */ static int -bfin_address_cost (rtx addr ATTRIBUTE_UNUSED) +bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) { return 1; } @@ -2871,7 +2871,7 @@ bfin_legitimate_constant_p (rtx x) } static bool -bfin_rtx_costs (rtx x, int code, int outer_code, int *total) +bfin_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed) { int cost2 = COSTS_N_INSNS (1); rtx op0, op1; @@ -2919,19 +2919,19 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total) if (val == 2 || val == 4) { *total = cost2; - *total += rtx_cost (XEXP (op0, 0), outer_code); - *total += rtx_cost (op1, outer_code); + *total += rtx_cost (XEXP (op0, 0), outer_code, speed); + *total += rtx_cost (op1, outer_code, speed); return true; } } *total = cost2; if (GET_CODE (op0) != REG && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) - *total += rtx_cost (op0, SET); + *total += rtx_cost (op0, SET, speed); #if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer towards creating too many induction variables. 
*/ if (!reg_or_7bit_operand (op1, SImode)) - *total += rtx_cost (op1, SET); + *total += rtx_cost (op1, SET, speed); #endif } else if (GET_MODE (x) == DImode) @@ -2939,10 +2939,10 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total) *total = 6 * cost2; if (GET_CODE (op1) != CONST_INT || !satisfies_constraint_Ks7 (op1)) - *total += rtx_cost (op1, PLUS); + *total += rtx_cost (op1, PLUS, speed); if (GET_CODE (op0) != REG && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) - *total += rtx_cost (op0, PLUS); + *total += rtx_cost (op0, PLUS, speed); } return true; @@ -2965,7 +2965,7 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total) op1 = XEXP (x, 1); if (GET_CODE (op0) != REG && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) - *total += rtx_cost (op0, code); + *total += rtx_cost (op0, code, speed); return true; @@ -2990,7 +2990,7 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total) if (GET_CODE (op0) != REG && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) - *total += rtx_cost (op0, code); + *total += rtx_cost (op0, code, speed); if (GET_MODE (x) == DImode) { @@ -3004,12 +3004,12 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total) if (code == AND) { if (! rhs_andsi3_operand (XEXP (x, 1), SImode)) - *total += rtx_cost (XEXP (x, 1), code); + *total += rtx_cost (XEXP (x, 1), code, speed); } else { if (! 
regorlog2_operand (XEXP (x, 1), SImode)) - *total += rtx_cost (XEXP (x, 1), code); + *total += rtx_cost (XEXP (x, 1), code, speed); } return true; @@ -3042,17 +3042,17 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total) op0 = XEXP (op0, 0); op1 = XEXP (op1, 0); } - else if (optimize_size) + else if (!speed) *total = COSTS_N_INSNS (1); else *total = COSTS_N_INSNS (3); if (GET_CODE (op0) != REG && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) - *total += rtx_cost (op0, MULT); + *total += rtx_cost (op0, MULT, speed); if (GET_CODE (op1) != REG && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG)) - *total += rtx_cost (op1, MULT); + *total += rtx_cost (op1, MULT, speed); } return true; diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c index a77f200d7ca..ff177f41f04 100644 --- a/gcc/config/cris/cris.c +++ b/gcc/config/cris/cris.c @@ -112,8 +112,8 @@ static void cris_asm_output_mi_thunk static void cris_file_start (void); static void cris_init_libfuncs (void); -static bool cris_rtx_costs (rtx, int, int, int *); -static int cris_address_cost (rtx); +static bool cris_rtx_costs (rtx, int, int, int *, bool); +static int cris_address_cost (rtx, bool); static bool cris_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, const_tree, bool); static int cris_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, @@ -1756,7 +1756,8 @@ cris_expand_return (bool on_stack) scanned. In either case, *TOTAL contains the cost result. 
*/ static bool -cris_rtx_costs (rtx x, int code, int outer_code, int *total) +cris_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed) { switch (code) { @@ -1840,7 +1841,7 @@ cris_rtx_costs (rtx x, int code, int outer_code, int *total) return false; case ZERO_EXTEND: case SIGN_EXTEND: - *total = rtx_cost (XEXP (x, 0), outer_code); + *total = rtx_cost (XEXP (x, 0), outer_code, speed); return true; default: @@ -1851,7 +1852,7 @@ cris_rtx_costs (rtx x, int code, int outer_code, int *total) /* The ADDRESS_COST worker. */ static int -cris_address_cost (rtx x) +cris_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) { /* The metric to use for the cost-macros is unclear. The metric used here is (the number of cycles needed) / 2, diff --git a/gcc/config/crx/crx.c b/gcc/config/crx/crx.c index 2f4aa6f176b..1ae1bcfcd46 100644 --- a/gcc/config/crx/crx.c +++ b/gcc/config/crx/crx.c @@ -136,7 +136,7 @@ static bool crx_fixed_condition_code_regs (unsigned int *, unsigned int *); static rtx crx_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, int incoming ATTRIBUTE_UNUSED); static bool crx_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED); -static int crx_address_cost (rtx); +static int crx_address_cost (rtx, bool); /*****************************************************************************/ /* STACK LAYOUT AND CALLING CONVENTIONS */ @@ -800,7 +800,7 @@ crx_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, /* Return cost of the memory address x. 
*/ static int -crx_address_cost (rtx addr) +crx_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED) { enum crx_addrtype addrtype; struct crx_address address; diff --git a/gcc/config/frv/frv.c b/gcc/config/frv/frv.c index 7fa2e22f094..4a57486a566 100644 --- a/gcc/config/frv/frv.c +++ b/gcc/config/frv/frv.c @@ -9471,7 +9471,8 @@ static bool frv_rtx_costs (rtx x, int code ATTRIBUTE_UNUSED, int outer_code ATTRIBUTE_UNUSED, - int *total) + int *total, + bool speed ATTRIBUTE_UNUSED) { if (outer_code == MEM) { diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c index 3d9c0fb7efb..992c73a4c1b 100644 --- a/gcc/config/h8300/h8300.c +++ b/gcc/config/h8300/h8300.c @@ -1152,7 +1152,7 @@ h8300_shift_costs (rtx x) /* Worker function for TARGET_RTX_COSTS. */ static bool -h8300_rtx_costs (rtx x, int code, int outer_code, int *total) +h8300_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed) { if (TARGET_H8300SX && outer_code == MEM) { @@ -1178,7 +1178,7 @@ h8300_rtx_costs (rtx x, int code, int outer_code, int *total) { /* Constant operands need the same number of processor states as register operands. Although we could try to - use a size-based cost for optimize_size, the lack of + use a size-based cost for !speed, the lack of of a mode makes the results very unpredictable. */ *total = 0; return true; @@ -1243,11 +1243,11 @@ h8300_rtx_costs (rtx x, int code, int outer_code, int *total) { case QImode: case HImode: - *total = COSTS_N_INSNS (optimize_size ? 4 : 10); + *total = COSTS_N_INSNS (!speed ? 4 : 10); return false; case SImode: - *total = COSTS_N_INSNS (optimize_size ? 4 : 18); + *total = COSTS_N_INSNS (!speed ? 
4 : 18); return false; default: diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b240b7d6b86..025eee6a99c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -8491,7 +8491,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) requires to two regs - that would mean more pseudos with longer lifetimes. */ static int -ix86_address_cost (rtx x) +ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) { struct ix86_address parts; int cost = 1; @@ -25244,10 +25244,11 @@ ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) scanned. In either case, *TOTAL contains the cost result. */ static bool -ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) +ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed) { enum rtx_code outer_code = (enum rtx_code) outer_code_i; enum machine_mode mode = GET_MODE (x); + const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost; switch (code) { @@ -25299,13 +25300,13 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) && GET_MODE (XEXP (x, 0)) == SImode) *total = 1; else if (TARGET_ZERO_EXTEND_WITH_AND) - *total = ix86_cost->add; + *total = cost->add; else - *total = ix86_cost->movzx; + *total = cost->movzx; return false; case SIGN_EXTEND: - *total = ix86_cost->movsx; + *total = cost->movsx; return false; case ASHIFT: @@ -25315,13 +25316,13 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); if (value == 1) { - *total = ix86_cost->add; + *total = cost->add; return false; } if ((value == 2 || value == 3) - && ix86_cost->lea <= ix86_cost->shift_const) + && cost->lea <= cost->shift_const) { - *total = ix86_cost->lea; + *total = cost->lea; return false; } } @@ -25336,24 +25337,24 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) if (CONST_INT_P (XEXP (x, 1))) { if (INTVAL (XEXP (x, 1)) > 32) - *total = ix86_cost->shift_const + COSTS_N_INSNS (2); + *total 
= cost->shift_const + COSTS_N_INSNS (2); else - *total = ix86_cost->shift_const * 2; + *total = cost->shift_const * 2; } else { if (GET_CODE (XEXP (x, 1)) == AND) - *total = ix86_cost->shift_var * 2; + *total = cost->shift_var * 2; else - *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2); + *total = cost->shift_var * 6 + COSTS_N_INSNS (2); } } else { if (CONST_INT_P (XEXP (x, 1))) - *total = ix86_cost->shift_const; + *total = cost->shift_const; else - *total = ix86_cost->shift_var; + *total = cost->shift_var; } return false; @@ -25361,18 +25362,18 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) { /* ??? SSE scalar cost should be used here. */ - *total = ix86_cost->fmul; + *total = cost->fmul; return false; } else if (X87_FLOAT_MODE_P (mode)) { - *total = ix86_cost->fmul; + *total = cost->fmul; return false; } else if (FLOAT_MODE_P (mode)) { /* ??? SSE vector cost should be used here. */ - *total = ix86_cost->fmul; + *total = cost->fmul; return false; } else @@ -25413,9 +25414,9 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) op0 = XEXP (op0, 0), mode = GET_MODE (op0); } - *total = (ix86_cost->mult_init[MODE_INDEX (mode)] - + nbits * ix86_cost->mult_bit - + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code)); + *total = (cost->mult_init[MODE_INDEX (mode)] + + nbits * cost->mult_bit + + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed)); return true; } @@ -25426,14 +25427,14 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) case UMOD: if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) /* ??? SSE cost should be used here. */ - *total = ix86_cost->fdiv; + *total = cost->fdiv; else if (X87_FLOAT_MODE_P (mode)) - *total = ix86_cost->fdiv; + *total = cost->fdiv; else if (FLOAT_MODE_P (mode)) /* ??? SSE vector cost should be used here. 
*/ - *total = ix86_cost->fdiv; + *total = cost->fdiv; else - *total = ix86_cost->divide[MODE_INDEX (mode)]; + *total = cost->divide[MODE_INDEX (mode)]; return false; case PLUS: @@ -25448,11 +25449,11 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); if (val == 2 || val == 4 || val == 8) { - *total = ix86_cost->lea; - *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); + *total = cost->lea; + *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed); *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), - outer_code); - *total += rtx_cost (XEXP (x, 1), outer_code); + outer_code, speed); + *total += rtx_cost (XEXP (x, 1), outer_code, speed); return true; } } @@ -25462,18 +25463,18 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); if (val == 2 || val == 4 || val == 8) { - *total = ix86_cost->lea; - *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); - *total += rtx_cost (XEXP (x, 1), outer_code); + *total = cost->lea; + *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed); + *total += rtx_cost (XEXP (x, 1), outer_code, speed); return true; } } else if (GET_CODE (XEXP (x, 0)) == PLUS) { - *total = ix86_cost->lea; - *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); - *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); - *total += rtx_cost (XEXP (x, 1), outer_code); + *total = cost->lea; + *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed); + *total += rtx_cost (XEXP (x, 1), outer_code, speed); return true; } } @@ -25483,18 +25484,18 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) { /* ??? SSE cost should be used here. 
*/ - *total = ix86_cost->fadd; + *total = cost->fadd; return false; } else if (X87_FLOAT_MODE_P (mode)) { - *total = ix86_cost->fadd; + *total = cost->fadd; return false; } else if (FLOAT_MODE_P (mode)) { /* ??? SSE vector cost should be used here. */ - *total = ix86_cost->fadd; + *total = cost->fadd; return false; } /* FALLTHRU */ @@ -25504,10 +25505,10 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) case XOR: if (!TARGET_64BIT && mode == DImode) { - *total = (ix86_cost->add * 2 - + (rtx_cost (XEXP (x, 0), outer_code) + *total = (cost->add * 2 + + (rtx_cost (XEXP (x, 0), outer_code, speed) << (GET_MODE (XEXP (x, 0)) != DImode)) - + (rtx_cost (XEXP (x, 1), outer_code) + + (rtx_cost (XEXP (x, 1), outer_code, speed) << (GET_MODE (XEXP (x, 1)) != DImode))); return true; } @@ -25517,27 +25518,27 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) { /* ??? SSE cost should be used here. */ - *total = ix86_cost->fchs; + *total = cost->fchs; return false; } else if (X87_FLOAT_MODE_P (mode)) { - *total = ix86_cost->fchs; + *total = cost->fchs; return false; } else if (FLOAT_MODE_P (mode)) { /* ??? SSE vector cost should be used here. */ - *total = ix86_cost->fchs; + *total = cost->fchs; return false; } /* FALLTHRU */ case NOT: if (!TARGET_64BIT && mode == DImode) - *total = ix86_cost->add * 2; + *total = cost->add * 2; else - *total = ix86_cost->add; + *total = cost->add; return false; case COMPARE: @@ -25548,9 +25549,9 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) { /* This kind of construct is implemented using test[bwl]. Treat it as if we had an AND. 
*/ - *total = (ix86_cost->add - + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code) - + rtx_cost (const1_rtx, outer_code)); + *total = (cost->add + + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed) + + rtx_cost (const1_rtx, outer_code, speed)); return true; } return false; @@ -25563,23 +25564,23 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total) case ABS: if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) /* ??? SSE cost should be used here. */ - *total = ix86_cost->fabs; + *total = cost->fabs; else if (X87_FLOAT_MODE_P (mode)) - *total = ix86_cost->fabs; + *total = cost->fabs; else if (FLOAT_MODE_P (mode)) /* ??? SSE vector cost should be used here. */ - *total = ix86_cost->fabs; + *total = cost->fabs; return false; case SQRT: if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) /* ??? SSE cost should be used here. */ - *total = ix86_cost->fsqrt; + *total = cost->fsqrt; else if (X87_FLOAT_MODE_P (mode)) - *total = ix86_cost->fsqrt; + *total = cost->fsqrt; else if (FLOAT_MODE_P (mode)) /* ??? SSE vector cost should be used here. */ - *total = ix86_cost->fsqrt; + *total = cost->fsqrt; return false; case UNSPEC: diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index d9100e8d40b..c16ecc7e3c3 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -202,7 +202,7 @@ static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, tree, bool); static bool ia64_function_ok_for_sibcall (tree, tree); static bool ia64_return_in_memory (const_tree, const_tree); -static bool ia64_rtx_costs (rtx, int, int, int *); +static bool ia64_rtx_costs (rtx, int, int, int *, bool); static int ia64_unspec_may_trap_p (const_rtx, unsigned); static void fix_range (const char *); static bool ia64_handle_option (size_t, const char *, int); @@ -4810,7 +4810,8 @@ ia64_print_operand (FILE * file, rtx x, int code) /* ??? This is incomplete. 
*/ static bool -ia64_rtx_costs (rtx x, int code, int outer_code, int *total) +ia64_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { diff --git a/gcc/config/iq2000/iq2000.c b/gcc/config/iq2000/iq2000.c index 06c187bc451..a716e729532 100644 --- a/gcc/config/iq2000/iq2000.c +++ b/gcc/config/iq2000/iq2000.c @@ -161,8 +161,8 @@ static bool iq2000_return_in_memory (const_tree, const_tree); static void iq2000_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int); -static bool iq2000_rtx_costs (rtx, int, int, int *); -static int iq2000_address_cost (rtx); +static bool iq2000_rtx_costs (rtx, int, int, int *, bool); +static int iq2000_address_cost (rtx, bool); static section *iq2000_select_section (tree, int, unsigned HOST_WIDE_INT); static bool iq2000_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, const_tree, bool); @@ -744,7 +744,7 @@ iq2000_move_1word (rtx operands[], rtx insn, int unsignedp) /* Provide the costs of an addressing mode that contains ADDR. 
*/ static int -iq2000_address_cost (rtx addr) +iq2000_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED) { switch (GET_CODE (addr)) { @@ -3203,7 +3203,7 @@ print_operand (FILE *file, rtx op, int letter) } static bool -iq2000_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int * total) +iq2000_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int * total, bool speed) { enum machine_mode mode = GET_MODE (x); diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c index dcd5b374f93..3fda87bcf0f 100644 --- a/gcc/config/m32c/m32c.c +++ b/gcc/config/m32c/m32c.c @@ -2119,7 +2119,8 @@ m32c_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS m32c_rtx_costs static bool -m32c_rtx_costs (rtx x, int code, int outer_code, int *total) +m32c_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { @@ -2198,7 +2199,7 @@ m32c_rtx_costs (rtx x, int code, int outer_code, int *total) #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST m32c_address_cost static int -m32c_address_cost (rtx addr) +m32c_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED) { int i; /* fprintf(stderr, "\naddress_cost\n"); diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c index 23e7851b160..790eeef1232 100644 --- a/gcc/config/m32r/m32r.c +++ b/gcc/config/m32r/m32r.c @@ -85,7 +85,7 @@ static bool m32r_return_in_memory (const_tree, const_tree); static void m32r_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int); static void init_idents (void); -static bool m32r_rtx_costs (rtx, int, int, int *); +static bool m32r_rtx_costs (rtx, int, int, int *, bool speed); static bool m32r_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, const_tree, bool); static int m32r_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, @@ -126,7 +126,7 @@ static int m32r_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, #undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS m32r_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 #undef TARGET_PROMOTE_PROTOTYPES #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true @@ -1092,7 +1092,8 @@ m32r_issue_rate (void) /* Cost functions. */ static bool -m32r_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) +m32r_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { diff --git a/gcc/config/m68hc11/m68hc11.c b/gcc/config/m68hc11/m68hc11.c index a52739be67e..b19f1e7c4b8 100644 --- a/gcc/config/m68hc11/m68hc11.c +++ b/gcc/config/m68hc11/m68hc11.c @@ -67,10 +67,10 @@ static void m68hc11_reorg (void); static int go_if_legitimate_address_internal (rtx, enum machine_mode, int); static rtx m68hc11_expand_compare (enum rtx_code, rtx, rtx); static int must_parenthesize (rtx); -static int m68hc11_address_cost (rtx); +static int m68hc11_address_cost (rtx, bool); static int m68hc11_shift_cost (enum machine_mode, rtx, int); static int m68hc11_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code); -static bool m68hc11_rtx_costs (rtx, int, int, int *); +static bool m68hc11_rtx_costs (rtx, int, int, int *, bool); static tree m68hc11_handle_fntype_attribute (tree *, tree, tree, int, bool *); const struct attribute_spec m68hc11_attribute_table[]; @@ -5145,7 +5145,7 @@ m68hc11_register_move_cost (enum machine_mode mode, enum reg_class from, If ADDR is not a valid address, its cost is irrelevant. 
*/ static int -m68hc11_address_cost (rtx addr) +m68hc11_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED) { int cost = 4; @@ -5230,7 +5230,7 @@ m68hc11_shift_cost (enum machine_mode mode, rtx x, int shift) { int total; - total = rtx_cost (x, SET); + total = rtx_cost (x, SET, !optimize_size); if (mode == QImode) total += m68hc11_cost->shiftQI_const[shift % 8]; else if (mode == HImode) @@ -5274,14 +5274,14 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code, return m68hc11_shift_cost (mode, XEXP (x, 0), INTVAL (XEXP (x, 1))); } - total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code); + total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size); total += m68hc11_cost->shift_var; return total; case AND: case XOR: case IOR: - total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code); + total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size); total += m68hc11_cost->logical; /* Logical instructions are byte instructions only. 
*/ @@ -5290,7 +5290,7 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code, case MINUS: case PLUS: - total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code); + total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size); total += m68hc11_cost->add; if (GET_MODE_SIZE (mode) > 2) { @@ -5301,7 +5301,7 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code, case UDIV: case DIV: case MOD: - total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code); + total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size); switch (mode) { case QImode: @@ -5324,16 +5324,16 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code, if (mode == HImode && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) return m68hc11_cost->multQI - + rtx_cost (XEXP (XEXP (x, 0), 0), code) - + rtx_cost (XEXP (XEXP (x, 1), 0), code); + + rtx_cost (XEXP (XEXP (x, 0), 0), code, !optimize_size) + + rtx_cost (XEXP (XEXP (x, 1), 0), code, !optimize_size); /* emul instruction produces 32-bit result for 68HC12. 
*/ if (TARGET_M6812 && mode == SImode && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) return m68hc11_cost->multHI - + rtx_cost (XEXP (XEXP (x, 0), 0), code) - + rtx_cost (XEXP (XEXP (x, 1), 0), code); + + rtx_cost (XEXP (XEXP (x, 0), 0), code, !optimize_size) + + rtx_cost (XEXP (XEXP (x, 1), 0), code, !optimize_size); - total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code); + total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size); switch (mode) @@ -5362,7 +5362,7 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code, case COMPARE: case ABS: case ZERO_EXTEND: - total = extra_cost + rtx_cost (XEXP (x, 0), code); + total = extra_cost + rtx_cost (XEXP (x, 0), code, !optimize_size); if (mode == QImode) { return total + COSTS_N_INSNS (1); @@ -5389,7 +5389,8 @@ m68hc11_rtx_costs_1 (rtx x, enum rtx_code code, } static bool -m68hc11_rtx_costs (rtx x, int code, int outer_code, int *total) +m68hc11_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c index 1853073af81..46647ea9b97 100644 --- a/gcc/config/m68k/m68k.c +++ b/gcc/config/m68k/m68k.c @@ -146,7 +146,7 @@ static tree m68k_handle_fndecl_attribute (tree *node, tree name, static void m68k_compute_frame_layout (void); static bool m68k_save_reg (unsigned int regno, bool interrupt_handler); static bool m68k_ok_for_sibcall_p (tree, tree); -static bool m68k_rtx_costs (rtx, int, int, int *); +static bool m68k_rtx_costs (rtx, int, int, int *, bool); #if M68K_HONOR_TARGET_STRICT_ALIGNMENT static bool m68k_return_in_memory (const_tree, const_tree); #endif @@ -2205,7 +2205,8 @@ const_int_cost (HOST_WIDE_INT i) } static bool -m68k_rtx_costs (rtx x, int code, int outer_code, int *total) +m68k_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { diff --git a/gcc/config/mcore/mcore.c b/gcc/config/mcore/mcore.c index b80f59f476c..229b32eab6d 100644 --- a/gcc/config/mcore/mcore.c +++ 
b/gcc/config/mcore/mcore.c @@ -182,7 +182,7 @@ static int mcore_arg_partial_bytes (CUMULATIVE_ARGS *, #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS mcore_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG mcore_reorg @@ -479,7 +479,8 @@ mcore_ior_cost (rtx x) } static bool -mcore_rtx_costs (rtx x, int code, int outer_code, int * total) +mcore_rtx_costs (rtx x, int code, int outer_code, int * total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 8a2ba16c452..dea7ffe57fa 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -3225,8 +3225,8 @@ mips_binary_cost (rtx x, int single_cost, int double_cost) else cost = single_cost; return (cost - + rtx_cost (XEXP (x, 0), 0) - + rtx_cost (XEXP (x, 1), GET_CODE (x))); + + rtx_cost (XEXP (x, 0), 0, !optimize_size) + + rtx_cost (XEXP (x, 1), GET_CODE (x), !optimize_size)); } /* Return the cost of floating-point multiplications of mode MODE. */ @@ -3296,7 +3296,8 @@ mips_zero_extend_cost (enum machine_mode mode, rtx op) /* Implement TARGET_RTX_COSTS. */ static bool -mips_rtx_costs (rtx x, int code, int outer_code, int *total) +mips_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed) { enum machine_mode mode = GET_MODE (x); bool float_mode_p = FLOAT_MODE_P (mode); @@ -3443,7 +3444,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total) && UINTVAL (XEXP (x, 1)) == 0xffffffff) { *total = (mips_zero_extend_cost (mode, XEXP (x, 0)) - + rtx_cost (XEXP (x, 0), 0)); + + rtx_cost (XEXP (x, 0), 0, speed)); return true; } /* Fall through. */ @@ -3475,7 +3476,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total) case LO_SUM: /* Low-part immediates need an extended MIPS16 instruction. */ *total = (COSTS_N_INSNS (TARGET_MIPS16 ? 
2 : 1) - + rtx_cost (XEXP (x, 0), 0)); + + rtx_cost (XEXP (x, 0), 0, speed)); return true; case LT: @@ -3515,17 +3516,17 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total) if (GET_CODE (op0) == MULT && GET_CODE (XEXP (op0, 0)) == NEG) { *total = (mips_fp_mult_cost (mode) - + rtx_cost (XEXP (XEXP (op0, 0), 0), 0) - + rtx_cost (XEXP (op0, 1), 0) - + rtx_cost (op1, 0)); + + rtx_cost (XEXP (XEXP (op0, 0), 0), 0, speed) + + rtx_cost (XEXP (op0, 1), 0, speed) + + rtx_cost (op1, 0, speed)); return true; } if (GET_CODE (op1) == MULT) { *total = (mips_fp_mult_cost (mode) - + rtx_cost (op0, 0) - + rtx_cost (XEXP (op1, 0), 0) - + rtx_cost (XEXP (op1, 1), 0)); + + rtx_cost (op0, 0, speed) + + rtx_cost (XEXP (op1, 0), 0, speed) + + rtx_cost (XEXP (op1, 1), 0, speed)); return true; } } @@ -3566,9 +3567,9 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total) && GET_CODE (XEXP (op, 0)) == MULT) { *total = (mips_fp_mult_cost (mode) - + rtx_cost (XEXP (XEXP (op, 0), 0), 0) - + rtx_cost (XEXP (XEXP (op, 0), 1), 0) - + rtx_cost (XEXP (op, 1), 0)); + + rtx_cost (XEXP (XEXP (op, 0), 0), 0, speed) + + rtx_cost (XEXP (XEXP (op, 0), 1), 0, speed) + + rtx_cost (XEXP (op, 1), 0, speed)); return true; } } @@ -3606,9 +3607,9 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total) if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT) /* An rsqrta or rsqrtb pattern. Count the division as being free. */ - *total = rtx_cost (XEXP (x, 1), 0); + *total = rtx_cost (XEXP (x, 1), 0, speed); else - *total = mips_fp_div_cost (mode) + rtx_cost (XEXP (x, 1), 0); + *total = mips_fp_div_cost (mode) + rtx_cost (XEXP (x, 1), 0, speed); return true; } /* Fall through. 
*/ @@ -3636,7 +3637,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total) && CONST_INT_P (XEXP (x, 1)) && exact_log2 (INTVAL (XEXP (x, 1))) >= 0) { - *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), 0); + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), 0, speed); return true; } *total = COSTS_N_INSNS (mips_idiv_insns ()); @@ -3671,7 +3672,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total) /* Implement TARGET_ADDRESS_COST. */ static int -mips_address_cost (rtx addr) +mips_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED) { return mips_address_insns (addr, SImode, false); } diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c index e57f52dd796..53cb8f6007f 100644 --- a/gcc/config/mmix/mmix.c +++ b/gcc/config/mmix/mmix.c @@ -132,7 +132,7 @@ static void mmix_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int); static void mmix_file_start (void); static void mmix_file_end (void); -static bool mmix_rtx_costs (rtx, int, int, int *); +static bool mmix_rtx_costs (rtx, int, int, int *, bool); static rtx mmix_struct_value_rtx (tree, int); static bool mmix_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, const_tree, bool); @@ -181,7 +181,7 @@ static bool mmix_pass_by_reference (CUMULATIVE_ARGS *, #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS mmix_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG mmix_reorg @@ -1106,7 +1106,8 @@ static bool mmix_rtx_costs (rtx x ATTRIBUTE_UNUSED, int code ATTRIBUTE_UNUSED, int outer_code ATTRIBUTE_UNUSED, - int *total ATTRIBUTE_UNUSED) + int *total ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) { /* For the time being, this is just a stub and we'll accept the generic calculations, until we can do measurements, at least. 
diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c index 13c0ff72f2e..277497075b1 100644 --- a/gcc/config/mn10300/mn10300.c +++ b/gcc/config/mn10300/mn10300.c @@ -70,8 +70,8 @@ enum processor_type mn10300_processor = PROCESSOR_DEFAULT; static bool mn10300_handle_option (size_t, const char *, int); static int mn10300_address_cost_1 (rtx, int *); -static int mn10300_address_cost (rtx); -static bool mn10300_rtx_costs (rtx, int, int, int *); +static int mn10300_address_cost (rtx, bool); +static bool mn10300_rtx_costs (rtx, int, int, int *, bool); static void mn10300_file_start (void); static bool mn10300_return_in_memory (const_tree, const_tree); static rtx mn10300_builtin_saveregs (void); @@ -1942,7 +1942,7 @@ legitimate_address_p (enum machine_mode mode, rtx x, int strict) } static int -mn10300_address_cost_1 (rtx x, int *unsig) +mn10300_address_cost_1 (rtx x, int *unsig, bool speed ATTRIBUTE_UNUSED) { switch (GET_CODE (x)) { @@ -2008,14 +2008,14 @@ mn10300_address_cost_1 (rtx x, int *unsig) } static int -mn10300_address_cost (rtx x) +mn10300_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) { int s = 0; return mn10300_address_cost_1 (x, &s); } static bool -mn10300_rtx_costs (rtx x, int code, int outer_code, int *total) +mn10300_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed ATTRIBUTE_UNUSED) { switch (code) { diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index 9ff778b5b47..c9c6525c91d 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -86,8 +86,8 @@ hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn) static void copy_reg_pointer (rtx, rtx); static void fix_range (const char *); static bool pa_handle_option (size_t, const char *, int); -static int hppa_address_cost (rtx); -static bool hppa_rtx_costs (rtx, int, int, int *); +static int hppa_address_cost (rtx, bool); +static bool hppa_rtx_costs (rtx, int, int, int *, bool); static inline rtx force_mode (enum machine_mode, rtx); static void pa_reorg (void); static 
void pa_combine_instructions (void); @@ -1279,7 +1279,8 @@ hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, as GO_IF_LEGITIMATE_ADDRESS. */ static int -hppa_address_cost (rtx X) +hppa_address_cost (rtx X, + bool speed ATTRIBUTE_UNUSED) { switch (GET_CODE (X)) { @@ -1299,7 +1300,8 @@ hppa_address_cost (rtx X) scanned. In either case, *TOTAL contains the cost result. */ static bool -hppa_rtx_costs (rtx x, int code, int outer_code, int *total) +hppa_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { diff --git a/gcc/config/pdp11/pdp11.c b/gcc/config/pdp11/pdp11.c index 661980fd13e..52167974c81 100644 --- a/gcc/config/pdp11/pdp11.c +++ b/gcc/config/pdp11/pdp11.c @@ -149,7 +149,7 @@ static const char *singlemove_string (rtx *); static bool pdp11_assemble_integer (rtx, unsigned int, int); static void pdp11_output_function_prologue (FILE *, HOST_WIDE_INT); static void pdp11_output_function_epilogue (FILE *, HOST_WIDE_INT); -static bool pdp11_rtx_costs (rtx, int, int, int *); +static bool pdp11_rtx_costs (rtx, int, int, int *, bool); static bool pdp11_return_in_memory (const_tree, const_tree); /* Initialize the GCC target structure. 
*/ @@ -1094,7 +1094,8 @@ register_move_cost(enum reg_class c1, enum reg_class c2) } static bool -pdp11_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) +pdp11_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2124ea3c50d..e2743edce29 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -829,7 +829,7 @@ static void rs6000_xcoff_file_start (void); static void rs6000_xcoff_file_end (void); #endif static int rs6000_variable_issue (FILE *, int, rtx, int); -static bool rs6000_rtx_costs (rtx, int, int, int *); +static bool rs6000_rtx_costs (rtx, int, int, int *, bool); static int rs6000_adjust_cost (rtx, rtx, rtx, int); static void rs6000_sched_init (FILE *, int, int); static bool is_microcoded_insn (rtx); @@ -1180,7 +1180,7 @@ static const char alt_reg_names[][8] = #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS rs6000_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 #undef TARGET_VECTOR_OPAQUE_P #define TARGET_VECTOR_OPAQUE_P rs6000_is_opaque_type @@ -21456,7 +21456,8 @@ rs6000_xcoff_file_end (void) scanned. In either case, *TOTAL contains the cost result. */ static bool -rs6000_rtx_costs (rtx x, int code, int outer_code, int *total) +rs6000_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed) { enum machine_mode mode = GET_MODE (x); @@ -21555,7 +21556,7 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int *total) /* When optimizing for size, MEM should be slightly more expensive than generating address, e.g., (plus (reg) (const)). L1 cache latency is about two instructions. */ - *total = optimize_size ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); + *total = !speed ? 
COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); return true; case LABEL_REF: @@ -21766,7 +21767,7 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int *total) case CALL: case IF_THEN_ELSE: - if (optimize_size) + if (!speed) { *total = COSTS_N_INSNS (1); return true; diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 957707b8607..d15ed2af14e 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -2298,7 +2298,8 @@ s390_float_const_zero_p (rtx value) of the superexpression of x. */ static bool -s390_rtx_costs (rtx x, int code, int outer_code, int *total) +s390_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { @@ -2336,9 +2337,9 @@ s390_rtx_costs (rtx x, int code, int outer_code, int *total) *total = s390_cost->madbr; else *total = s390_cost->maebr; - *total += rtx_cost (XEXP (XEXP (x, 0), 0), MULT) - + rtx_cost (XEXP (XEXP (x, 0), 1), MULT) - + rtx_cost (XEXP (x, 1), code); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), MULT, speed) + + rtx_cost (XEXP (XEXP (x, 0), 1), MULT, speed) + + rtx_cost (XEXP (x, 1), code, speed); return true; /* Do not do an additional recursive descent. */ } *total = COSTS_N_INSNS (1); @@ -2492,7 +2493,7 @@ s390_rtx_costs (rtx x, int code, int outer_code, int *total) /* Return the cost of an address rtx ADDR. 
*/ static int -s390_address_cost (rtx addr) +s390_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED) { struct s390_address ad; if (!s390_decompose_address (addr, &ad)) diff --git a/gcc/config/score/score-protos.h b/gcc/config/score/score-protos.h index 5f444abdf73..4c7963ffb2f 100644 --- a/gcc/config/score/score-protos.h +++ b/gcc/config/score/score-protos.h @@ -85,7 +85,7 @@ extern const char * score_select (rtx *ops, const char *inst_pre, bool commu, extern const char * score_output_casesi (rtx *operands); extern const char * score_rpush (rtx *ops); extern const char * score_rpop (rtx *ops); -extern bool score_rtx_costs (rtx x, int code, int outer_code, int *total); +extern bool score_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed); #ifdef RTX_CODE extern enum machine_mode score_select_cc_mode (enum rtx_code op, rtx x, rtx y); diff --git a/gcc/config/score/score.c b/gcc/config/score/score.c index c2e4176e382..861e5ba37c4 100644 --- a/gcc/config/score/score.c +++ b/gcc/config/score/score.c @@ -569,7 +569,8 @@ score_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, /* Implement TARGET_RTX_COSTS macro. */ bool -score_rtx_costs (rtx x, int code, int outer_code, int *total) +score_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { if (TARGET_SCORE5 || TARGET_SCORE5U || TARGET_SCORE7 || TARGET_SCORE7D) return score7_rtx_costs (x, code, outer_code, total); @@ -581,7 +582,8 @@ score_rtx_costs (rtx x, int code, int outer_code, int *total) /* Implement TARGET_ADDRESS_COST macro. 
*/ int -score_address_cost (rtx addr) +score_address_cost (rtx addr, + bool speed ATTRIBUTE_UNUSED) { if (TARGET_SCORE5 || TARGET_SCORE5U || TARGET_SCORE7 || TARGET_SCORE7D) return score7_address_cost (addr); diff --git a/gcc/config/score/score3.c b/gcc/config/score/score3.c index c976f38c005..3392c417c5c 100644 --- a/gcc/config/score/score3.c +++ b/gcc/config/score/score3.c @@ -1000,7 +1000,8 @@ score3_address_insns (rtx x, enum machine_mode mode) /* Implement TARGET_RTX_COSTS macro. */ bool -score3_rtx_costs (rtx x, int code, int outer_code, int *total) +score3_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { enum machine_mode mode = GET_MODE (x); @@ -1154,7 +1155,7 @@ score3_rtx_costs (rtx x, int code, int outer_code, int *total) /* Implement TARGET_ADDRESS_COST macro. */ int -score3_address_cost (rtx addr) +score3_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED) { return score3_address_insns (addr, SImode); } diff --git a/gcc/config/score/score3.h b/gcc/config/score/score3.h index 79677702d47..2585df9cfa2 100644 --- a/gcc/config/score/score3.h +++ b/gcc/config/score/score3.h @@ -126,7 +126,7 @@ extern int score3_address_p (enum machine_mode mode, rtx x, int strict); extern int score3_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, enum reg_class from, enum reg_class to); -extern bool score3_rtx_costs (rtx x, int code, int outer_code, int *total); +extern bool score3_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed); extern int score3_address_cost (rtx addr); extern int score3_output_external (FILE *file ATTRIBUTE_UNUSED, tree decl, diff --git a/gcc/config/score/score7.c b/gcc/config/score/score7.c index 03c47042ed5..52234fd270c 100644 --- a/gcc/config/score/score7.c +++ b/gcc/config/score/score7.c @@ -991,7 +991,8 @@ score7_address_insns (rtx x, enum machine_mode mode) /* Implement TARGET_RTX_COSTS macro. 
*/ bool -score7_rtx_costs (rtx x, int code, int outer_code, int *total) +score7_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { enum machine_mode mode = GET_MODE (x); @@ -1145,7 +1146,8 @@ score7_rtx_costs (rtx x, int code, int outer_code, int *total) /* Implement TARGET_ADDRESS_COST macro. */ int -score7_address_cost (rtx addr) +score7_address_cost (rtx addr, + bool speed ATTRIBUTE_UNUSED) { return score7_address_insns (addr, SImode); } diff --git a/gcc/config/score/score7.h b/gcc/config/score/score7.h index 1797e472279..ab2c7b0af88 100644 --- a/gcc/config/score/score7.h +++ b/gcc/config/score/score7.h @@ -126,7 +126,7 @@ extern int score7_address_p (enum machine_mode mode, rtx x, int strict); extern int score7_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, enum reg_class from, enum reg_class to); -extern bool score7_rtx_costs (rtx x, int code, int outer_code, int *total); +extern bool score7_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed); extern int score7_address_cost (rtx addr); extern int score7_output_external (FILE *file ATTRIBUTE_UNUSED, tree decl, diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 74060738ee9..fdd98937f8c 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -241,8 +241,8 @@ static int addsubcosts (rtx); static int multcosts (rtx); static bool unspec_caller_rtx_p (rtx); static bool sh_cannot_copy_insn_p (rtx); -static bool sh_rtx_costs (rtx, int, int, int *); -static int sh_address_cost (rtx); +static bool sh_rtx_costs (rtx, int, int, int *, bool); +static int sh_address_cost (rtx, bool); static int sh_pr_n_sets (void); static rtx sh_allocate_initial_value (rtx); static int shmedia_target_regs_stack_space (HARD_REG_SET *); @@ -2361,7 +2361,7 @@ andcosts (rtx x) || satisfies_constraint_J16 (XEXP (x, 1))) return 1; else - return 1 + rtx_cost (XEXP (x, 1), AND); + return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size); } /* These constants are single 
cycle extu.[bw] instructions. */ @@ -2461,7 +2461,8 @@ multcosts (rtx x ATTRIBUTE_UNUSED) scanned. In either case, *TOTAL contains the cost result. */ static bool -sh_rtx_costs (rtx x, int code, int outer_code, int *total) +sh_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { @@ -2587,7 +2588,8 @@ sh_rtx_costs (rtx x, int code, int outer_code, int *total) since it increases pressure on r0. */ static int -sh_address_cost (rtx X) +sh_address_cost (rtx X, + bool speed ATTRIBUTE_UNUSED) { return (GET_CODE (X) == PLUS && ! CONSTANT_P (XEXP (X, 1)) diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index b74e81e9558..0852cd94e79 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -404,7 +404,7 @@ static rtx sparc_tls_get_addr (void); static rtx sparc_tls_got (void); static const char *get_some_local_dynamic_name (void); static int get_some_local_dynamic_name_1 (rtx *, void *); -static bool sparc_rtx_costs (rtx, int, int, int *); +static bool sparc_rtx_costs (rtx, int, int, int *, bool); static bool sparc_promote_prototypes (const_tree); static rtx sparc_struct_value_rtx (tree, int); static bool sparc_return_in_memory (const_tree, const_tree); @@ -513,7 +513,7 @@ static bool fpu_option_set = false; #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS sparc_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 /* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a no-op for TARGET_ARCH32 this is ok. Otherwise we'd need to add a runtime @@ -8401,7 +8401,8 @@ sparc_extra_constraint_check (rtx op, int c, int strict) ??? the latencies and then CSE will just use that. 
*/ static bool -sparc_rtx_costs (rtx x, int code, int outer_code, int *total) +sparc_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { enum machine_mode mode = GET_MODE (x); bool float_mode_p = FLOAT_MODE_P (mode); diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index da99d3f108a..e22513ffd8a 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -208,7 +208,7 @@ tree spu_builtin_types[SPU_BTI_MAX]; #define TARGET_RTX_COSTS spu_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 #undef TARGET_SCHED_ISSUE_RATE #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate @@ -4209,7 +4209,8 @@ spu_asm_globalize_label (FILE * file, const char *name) } static bool -spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) +spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total, + bool speed ATTRIBUTE_UNUSED) { enum machine_mode mode = GET_MODE (x); int cost = COSTS_N_INSNS (2); diff --git a/gcc/config/stormy16/stormy16.c b/gcc/config/stormy16/stormy16.c index 77aedd6e01e..dd42fc833d0 100644 --- a/gcc/config/stormy16/stormy16.c +++ b/gcc/config/stormy16/stormy16.c @@ -57,8 +57,8 @@ static void xstormy16_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, static void xstormy16_init_builtins (void); static rtx xstormy16_expand_builtin (tree, rtx, rtx, enum machine_mode, int); -static bool xstormy16_rtx_costs (rtx, int, int, int *); -static int xstormy16_address_cost (rtx); +static bool xstormy16_rtx_costs (rtx, int, int, int *, bool); +static int xstormy16_address_cost (rtx, bool); static bool xstormy16_return_in_memory (const_tree, const_tree); /* Define the information needed to generate branch and scc insns. 
This is @@ -74,7 +74,7 @@ static GTY(()) section *bss100_section; static bool xstormy16_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, - int *total) + int *total, bool speed ATTRIBUTE_UNUSED) { switch (code) { @@ -107,7 +107,7 @@ xstormy16_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, } static int -xstormy16_address_cost (rtx x) +xstormy16_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) { return (GET_CODE (x) == CONST_INT ? 2 : GET_CODE (x) == PLUS ? 7 diff --git a/gcc/config/v850/v850.c b/gcc/config/v850/v850.c index b9f11b1583f..d4619797696 100644 --- a/gcc/config/v850/v850.c +++ b/gcc/config/v850/v850.c @@ -137,7 +137,7 @@ static GTY(()) section *zbss_section; #define TARGET_RTX_COSTS v850_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG v850_reorg @@ -422,7 +422,7 @@ static bool v850_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, - int * total) + int * total, bool speed) { switch (code) { @@ -438,7 +438,7 @@ v850_rtx_costs (rtx x, case DIV: case UMOD: case UDIV: - if (TARGET_V850E && optimize_size) + if (TARGET_V850E && !speed) *total = 6; else *total = 60; diff --git a/gcc/config/vax/vax.c b/gcc/config/vax/vax.c index 6cfd7e0d1dc..173799ebc65 100644 --- a/gcc/config/vax/vax.c +++ b/gcc/config/vax/vax.c @@ -49,8 +49,8 @@ static void vax_init_libfuncs (void); static void vax_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); static int vax_address_cost_1 (rtx); -static int vax_address_cost (rtx); -static bool vax_rtx_costs (rtx, int, int, int *); +static int vax_address_cost (rtx, bool); +static bool vax_rtx_costs (rtx, int, int, int *, bool); static rtx vax_struct_value_rtx (tree, int); /* Initialize the GCC target structure. 
*/ @@ -520,7 +520,7 @@ vax_address_cost_1 (rtx addr) } static int -vax_address_cost (rtx x) +vax_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) { return (1 + (REG_P (x) ? 0 : vax_address_cost_1 (x))); } @@ -534,7 +534,8 @@ vax_address_cost (rtx x) costs on a per cpu basis. */ static bool -vax_rtx_costs (rtx x, int code, int outer_code, int *total) +vax_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { enum machine_mode mode = GET_MODE (x); int i = 0; /* may be modified in switch */ diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c index f9979e61934..3bb71eb5c13 100644 --- a/gcc/config/xtensa/xtensa.c +++ b/gcc/config/xtensa/xtensa.c @@ -139,7 +139,7 @@ static unsigned int xtensa_multibss_section_type_flags (tree, const char *, int) ATTRIBUTE_UNUSED; static section *xtensa_select_rtx_section (enum machine_mode, rtx, unsigned HOST_WIDE_INT); -static bool xtensa_rtx_costs (rtx, int, int, int *); +static bool xtensa_rtx_costs (rtx, int, int, int *, bool); static tree xtensa_build_builtin_va_list (void); static bool xtensa_return_in_memory (const_tree, const_tree); static tree xtensa_gimplify_va_arg_expr (tree, tree, gimple_seq *, @@ -177,7 +177,7 @@ static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] = #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS xtensa_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 #undef TARGET_BUILD_BUILTIN_VA_LIST #define TARGET_BUILD_BUILTIN_VA_LIST xtensa_build_builtin_va_list @@ -2947,7 +2947,8 @@ xtensa_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED, scanned. In either case, *TOTAL contains the cost result. 
*/ static bool -xtensa_rtx_costs (rtx x, int code, int outer_code, int *total) +xtensa_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) { switch (code) { diff --git a/gcc/cse.c b/gcc/cse.c index 273e590323b..d586c6c26df 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -283,6 +283,7 @@ static enum machine_mode this_insn_cc0_mode, prev_insn_cc0_mode; /* Insn being scanned. */ static rtx this_insn; +static bool optimize_this_for_speed_p; /* Index by register number, gives the number of the next (or previous) register in the chain of registers sharing the same @@ -752,7 +753,7 @@ notreg_cost (rtx x, enum rtx_code outer) && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (x)), GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))))) ? 0 - : rtx_cost (x, outer) * 2); + : rtx_cost (x, outer, optimize_this_for_speed_p) * 2); } @@ -5970,6 +5971,7 @@ cse_extended_basic_block (struct cse_basic_block_data *ebb_data) FOR_BB_INSNS (bb, insn) { + optimize_this_for_speed_p = optimize_bb_for_speed_p (bb); /* If we have processed 1,000 insns, flush the hash table to avoid extreme quadratic behavior. 
We must not include NOTEs in the count since there may be more of them when generating diff --git a/gcc/dojump.c b/gcc/dojump.c index c22402a948d..0ff53952938 100644 --- a/gcc/dojump.c +++ b/gcc/dojump.c @@ -144,8 +144,8 @@ prefer_and_bit_test (enum machine_mode mode, int bitnum) XEXP (and_test, 1) = GEN_INT ((unsigned HOST_WIDE_INT) 1 << bitnum); XEXP (XEXP (shift_test, 0), 1) = GEN_INT (bitnum); - return (rtx_cost (and_test, IF_THEN_ELSE) - <= rtx_cost (shift_test, IF_THEN_ELSE)); + return (rtx_cost (and_test, IF_THEN_ELSE, optimize_insn_for_speed_p ()) + <= rtx_cost (shift_test, IF_THEN_ELSE, optimize_insn_for_speed_p ())); } /* Generate code to evaluate EXP and jump to IF_FALSE_LABEL if diff --git a/gcc/dse.c b/gcc/dse.c index 99233266377..7283bbe67e4 100644 --- a/gcc/dse.c +++ b/gcc/dse.c @@ -1424,7 +1424,8 @@ static rtx find_shift_sequence (int access_size, store_info_t store_info, read_info_t read_info, - int shift) + int shift, + bool speed) { enum machine_mode store_mode = GET_MODE (store_info->mem); enum machine_mode read_mode = GET_MODE (read_info->mem); @@ -1483,7 +1484,7 @@ find_shift_sequence (int access_size, cost = 0; for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn)) if (INSN_P (insn)) - cost += insn_rtx_cost (PATTERN (insn)); + cost += insn_rtx_cost (PATTERN (insn), speed); /* The computation up to here is essentially independent of the arguments and could be precomputed. 
It may @@ -1582,7 +1583,8 @@ replace_read (store_info_t store_info, insn_info_t store_insn, GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn)); start_sequence (); if (shift) - read_reg = find_shift_sequence (access_size, store_info, read_info, shift); + read_reg = find_shift_sequence (access_size, store_info, read_info, shift, + optimize_bb_for_speed_p (BLOCK_FOR_INSN (read_insn->insn))); else read_reg = extract_low_bits (read_mode, store_mode, copy_rtx (store_info->rhs)); diff --git a/gcc/expmed.c b/gcc/expmed.c index 83a8760b131..d4306f41a81 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -64,8 +64,8 @@ static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); Usually, this will mean that the MD file will emit non-branch sequences. */ -static bool sdiv_pow2_cheap[NUM_MACHINE_MODES]; -static bool smod_pow2_cheap[NUM_MACHINE_MODES]; +static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES]; +static bool smod_pow2_cheap[2][NUM_MACHINE_MODES]; #ifndef SLOW_UNALIGNED_ACCESS #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT @@ -98,17 +98,17 @@ static bool smod_pow2_cheap[NUM_MACHINE_MODES]; /* Cost of various pieces of RTL. Note that some of these are indexed by shift count and some by mode. 
*/ -static int zero_cost; -static int add_cost[NUM_MACHINE_MODES]; -static int neg_cost[NUM_MACHINE_MODES]; -static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; -static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; -static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; -static int mul_cost[NUM_MACHINE_MODES]; -static int sdiv_cost[NUM_MACHINE_MODES]; -static int udiv_cost[NUM_MACHINE_MODES]; -static int mul_widen_cost[NUM_MACHINE_MODES]; -static int mul_highpart_cost[NUM_MACHINE_MODES]; +static int zero_cost[2]; +static int add_cost[2][NUM_MACHINE_MODES]; +static int neg_cost[2][NUM_MACHINE_MODES]; +static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int mul_cost[2][NUM_MACHINE_MODES]; +static int sdiv_cost[2][NUM_MACHINE_MODES]; +static int udiv_cost[2][NUM_MACHINE_MODES]; +static int mul_widen_cost[2][NUM_MACHINE_MODES]; +static int mul_highpart_cost[2][NUM_MACHINE_MODES]; void init_expmed (void) @@ -137,15 +137,14 @@ init_expmed (void) rtx cint[MAX_BITS_PER_WORD]; int m, n; enum machine_mode mode, wider_mode; + int speed; - zero_cost = rtx_cost (const0_rtx, 0); for (m = 1; m < MAX_BITS_PER_WORD; m++) { pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m); cint[m] = GEN_INT (m); } - memset (&all, 0, sizeof all); PUT_CODE (&all.reg, REG); @@ -206,61 +205,71 @@ init_expmed (void) XEXP (&all.shift_sub, 0) = &all.shift_mult; XEXP (&all.shift_sub, 1) = &all.reg; - for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); - mode != VOIDmode; - mode = GET_MODE_WIDER_MODE (mode)) + for (speed = 0; speed < 2; speed++) { - PUT_MODE (&all.reg, mode); - PUT_MODE (&all.plus, mode); - PUT_MODE (&all.neg, mode); - PUT_MODE (&all.mult, mode); - PUT_MODE (&all.sdiv, mode); - PUT_MODE (&all.udiv, mode); - PUT_MODE (&all.sdiv_32, mode); - PUT_MODE (&all.smod_32, mode); - PUT_MODE (&all.wide_trunc, mode); - PUT_MODE 
(&all.shift, mode); - PUT_MODE (&all.shift_mult, mode); - PUT_MODE (&all.shift_add, mode); - PUT_MODE (&all.shift_sub, mode); + crtl->maybe_hot_insn_p = speed; + zero_cost[speed] = rtx_cost (const0_rtx, 0, speed); - add_cost[mode] = rtx_cost (&all.plus, SET); - neg_cost[mode] = rtx_cost (&all.neg, SET); - mul_cost[mode] = rtx_cost (&all.mult, SET); - sdiv_cost[mode] = rtx_cost (&all.sdiv, SET); - udiv_cost[mode] = rtx_cost (&all.udiv, SET); - - sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET) - <= 2 * add_cost[mode]); - smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET) - <= 4 * add_cost[mode]); - - wider_mode = GET_MODE_WIDER_MODE (mode); - if (wider_mode != VOIDmode) + for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); + mode != VOIDmode; + mode = GET_MODE_WIDER_MODE (mode)) { - PUT_MODE (&all.zext, wider_mode); - PUT_MODE (&all.wide_mult, wider_mode); - PUT_MODE (&all.wide_lshr, wider_mode); - XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode)); + PUT_MODE (&all.reg, mode); + PUT_MODE (&all.plus, mode); + PUT_MODE (&all.neg, mode); + PUT_MODE (&all.mult, mode); + PUT_MODE (&all.sdiv, mode); + PUT_MODE (&all.udiv, mode); + PUT_MODE (&all.sdiv_32, mode); + PUT_MODE (&all.smod_32, mode); + PUT_MODE (&all.wide_trunc, mode); + PUT_MODE (&all.shift, mode); + PUT_MODE (&all.shift_mult, mode); + PUT_MODE (&all.shift_add, mode); + PUT_MODE (&all.shift_sub, mode); - mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET); - mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET); - } + add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed); + neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed); + mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed); + sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed); + udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed); - shift_cost[mode][0] = 0; - shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode]; + sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed) + <= 2 * 
add_cost[speed][mode]); + smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed) + <= 4 * add_cost[speed][mode]); - n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode)); - for (m = 1; m < n; m++) - { - XEXP (&all.shift, 1) = cint[m]; - XEXP (&all.shift_mult, 1) = pow2[m]; + wider_mode = GET_MODE_WIDER_MODE (mode); + if (wider_mode != VOIDmode) + { + PUT_MODE (&all.zext, wider_mode); + PUT_MODE (&all.wide_mult, wider_mode); + PUT_MODE (&all.wide_lshr, wider_mode); + XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode)); - shift_cost[mode][m] = rtx_cost (&all.shift, SET); - shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET); - shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET); + mul_widen_cost[speed][wider_mode] + = rtx_cost (&all.wide_mult, SET, speed); + mul_highpart_cost[speed][mode] + = rtx_cost (&all.wide_trunc, SET, speed); + } + + shift_cost[speed][mode][0] = 0; + shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0] + = add_cost[speed][mode]; + + n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode)); + for (m = 1; m < n; m++) + { + XEXP (&all.shift, 1) = cint[m]; + XEXP (&all.shift_mult, 1) = pow2[m]; + + shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed); + shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed); + shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed); + } } } + default_rtl_profile (); } /* Return an rtx representing minus the value of X. 
@@ -2057,6 +2066,7 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, optab rrotate_optab = rotr_optab; enum machine_mode op1_mode; int attempt; + bool speed = optimize_insn_for_speed_p (); op1 = expand_normal (amount); op1_mode = GET_MODE (op1); @@ -2098,8 +2108,8 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode) && INTVAL (op1) < MAX_BITS_PER_WORD - && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode] - && shift_cost[mode][INTVAL (op1)] != MAX_COST) + && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode] + && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST) { int i; for (i = 0; i < INTVAL (op1); i++) @@ -2293,6 +2303,9 @@ struct alg_hash_entry { Otherwise, the cost within which multiplication by T is impossible. */ struct mult_cost cost; + + /* Optimized for speed? */ + bool speed; }; /* The number of cache/hash entries. */ @@ -2346,6 +2359,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, int hash_index; bool cache_hit = false; enum alg_code cache_alg = alg_zero; + bool speed = optimize_insn_for_speed_p (); /* Indicate that no algorithm is yet found. If no algorithm is found, this value will be returned and indicate failure. */ @@ -2373,13 +2387,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, fail now. */ if (t == 0) { - if (MULT_COST_LESS (cost_limit, zero_cost)) + if (MULT_COST_LESS (cost_limit, zero_cost[speed])) return; else { alg_out->ops = 1; - alg_out->cost.cost = zero_cost; - alg_out->cost.latency = zero_cost; + alg_out->cost.cost = zero_cost[speed]; + alg_out->cost.latency = zero_cost[speed]; alg_out->op[0] = alg_zero; return; } @@ -2392,11 +2406,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, best_cost = *cost_limit; /* Compute the hash index.
*/ - hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES; + hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; /* See if we already know what to do for T. */ if (alg_hash[hash_index].t == t && alg_hash[hash_index].mode == mode + && alg_hash[hash_index].mode == mode + && alg_hash[hash_index].speed == speed && alg_hash[hash_index].alg != alg_unknown) { cache_alg = alg_hash[hash_index].alg; @@ -2465,10 +2481,10 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, q = t >> m; /* The function expand_shift will choose between a shift and a sequence of additions, so the observed cost is given as - MIN (m * add_cost[mode], shift_cost[mode][m]). */ - op_cost = m * add_cost[mode]; - if (shift_cost[mode][m] < op_cost) - op_cost = shift_cost[mode][m]; + MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */ + op_cost = m * add_cost[speed][mode]; + if (shift_cost[speed][mode][m] < op_cost) + op_cost = shift_cost[speed][mode][m]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, q, &new_limit, mode); @@ -2509,7 +2525,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ - op_cost = add_cost[mode]; + op_cost = add_cost[speed][mode]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, t + 1, &new_limit, mode); @@ -2529,7 +2545,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. 
*/ - op_cost = add_cost[mode]; + op_cost = add_cost[speed][mode]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, t - 1, &new_limit, mode); @@ -2574,14 +2590,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, equal to its cost, otherwise assume that on superscalar hardware the shift may be executed concurrently with the earlier steps in the algorithm. */ - op_cost = add_cost[mode] + shift_cost[mode][m]; - if (shiftadd_cost[mode][m] < op_cost) + op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; + if (shiftadd_cost[speed][mode][m] < op_cost) { - op_cost = shiftadd_cost[mode][m]; + op_cost = shiftadd_cost[speed][mode][m]; op_latency = op_cost; } else - op_latency = add_cost[mode]; + op_latency = add_cost[speed][mode]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_latency; @@ -2613,14 +2629,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, equal to it's cost, otherwise assume that on superscalar hardware the shift may be executed concurrently with the earlier steps in the algorithm. 
*/ - op_cost = add_cost[mode] + shift_cost[mode][m]; - if (shiftsub_cost[mode][m] < op_cost) + op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; + if (shiftsub_cost[speed][mode][m] < op_cost) { - op_cost = shiftsub_cost[mode][m]; + op_cost = shiftsub_cost[speed][mode][m]; op_latency = op_cost; } else - op_latency = add_cost[mode]; + op_latency = add_cost[speed][mode]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_latency; @@ -2654,7 +2670,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (q); if (m >= 0 && m < maxm) { - op_cost = shiftadd_cost[mode][m]; + op_cost = shiftadd_cost[speed][mode][m]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); @@ -2679,7 +2695,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (q); if (m >= 0 && m < maxm) { - op_cost = shiftsub_cost[mode][m]; + op_cost = shiftsub_cost[speed][mode][m]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); @@ -2710,6 +2726,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, caller. 
*/ alg_hash[hash_index].t = t; alg_hash[hash_index].mode = mode; + alg_hash[hash_index].speed = speed; alg_hash[hash_index].alg = alg_impossible; alg_hash[hash_index].cost = *cost_limit; return; @@ -2720,6 +2737,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { alg_hash[hash_index].t = t; alg_hash[hash_index].mode = mode; + alg_hash[hash_index].speed = speed; alg_hash[hash_index].alg = best_alg->op[best_alg->ops]; alg_hash[hash_index].cost.cost = best_cost.cost; alg_hash[hash_index].cost.latency = best_cost.latency; @@ -2759,6 +2777,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, struct algorithm alg2; struct mult_cost limit; int op_cost; + bool speed = optimize_insn_for_speed_p (); /* Fail quickly for impossible bounds. */ if (mult_cost < 0) @@ -2767,7 +2786,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, /* Ensure that mult_cost provides a reasonable upper bound. Any constant multiplication can be performed with less than 2 * bits additions. */ - op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode]; + op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode]; if (mult_cost > op_cost) mult_cost = op_cost; @@ -2780,7 +2799,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, `unsigned int' */ if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode)) { - op_cost = neg_cost[mode]; + op_cost = neg_cost[speed][mode]; if (MULT_COST_LESS (&alg->cost, mult_cost)) { limit.cost = alg->cost.cost - op_cost; @@ -2800,7 +2819,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, } /* This proves very useful for division-by-constant. 
*/ - op_cost = add_cost[mode]; + op_cost = add_cost[speed][mode]; if (MULT_COST_LESS (&alg->cost, mult_cost)) { limit.cost = alg->cost.cost - op_cost; @@ -2988,6 +3007,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, enum mult_variant variant; struct algorithm algorithm; int max_cost; + bool speed = optimize_insn_for_speed_p (); /* Handling const0_rtx here allows us to use zero as a rogue value for coeff below. */ @@ -3029,8 +3049,8 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, result is interpreted as an unsigned coefficient. Exclude cost of op0 from max_cost to match the cost calculation of the synth_mult. */ - max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET) - - neg_cost[mode]; + max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed) + - neg_cost[speed][mode]; if (max_cost > 0 && choose_mult_variant (mode, -INTVAL (op1), &algorithm, &variant, max_cost)) @@ -3074,7 +3094,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, /* Exclude cost of op0 from max_cost to match the cost calculation of the synth_mult. */ - max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET); + max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed); if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) return expand_mult_const (mode, op0, coeff, target, @@ -3317,6 +3337,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, optab moptab; rtx tem; int size; + bool speed = optimize_insn_for_speed_p (); gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); @@ -3325,7 +3346,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Firstly, try using a multiplication insn that only generates the needed high part of the product, and in the sign flavor of unsignedp. */ - if (mul_highpart_cost[mode] < max_cost) + if (mul_highpart_cost[speed][mode] < max_cost) { moptab = unsignedp ? 
umul_highpart_optab : smul_highpart_optab; tem = expand_binop (mode, moptab, op0, narrow_op1, target, @@ -3337,8 +3358,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Secondly, same as above, but use sign flavor opposite of unsignedp. Need to adjust the result after the multiplication. */ if (size - 1 < BITS_PER_WORD - && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1] - + 4 * add_cost[mode] < max_cost)) + && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1] + + 4 * add_cost[speed][mode] < max_cost)) { moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; tem = expand_binop (mode, moptab, op0, narrow_op1, target, @@ -3352,7 +3373,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening multiplication. */ moptab = unsignedp ? umul_widen_optab : smul_widen_optab; if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing - && mul_widen_cost[wider_mode] < max_cost) + && mul_widen_cost[speed][wider_mode] < max_cost) { tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, unsignedp, OPTAB_WIDEN); @@ -3363,7 +3384,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening the mode and perform a non-widening multiplication. */ if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing && size - 1 < BITS_PER_WORD - && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost) + && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost) { rtx insns, wop0, wop1; @@ -3390,8 +3411,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, moptab = unsignedp ? 
smul_widen_optab : umul_widen_optab; if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing && size - 1 < BITS_PER_WORD - && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1] - + 4 * add_cost[mode] < max_cost)) + && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1] + + 4 * add_cost[speed][mode] < max_cost)) { tem = expand_binop (wider_mode, moptab, op0, narrow_op1, NULL_RTX, ! unsignedp, OPTAB_WIDEN); @@ -3429,6 +3450,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, enum mult_variant variant; struct algorithm alg; rtx tem; + bool speed = optimize_insn_for_speed_p (); gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); /* We can't support modes wider than HOST_BITS_PER_INT. */ @@ -3444,13 +3466,13 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, return expand_mult_highpart_optab (mode, op0, op1, target, unsignedp, max_cost); - extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1]; + extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1]; /* Check whether we try to multiply by a negative constant. */ if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) { sign_adjust = true; - extra_cost += add_cost[mode]; + extra_cost += add_cost[speed][mode]; } /* See whether shift/add multiplication is cheap enough. 
*/ @@ -3510,7 +3532,7 @@ expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) temp = gen_rtx_LSHIFTRT (mode, result, shift); if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing - || rtx_cost (temp, SET) > COSTS_N_INSNS (2)) + || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2)) { temp = expand_binop (mode, xor_optab, op0, signmask, NULL_RTX, 1, OPTAB_LIB_WIDEN); @@ -3641,7 +3663,7 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) temp = gen_reg_rtx (mode); temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); - if (shift_cost[mode][ushift] > COSTS_N_INSNS (1)) + if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1)) temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1), NULL_RTX, 0, OPTAB_LIB_WIDEN); else @@ -3714,6 +3736,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, int max_cost, extra_cost; static HOST_WIDE_INT last_div_const = 0; static HOST_WIDE_INT ext_op1; + bool speed = optimize_insn_for_speed_p (); op1_is_constant = GET_CODE (op1) == CONST_INT; if (op1_is_constant) @@ -3844,10 +3867,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, /* Only deduct something for a REM if the last divide done was for a different constant. Then set the constant of the last divide. */ - max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode]; + max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode]; if (rem_flag && ! (last_div_const != 0 && op1_is_constant && INTVAL (op1) == last_div_const)) - max_cost -= mul_cost[compute_mode] + add_cost[compute_mode]; + max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode]; last_div_const = ! rem_flag && op1_is_constant ? 
INTVAL (op1) : 0; @@ -3966,9 +3989,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, goto fail1; extra_cost - = (shift_cost[compute_mode][post_shift - 1] - + shift_cost[compute_mode][1] - + 2 * add_cost[compute_mode]); + = (shift_cost[speed][compute_mode][post_shift - 1] + + shift_cost[speed][compute_mode][1] + + 2 * add_cost[speed][compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -4002,8 +4025,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, build_int_cst (NULL_TREE, pre_shift), NULL_RTX, 1); extra_cost - = (shift_cost[compute_mode][pre_shift] - + shift_cost[compute_mode][post_shift]); + = (shift_cost[speed][compute_mode][pre_shift] + + shift_cost[speed][compute_mode][post_shift]); t2 = expand_mult_highpart (compute_mode, t1, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -4133,9 +4156,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, || size - 1 >= BITS_PER_WORD) goto fail1; - extra_cost = (shift_cost[compute_mode][post_shift] - + shift_cost[compute_mode][size - 1] - + add_cost[compute_mode]); + extra_cost = (shift_cost[speed][compute_mode][post_shift] + + shift_cost[speed][compute_mode][size - 1] + + add_cost[speed][compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, mlr, NULL_RTX, 0, max_cost - extra_cost); @@ -4170,9 +4193,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); mlr = gen_int_mode (ml, compute_mode); - extra_cost = (shift_cost[compute_mode][post_shift] - + shift_cost[compute_mode][size - 1] - + 2 * add_cost[compute_mode]); + extra_cost = (shift_cost[speed][compute_mode][post_shift] + + shift_cost[speed][compute_mode][size - 1] + + 2 * add_cost[speed][compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, mlr, NULL_RTX, 0, max_cost - extra_cost); @@ -4265,9 +4288,9 @@ expand_divmod (int rem_flag, enum tree_code code, 
enum machine_mode mode, NULL_RTX, 0); t2 = expand_binop (compute_mode, xor_optab, op0, t1, NULL_RTX, 0, OPTAB_WIDEN); - extra_cost = (shift_cost[compute_mode][post_shift] - + shift_cost[compute_mode][size - 1] - + 2 * add_cost[compute_mode]); + extra_cost = (shift_cost[speed][compute_mode][post_shift] + + shift_cost[speed][compute_mode][size - 1] + + 2 * add_cost[speed][compute_mode]); t3 = expand_mult_highpart (compute_mode, t2, ml, NULL_RTX, 1, max_cost - extra_cost); diff --git a/gcc/expr.c b/gcc/expr.c index 908d5641953..07685b22899 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -3458,13 +3458,14 @@ compress_float_constant (rtx x, rtx y) enum machine_mode srcmode; REAL_VALUE_TYPE r; int oldcost, newcost; + bool speed = optimize_insn_for_speed_p (); REAL_VALUE_FROM_CONST_DOUBLE (r, y); if (LEGITIMATE_CONSTANT_P (y)) - oldcost = rtx_cost (y, SET); + oldcost = rtx_cost (y, SET, speed); else - oldcost = rtx_cost (force_const_mem (dstmode, y), SET); + oldcost = rtx_cost (force_const_mem (dstmode, y), SET, speed); for (srcmode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (orig_srcmode)); srcmode != orig_srcmode; @@ -3491,7 +3492,7 @@ compress_float_constant (rtx x, rtx y) if (! (*insn_data[ic].operand[1].predicate) (trunc_y, srcmode)) continue; /* This is valid, but may not be cheaper than the original. */ - newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET); + newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET, speed); if (oldcost < newcost) continue; } @@ -3499,7 +3500,7 @@ compress_float_constant (rtx x, rtx y) { trunc_y = force_const_mem (srcmode, trunc_y); /* This is valid, but may not be cheaper than the original. 
*/ - newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET); + newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET, speed); if (oldcost < newcost) continue; trunc_y = validize_mem (trunc_y); diff --git a/gcc/fwprop.c b/gcc/fwprop.c index 7c61ca008e5..c546f3e15a1 100644 --- a/gcc/fwprop.c +++ b/gcc/fwprop.c @@ -184,7 +184,8 @@ canonicalize_address (rtx x) for a memory access in the given MODE. */ static bool -should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode) +should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode, + bool speed) { int gain; @@ -196,14 +197,14 @@ should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode) return true; /* Prefer the new address if it is less expensive. */ - gain = address_cost (old_rtx, mode) - address_cost (new_rtx, mode); + gain = address_cost (old_rtx, mode, speed) - address_cost (new_rtx, mode, speed); /* If the addresses have equivalent cost, prefer the new address if it has the highest `rtx_cost'. That has the potential of eliminating the most insns without additional costs, and it is the same that cse.c used to do. */ if (gain == 0) - gain = rtx_cost (new_rtx, SET) - rtx_cost (old_rtx, SET); + gain = rtx_cost (new_rtx, SET, speed) - rtx_cost (old_rtx, SET, speed); return (gain > 0); } @@ -231,7 +232,10 @@ enum { PR_HANDLE_MEM is set when the source of the propagation was not another MEM. Then, it is safe not to treat non-read-only MEMs as ``opaque'' objects. */ - PR_HANDLE_MEM = 2 + PR_HANDLE_MEM = 2, + + /* Set when costs should be optimized for speed. */ + PR_OPTIMIZE_FOR_SPEED = 4 }; @@ -360,7 +364,8 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) /* Copy propagations are always ok. Otherwise check the costs. 
*/ if (!(REG_P (old_rtx) && REG_P (new_rtx)) - && !should_replace_address (op0, new_op0, GET_MODE (x))) + && !should_replace_address (op0, new_op0, GET_MODE (x), + flags & PR_OPTIMIZE_FOR_SPEED)) return true; tem = replace_equiv_address_nv (x, new_op0); @@ -438,7 +443,8 @@ varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED) Otherwise, we accept simplifications that have a lower or equal cost. */ static rtx -propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx) +propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx, + bool speed) { rtx tem; bool collapsed; @@ -453,6 +459,9 @@ propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx) if (!for_each_rtx (&new_rtx, varying_mem_p, NULL)) flags |= PR_HANDLE_MEM; + if (speed) + flags |= PR_OPTIMIZE_FOR_SPEED; + tem = x; collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags); if (tem == x || !collapsed) @@ -728,7 +737,8 @@ try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new_rtx, rtx def_insn, bool enum df_ref_type type = DF_REF_TYPE (use); int flags = DF_REF_FLAGS (use); rtx set = single_set (insn); - int old_cost = rtx_cost (SET_SRC (set), SET); + bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); + int old_cost = rtx_cost (SET_SRC (set), SET, speed); bool ok; if (dump_file) @@ -750,7 +760,7 @@ try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new_rtx, rtx def_insn, bool } else if (DF_REF_TYPE (use) == DF_REF_REG_USE - && rtx_cost (SET_SRC (set), SET) > old_cost) + && rtx_cost (SET_SRC (set), SET, speed) > old_cost) { if (dump_file) fprintf (dump_file, "Changes to insn %d not profitable\n", @@ -928,7 +938,8 @@ forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set) else mode = GET_MODE (*loc); - new_rtx = propagate_rtx (*loc, mode, reg, src); + new_rtx = propagate_rtx (*loc, mode, reg, src, + optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn))); if (!new_rtx) return false; diff --git a/gcc/hooks.c b/gcc/hooks.c index 
50761b69ca5..04dbd3eab2a 100644 --- a/gcc/hooks.c +++ b/gcc/hooks.c @@ -153,6 +153,12 @@ hook_int_rtx_0 (rtx a ATTRIBUTE_UNUSED) return 0; } +int +hook_int_rtx_bool_0 (rtx a ATTRIBUTE_UNUSED, bool b ATTRIBUTE_UNUSED) +{ + return 0; +} + int hook_int_size_t_constcharptr_int_0 (size_t a ATTRIBUTE_UNUSED, const char *b ATTRIBUTE_UNUSED, @@ -233,10 +239,11 @@ hook_bool_uintp_uintp_false (unsigned int *a ATTRIBUTE_UNUSED, } bool -hook_bool_rtx_int_int_intp_false (rtx a ATTRIBUTE_UNUSED, - int b ATTRIBUTE_UNUSED, - int c ATTRIBUTE_UNUSED, - int *d ATTRIBUTE_UNUSED) +hook_bool_rtx_int_int_intp_bool_false (rtx a ATTRIBUTE_UNUSED, + int b ATTRIBUTE_UNUSED, + int c ATTRIBUTE_UNUSED, + int *d ATTRIBUTE_UNUSED, + bool speed_p ATTRIBUTE_UNUSED) { return false; } diff --git a/gcc/hooks.h b/gcc/hooks.h index d6bbc4c2f07..9d7e56a3589 100644 --- a/gcc/hooks.h +++ b/gcc/hooks.h @@ -44,7 +44,7 @@ extern bool hook_bool_const_tree_hwi_hwi_const_tree_true (const_tree, const_tree); extern bool hook_bool_rtx_false (rtx); extern bool hook_bool_uintp_uintp_false (unsigned int *, unsigned int *); -extern bool hook_bool_rtx_int_int_intp_false (rtx, int, int, int *); +extern bool hook_bool_rtx_int_int_intp_bool_false (rtx, int, int, int *, bool); extern bool hook_bool_constcharptr_size_t_false (const char *, size_t); extern bool hook_bool_size_t_constcharptr_int_true (size_t, const char *, int); extern bool hook_bool_tree_tree_false (tree, tree); @@ -59,6 +59,7 @@ extern void hook_void_tree_treeptr (tree, tree *); extern int hook_int_const_tree_0 (const_tree); extern int hook_int_const_tree_const_tree_1 (const_tree, const_tree); extern int hook_int_rtx_0 (rtx); +extern int hook_int_rtx_bool_0 (rtx, bool); extern int hook_int_size_t_constcharptr_int_0 (size_t, const char *, int); extern int hook_int_void_no_regs (void); diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c index 51fc48c6043..845fa2104c2 100644 --- a/gcc/ifcvt.c +++ b/gcc/ifcvt.c @@ -143,12 +143,13 @@ cheap_bb_rtx_cost_p (const_basic_block bb, 
int max_cost) { int count = 0; rtx insn = BB_HEAD (bb); + bool speed = optimize_bb_for_speed_p (bb); while (1) { if (NONJUMP_INSN_P (insn)) { - int cost = insn_rtx_cost (PATTERN (insn)); + int cost = insn_rtx_cost (PATTERN (insn), speed); if (cost == 0) return false; @@ -1351,7 +1352,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info) if insn_rtx_cost can't be estimated. */ if (insn_a) { - insn_cost = insn_rtx_cost (PATTERN (insn_a)); + insn_cost = insn_rtx_cost (PATTERN (insn_a), + optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn_a))); if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost)) return FALSE; } @@ -1360,7 +1362,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info) if (insn_b) { - insn_cost += insn_rtx_cost (PATTERN (insn_b)); + insn_cost += insn_rtx_cost (PATTERN (insn_b), + optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn_b))); if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost)) return FALSE; } @@ -1901,7 +1904,8 @@ noce_try_sign_mask (struct noce_if_info *if_info) INSN_B which can happen for e.g. conditional stores to memory. 
*/ b_unconditional = (if_info->insn_b == NULL_RTX || BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb); - if (rtx_cost (t, SET) >= COSTS_N_INSNS (2) + if (rtx_cost (t, SET, optimize_bb_for_speed_p (BLOCK_FOR_INSN (if_info->insn_b))) + >= COSTS_N_INSNS (2) && (!b_unconditional || t != if_info->b)) return FALSE; diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c index 293b3ae3776..a039f367925 100644 --- a/gcc/loop-doloop.c +++ b/gcc/loop-doloop.c @@ -586,7 +586,8 @@ doloop_optimize (struct loop *loop) max_cost = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST)); - if (rtx_cost (desc->niter_expr, SET) > max_cost) + if (rtx_cost (desc->niter_expr, SET, optimize_loop_for_speed_p (loop)) + > max_cost) { if (dump_file) fprintf (dump_file, diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c index 40e70ba18c0..524727e9a37 100644 --- a/gcc/loop-invariant.c +++ b/gcc/loop-invariant.c @@ -668,6 +668,7 @@ create_new_invariant (struct def *def, rtx insn, bitmap depends_on, { struct invariant *inv = XNEW (struct invariant); rtx set = single_set (insn); + bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); inv->def = def; inv->always_executed = always_executed; @@ -676,9 +677,9 @@ create_new_invariant (struct def *def, rtx insn, bitmap depends_on, /* If the set is simple, usually by moving it we move the whole store out of the loop. Otherwise we save only cost of the computation. 
*/ if (def) - inv->cost = rtx_cost (set, SET); + inv->cost = rtx_cost (set, SET, speed); else - inv->cost = rtx_cost (SET_SRC (set), SET); + inv->cost = rtx_cost (SET_SRC (set), SET, speed); inv->move = false; inv->reg = NULL_RTX; @@ -1048,15 +1049,15 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed) static int gain_for_invariant (struct invariant *inv, unsigned *regs_needed, - unsigned new_regs, unsigned regs_used) + unsigned new_regs, unsigned regs_used, bool speed) { int comp_cost, size_cost; get_inv_cost (inv, &comp_cost, regs_needed); actual_stamp++; - size_cost = (estimate_reg_pressure_cost (new_regs + *regs_needed, regs_used) - - estimate_reg_pressure_cost (new_regs, regs_used)); + size_cost = (estimate_reg_pressure_cost (new_regs + *regs_needed, regs_used, speed) + - estimate_reg_pressure_cost (new_regs, regs_used, speed)); return comp_cost - size_cost; } @@ -1069,7 +1070,7 @@ gain_for_invariant (struct invariant *inv, unsigned *regs_needed, static int best_gain_for_invariant (struct invariant **best, unsigned *regs_needed, - unsigned new_regs, unsigned regs_used) + unsigned new_regs, unsigned regs_used, bool speed) { struct invariant *inv; int gain = 0, again; @@ -1084,7 +1085,8 @@ best_gain_for_invariant (struct invariant **best, unsigned *regs_needed, if (inv->eqto != inv->invno) continue; - again = gain_for_invariant (inv, &aregs_needed, new_regs, regs_used); + again = gain_for_invariant (inv, &aregs_needed, new_regs, regs_used, + speed); if (again > gain) { gain = again; @@ -1123,7 +1125,7 @@ set_move_mark (unsigned invno) /* Determines which invariants to move. 
*/ static void -find_invariants_to_move (void) +find_invariants_to_move (bool speed) { unsigned i, regs_used, regs_needed = 0, new_regs; struct invariant *inv = NULL; @@ -1147,7 +1149,7 @@ find_invariants_to_move (void) } new_regs = 0; - while (best_gain_for_invariant (&inv, &regs_needed, new_regs, regs_used) > 0) + while (best_gain_for_invariant (&inv, &regs_needed, new_regs, regs_used, speed) > 0) { set_move_mark (inv->invno); new_regs += regs_needed; @@ -1314,7 +1316,7 @@ move_single_loop_invariants (struct loop *loop) init_inv_motion_data (); find_invariants (loop); - find_invariants_to_move (); + find_invariants_to_move (optimize_loop_for_speed_p (loop)); move_invariants (loop); free_inv_motion_data (); diff --git a/gcc/optabs.c b/gcc/optabs.c index 0c3b9caeed5..12731039091 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -1414,7 +1414,8 @@ avoid_expensive_constant (enum machine_mode mode, optab binoptab, if (mode != VOIDmode && optimize && CONSTANT_P (x) - && rtx_cost (x, binoptab->code) > COSTS_N_INSNS (1)) + && rtx_cost (x, binoptab->code, optimize_insn_for_speed_p ()) + > COSTS_N_INSNS (1)) { if (GET_CODE (x) == CONST_INT) { @@ -4037,11 +4038,13 @@ prepare_cmp_insn (rtx *px, rtx *py, enum rtx_code *pcomparison, rtx size, /* If we are inside an appropriately-short loop and we are optimizing, force expensive constants into a register.
*/ if (CONSTANT_P (x) && optimize - && rtx_cost (x, COMPARE) > COSTS_N_INSNS (1)) + && (rtx_cost (x, COMPARE, optimize_insn_for_speed_p ()) + > COSTS_N_INSNS (1))) x = force_reg (mode, x); if (CONSTANT_P (y) && optimize - && rtx_cost (y, COMPARE) > COSTS_N_INSNS (1)) + && (rtx_cost (y, COMPARE, optimize_insn_for_speed_p ()) + > COSTS_N_INSNS (1))) y = force_reg (mode, y); #ifdef HAVE_cc0 diff --git a/gcc/output.h b/gcc/output.h index 15daf5b8d0d..51adcd22648 100644 --- a/gcc/output.h +++ b/gcc/output.h @@ -621,7 +621,7 @@ extern void default_elf_asm_output_external (FILE *file, tree, const char *); extern int maybe_assemble_visibility (tree); -extern int default_address_cost (rtx); +extern int default_address_cost (rtx, bool); /* dbxout helper functions */ #if defined DBX_DEBUGGING_INFO || defined XCOFF_DEBUGGING_INFO diff --git a/gcc/postreload.c b/gcc/postreload.c index 7659bab435a..8abc90f83d9 100644 --- a/gcc/postreload.c +++ b/gcc/postreload.c @@ -233,6 +233,7 @@ reload_cse_simplify_set (rtx set, rtx insn) #ifdef LOAD_EXTEND_OP enum rtx_code extend_op = UNKNOWN; #endif + bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); dreg = true_regnum (SET_DEST (set)); if (dreg < 0) @@ -267,7 +268,7 @@ reload_cse_simplify_set (rtx set, rtx insn) old_cost = REGISTER_MOVE_COST (GET_MODE (src), REGNO_REG_CLASS (REGNO (src)), dclass); else - old_cost = rtx_cost (src, SET); + old_cost = rtx_cost (src, SET, speed); for (l = val->locs; l; l = l->next) { @@ -302,7 +303,7 @@ reload_cse_simplify_set (rtx set, rtx insn) this_rtx = GEN_INT (this_val); } #endif - this_cost = rtx_cost (this_rtx, SET); + this_cost = rtx_cost (this_rtx, SET, speed); } else if (REG_P (this_rtx)) { @@ -310,7 +311,7 @@ reload_cse_simplify_set (rtx set, rtx insn) if (extend_op != UNKNOWN) { this_rtx = gen_rtx_fmt_e (extend_op, word_mode, this_rtx); - this_cost = rtx_cost (this_rtx, SET); + this_cost = rtx_cost (this_rtx, SET, speed); } else #endif @@ -570,8 +571,10 @@ reload_cse_simplify_operands 
(rtx insn, rtx testreg) if (op_alt_regno[i][j] == -1 && reg_fits_class_p (testreg, rclass, 0, mode) && (GET_CODE (recog_data.operand[i]) != CONST_INT - || (rtx_cost (recog_data.operand[i], SET) - > rtx_cost (testreg, SET)))) + || (rtx_cost (recog_data.operand[i], SET, + optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn))) + > rtx_cost (testreg, SET, + optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)))))) { alternative_nregs[j]++; op_alt_regno[i][j] = regno; @@ -1240,6 +1243,8 @@ reload_cse_move2add (rtx first) { rtx new_src = gen_int_mode (INTVAL (src) - reg_offset[regno], GET_MODE (reg)); + bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); + /* (set (reg) (plus (reg) (const_int 0))) is not canonical; use (set (reg) (reg)) instead. We don't delete this insn, nor do we convert it into a @@ -1255,7 +1260,7 @@ reload_cse_move2add (rtx first) if (INTVAL (src) == reg_offset [regno]) validate_change (insn, &SET_SRC (pat), reg, 0); } - else if (rtx_cost (new_src, PLUS) < rtx_cost (src, SET) + else if (rtx_cost (new_src, PLUS, speed) < rtx_cost (src, SET, speed) && have_add2_insn (reg, new_src)) { rtx tem = gen_rtx_PLUS (GET_MODE (reg), reg, new_src); @@ -1331,14 +1336,15 @@ reload_cse_move2add (rtx first) + base_offset - regno_offset, GET_MODE (reg)); - int success = 0; + bool success = false; + bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); if (new_src == const0_rtx) /* See above why we create (set (reg) (reg)) here. 
*/ success = validate_change (next, &SET_SRC (set), reg, 0); - else if ((rtx_cost (new_src, PLUS) - < COSTS_N_INSNS (1) + rtx_cost (src3, SET)) + else if ((rtx_cost (new_src, PLUS, speed) + < COSTS_N_INSNS (1) + rtx_cost (src3, SET, speed)) && have_add2_insn (reg, new_src)) { rtx newpat = gen_rtx_SET (VOIDmode, diff --git a/gcc/rtl.h b/gcc/rtl.h index 9e7d40ad6e5..8b6478984bb 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -1074,8 +1074,8 @@ rhs_regno (const_rtx x) #define MAX_COST INT_MAX extern void init_rtlanal (void); -extern int rtx_cost (rtx, enum rtx_code); -extern int address_cost (rtx, enum machine_mode); +extern int rtx_cost (rtx, enum rtx_code, bool); +extern int address_cost (rtx, enum machine_mode, bool); extern unsigned int subreg_lsb (const_rtx); extern unsigned int subreg_lsb_1 (enum machine_mode, enum machine_mode, unsigned int); @@ -1784,7 +1784,7 @@ extern int loc_mentioned_in_p (rtx *, const_rtx); extern rtx find_first_parameter_load (rtx, rtx); extern bool keep_with_call_p (const_rtx); extern bool label_is_jump_target_p (const_rtx, const_rtx); -extern int insn_rtx_cost (rtx); +extern int insn_rtx_cost (rtx, bool); /* Given an insn and condition, return a canonical description of the test being made. */ @@ -2339,5 +2339,7 @@ extern void insn_locators_finalize (void); extern void set_curr_insn_source_location (location_t); extern void set_curr_insn_block (tree); extern int curr_insn_locator (void); +extern bool optimize_insn_for_size_p (void); +extern bool optimize_insn_for_speed_p (void); #endif /* ! GCC_RTL_H */ diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index 9c5a1e53c7f..b2038aa840f 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -3501,10 +3501,13 @@ label_is_jump_target_p (const_rtx label, const_rtx jump_insn) /* Return an estimate of the cost of computing rtx X. One use is in cse, to decide which expression to keep in the hash table. Another is in rtl generation, to pick the cheapest way to multiply. 
- Other uses like the latter are expected in the future. */ + Other uses like the latter are expected in the future. + + SPEED parameter specifies whether costs optimized for speed or size should + be returned. */ int -rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED) +rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED, bool speed) { int i, j; enum rtx_code code; @@ -3552,7 +3555,7 @@ rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED) break; default: - if (targetm.rtx_costs (x, code, outer_code, &total)) + if (targetm.rtx_costs (x, code, outer_code, &total, speed)) return total; break; } @@ -3563,19 +3566,22 @@ rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED) fmt = GET_RTX_FORMAT (code); for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) if (fmt[i] == 'e') - total += rtx_cost (XEXP (x, i), code); + total += rtx_cost (XEXP (x, i), code, speed); else if (fmt[i] == 'E') for (j = 0; j < XVECLEN (x, i); j++) - total += rtx_cost (XVECEXP (x, i, j), code); + total += rtx_cost (XVECEXP (x, i, j), code, speed); return total; } /* Return cost of address expression X. - Expect that X is properly formed address reference. */ + Expect that X is properly formed address reference. + + SPEED parameter specifies whether costs optimized for speed or size should + be returned. */ int -address_cost (rtx x, enum machine_mode mode) +address_cost (rtx x, enum machine_mode mode, bool speed) { /* We may be asked for cost of various unusual addresses, such as operands of push instruction. It is not worthwhile to complicate writing @@ -3584,15 +3590,15 @@ address_cost (rtx x, enum machine_mode mode) if (!memory_address_p (mode, x)) return 1000; - return targetm.address_cost (x); + return targetm.address_cost (x, speed); } /* If the target doesn't override, compute the cost as with arithmetic.
*/ int -default_address_cost (rtx x) +default_address_cost (rtx x, bool speed) { - return rtx_cost (x, MEM); + return rtx_cost (x, MEM, speed); } @@ -4563,7 +4569,7 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x, zero indicates an instruction pattern without a known cost. */ int -insn_rtx_cost (rtx pat) +insn_rtx_cost (rtx pat, bool speed) { int i, cost; rtx set; @@ -4591,7 +4597,7 @@ insn_rtx_cost (rtx pat) else return 0; - cost = rtx_cost (SET_SRC (set), SET); + cost = rtx_cost (SET_SRC (set), SET, speed); return cost > 0 ? cost : COSTS_N_INSNS (1); } diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 606a850622c..30bafea8ab1 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -1665,12 +1665,13 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx coeff; unsigned HOST_WIDE_INT l; HOST_WIDE_INT h; + bool speed = optimize_function_for_speed_p (cfun); add_double (coeff0l, coeff0h, coeff1l, coeff1h, &l, &h); coeff = immed_double_const (l, h, mode); tem = simplify_gen_binary (MULT, mode, lhs, coeff); - return rtx_cost (tem, SET) <= rtx_cost (orig, SET) + return rtx_cost (tem, SET, speed) <= rtx_cost (orig, SET, speed) ? tem : 0; } } @@ -1859,12 +1860,13 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx coeff; unsigned HOST_WIDE_INT l; HOST_WIDE_INT h; + bool speed = optimize_function_for_speed_p (cfun); add_double (coeff0l, coeff0h, negcoeff1l, negcoeff1h, &l, &h); coeff = immed_double_const (l, h, mode); tem = simplify_gen_binary (MULT, mode, lhs, coeff); - return rtx_cost (tem, SET) <= rtx_cost (orig, SET) + return rtx_cost (tem, SET, speed) <= rtx_cost (orig, SET, speed) ? 
tem : 0; } } diff --git a/gcc/stmt.c b/gcc/stmt.c index 2464466b376..ee880964fe1 100644 --- a/gcc/stmt.c +++ b/gcc/stmt.c @@ -2125,7 +2125,8 @@ bool lshift_cheap_p (void) if (!init) { rtx reg = gen_rtx_REG (word_mode, 10000); - int cost = rtx_cost (gen_rtx_ASHIFT (word_mode, const1_rtx, reg), SET); + int cost = rtx_cost (gen_rtx_ASHIFT (word_mode, const1_rtx, reg), SET, + optimize_insn_for_speed_p ()); cheap = cost < COSTS_N_INSNS (3); init = true; } diff --git a/gcc/target-def.h b/gcc/target-def.h index 817cae4a1d2..740efc55703 100644 --- a/gcc/target-def.h +++ b/gcc/target-def.h @@ -481,7 +481,7 @@ #define TARGET_MS_BITFIELD_LAYOUT_P hook_bool_const_tree_false #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_false #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false -#define TARGET_RTX_COSTS hook_bool_rtx_int_int_intp_false +#define TARGET_RTX_COSTS hook_bool_rtx_int_int_intp_bool_false #define TARGET_MANGLE_TYPE hook_constcharptr_const_tree_null #define TARGET_ALLOCATE_INITIAL_VALUE NULL diff --git a/gcc/target.h b/gcc/target.h index a44eb921a60..69b270fc43a 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -648,11 +648,11 @@ struct gcc_target scanned. In either case, *TOTAL contains the cost result. */ /* Note that CODE and OUTER_CODE ought to be RTX_CODE, but that's not necessarily defined at this point. */ - bool (* rtx_costs) (rtx x, int code, int outer_code, int *total); + bool (* rtx_costs) (rtx x, int code, int outer_code, int *total, bool speed); /* Compute the cost of X, used as an address. Never called with invalid addresses. */ - int (* address_cost) (rtx x); + int (* address_cost) (rtx x, bool speed); /* Return where to allocate pseudo for a given hard register initial value. 
*/ diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index 1e94037f737..eec48d9183e 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -1124,7 +1124,7 @@ extern void tree_check_data_deps (void); bool expr_invariant_in_loop_p (struct loop *, tree); bool stmt_invariant_in_loop_p (struct loop *, gimple); bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode); -unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode); +unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode, bool); /* In tree-ssa-threadupdate.c. */ extern bool thread_through_all_blocks (bool); @@ -1155,7 +1155,7 @@ struct mem_address struct affine_tree_combination; tree create_mem_ref (gimple_stmt_iterator *, tree, - struct affine_tree_combination *); + struct affine_tree_combination *, bool); rtx addr_for_mem_ref (struct mem_address *, bool); void get_address_description (tree, struct mem_address *); tree maybe_fold_tmr (tree); diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c index 198adb409e6..df4f5c9296f 100644 --- a/gcc/tree-ssa-address.c +++ b/gcc/tree-ssa-address.c @@ -437,7 +437,8 @@ add_to_parts (struct mem_address *parts, tree elt) element(s) to PARTS. */ static void -most_expensive_mult_to_index (struct mem_address *parts, aff_tree *addr) +most_expensive_mult_to_index (struct mem_address *parts, aff_tree *addr, + bool speed) { HOST_WIDE_INT coef; double_int best_mult, amult, amult_neg; @@ -459,7 +460,7 @@ most_expensive_mult_to_index (struct mem_address *parts, aff_tree *addr) || !multiplier_allowed_in_address_p (coef, Pmode)) continue; - acost = multiply_by_cost (coef, Pmode); + acost = multiply_by_cost (coef, Pmode, speed); if (acost > best_mult_cost) { @@ -512,7 +513,7 @@ most_expensive_mult_to_index (struct mem_address *parts, aff_tree *addr) addressing modes is useless. 
*/ static void -addr_to_parts (aff_tree *addr, struct mem_address *parts) +addr_to_parts (aff_tree *addr, struct mem_address *parts, bool speed) { tree part; unsigned i; @@ -532,7 +533,7 @@ addr_to_parts (aff_tree *addr, struct mem_address *parts) /* First move the most expensive feasible multiplication to index. */ - most_expensive_mult_to_index (parts, addr); + most_expensive_mult_to_index (parts, addr, speed); /* Try to find a base of the reference. Since at the moment there is no reliable way how to distinguish between pointer and its @@ -573,13 +574,14 @@ gimplify_mem_ref_parts (gimple_stmt_iterator *gsi, struct mem_address *parts) of created memory reference. */ tree -create_mem_ref (gimple_stmt_iterator *gsi, tree type, aff_tree *addr) +create_mem_ref (gimple_stmt_iterator *gsi, tree type, aff_tree *addr, + bool speed) { tree mem_ref, tmp; tree atype; struct mem_address parts; - addr_to_parts (addr, &parts); + addr_to_parts (addr, &parts, speed); gimplify_mem_ref_parts (gsi, &parts); mem_ref = create_mem_ref_raw (type, &parts); if (mem_ref) diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 1a1e58b1a81..8fbb27a5667 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -219,6 +219,9 @@ struct ivopts_data /* The currently optimized loop. */ struct loop *current_loop; + /* Are we optimizing for speed? */ + bool speed; + /* Number of registers used in it. */ unsigned regs_used; @@ -2539,7 +2542,7 @@ get_use_iv_cost (struct ivopts_data *data, struct iv_use *use, /* Returns estimate on cost of computing SEQ. */ static unsigned -seq_cost (rtx seq) +seq_cost (rtx seq, bool speed) { unsigned cost = 0; rtx set; @@ -2548,7 +2551,7 @@ seq_cost (rtx seq) { set = single_set (seq); if (set) - cost += rtx_cost (set, SET); + cost += rtx_cost (set, SET,speed); else cost++; } @@ -2641,23 +2644,28 @@ prepare_decl_rtl (tree *expr_p, int *ws, void *data) /* Determines cost of the computation of EXPR. 
*/ static unsigned -computation_cost (tree expr) +computation_cost (tree expr, bool speed) { rtx seq, rslt; tree type = TREE_TYPE (expr); unsigned cost; /* Avoid using hard regs in ways which may be unsupported. */ int regno = LAST_VIRTUAL_REGISTER + 1; + enum function_frequency real_frequency = cfun->function_frequency; + cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL; + crtl->maybe_hot_insn_p = speed; walk_tree (&expr, prepare_decl_rtl, &regno, NULL); start_sequence (); rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL); seq = get_insns (); end_sequence (); + default_rtl_profile (); + cfun->function_frequency = real_frequency; - cost = seq_cost (seq); + cost = seq_cost (seq, speed); if (MEM_P (rslt)) - cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type)); + cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed); return cost; } @@ -2833,7 +2841,7 @@ get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand) /* Returns cost of addition in MODE. */ static unsigned -add_cost (enum machine_mode mode) +add_cost (enum machine_mode mode, bool speed) { static unsigned costs[NUM_MACHINE_MODES]; rtx seq; @@ -2850,7 +2858,7 @@ add_cost (enum machine_mode mode) seq = get_insns (); end_sequence (); - cost = seq_cost (seq); + cost = seq_cost (seq, speed); if (!cost) cost = 1; @@ -2895,7 +2903,7 @@ mbc_entry_eq (const void *entry1, const void *entry2) /* Returns cost of multiplication by constant CST in MODE.
*/ unsigned -multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode) +multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed) { static htab_t costs; struct mbc_entry **cached, act; @@ -2921,7 +2929,7 @@ multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode) seq = get_insns (); end_sequence (); - cost = seq_cost (seq); + cost = seq_cost (seq, speed); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Multiplication by %d in %s costs %d\n", @@ -2984,7 +2992,8 @@ multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode) static comp_cost get_address_cost (bool symbol_present, bool var_present, unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio, - enum machine_mode mem_mode) + enum machine_mode mem_mode, + bool speed) { static bool initialized[MAX_MACHINE_MODE]; static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE]; @@ -3100,8 +3109,8 @@ get_address_cost (bool symbol_present, bool var_present, seq = get_insns (); end_sequence (); - acost = seq_cost (seq); - acost += address_cost (addr, mem_mode); + acost = seq_cost (seq, speed); + acost += address_cost (addr, mem_mode, speed); if (!acost) acost = 1; @@ -3120,7 +3129,7 @@ get_address_cost (bool symbol_present, bool var_present, If VAR_PRESENT is true, try whether the mode with SYMBOL_PRESENT = false is cheaper even with cost of addition, and if this is the case, use it. 
*/ - add_c = add_cost (Pmode); + add_c = add_cost (Pmode, speed); for (i = 0; i < 8; i++) { var_p = i & 1; @@ -3178,10 +3187,10 @@ get_address_cost (bool symbol_present, bool var_present, && multiplier_allowed_in_address_p (ratio, mem_mode)); if (ratio != 1 && !ratio_p) - cost += multiply_by_cost (ratio, Pmode); + cost += multiply_by_cost (ratio, Pmode, speed); if (s_offset && !offset_p && !symbol_present) - cost += add_cost (Pmode); + cost += add_cost (Pmode, speed); acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p]; complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p; @@ -3191,12 +3200,12 @@ get_address_cost (bool symbol_present, bool var_present, /* Estimates cost of forcing expression EXPR into a variable. */ static comp_cost -force_expr_to_var_cost (tree expr) +force_expr_to_var_cost (tree expr, bool speed) { static bool costs_initialized = false; - static unsigned integer_cost; - static unsigned symbol_cost; - static unsigned address_cost; + static unsigned integer_cost [2]; + static unsigned symbol_cost [2]; + static unsigned address_cost [2]; tree op0, op1; comp_cost cost0, cost1, cost; enum machine_mode mode; @@ -3206,30 +3215,36 @@ force_expr_to_var_cost (tree expr) tree type = build_pointer_type (integer_type_node); tree var, addr; rtx x; + int i; var = create_tmp_var_raw (integer_type_node, "test_var"); TREE_STATIC (var) = 1; x = produce_memory_decl_rtl (var, NULL); SET_DECL_RTL (var, x); - integer_cost = computation_cost (build_int_cst (integer_type_node, - 2000)); - addr = build1 (ADDR_EXPR, type, var); - symbol_cost = computation_cost (addr) + 1; - address_cost - = computation_cost (build2 (POINTER_PLUS_EXPR, type, - addr, - build_int_cst (sizetype, 2000))) + 1; - if (dump_file && (dump_flags & TDF_DETAILS)) + + for (i = 0; i < 2; i++) { - fprintf (dump_file, "force_expr_to_var_cost:\n"); - fprintf (dump_file, " integer %d\n", (int) integer_cost); - fprintf (dump_file, " symbol %d\n", (int) symbol_cost); 
- fprintf (dump_file, " address %d\n", (int) address_cost); - fprintf (dump_file, " other %d\n", (int) target_spill_cost); - fprintf (dump_file, "\n"); + integer_cost[i] = computation_cost (build_int_cst (integer_type_node, + 2000), i); + + symbol_cost[i] = computation_cost (addr, i) + 1; + + address_cost[i] + = computation_cost (build2 (POINTER_PLUS_EXPR, type, + addr, + build_int_cst (sizetype, 2000)), i) + 1; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size"); + fprintf (dump_file, " integer %d\n", (int) integer_cost[i]); + fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]); + fprintf (dump_file, " address %d\n", (int) address_cost[i]); + fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]); + fprintf (dump_file, "\n"); + } } costs_initialized = true; @@ -3243,7 +3258,7 @@ force_expr_to_var_cost (tree expr) if (is_gimple_min_invariant (expr)) { if (TREE_CODE (expr) == INTEGER_CST) - return new_cost (integer_cost, 0); + return new_cost (integer_cost [speed], 0); if (TREE_CODE (expr) == ADDR_EXPR) { @@ -3252,10 +3267,10 @@ force_expr_to_var_cost (tree expr) if (TREE_CODE (obj) == VAR_DECL || TREE_CODE (obj) == PARM_DECL || TREE_CODE (obj) == RESULT_DECL) - return new_cost (symbol_cost, 0); + return new_cost (symbol_cost [speed], 0); } - return new_cost (address_cost, 0); + return new_cost (address_cost [speed], 0); } switch (TREE_CODE (expr)) @@ -3272,18 +3287,18 @@ force_expr_to_var_cost (tree expr) if (is_gimple_val (op0)) cost0 = zero_cost; else - cost0 = force_expr_to_var_cost (op0); + cost0 = force_expr_to_var_cost (op0, speed); if (is_gimple_val (op1)) cost1 = zero_cost; else - cost1 = force_expr_to_var_cost (op1); + cost1 = force_expr_to_var_cost (op1, speed); break; default: /* Just an arbitrary value, FIXME. 
*/ - return new_cost (target_spill_cost, 0); + return new_cost (target_spill_cost[speed], 0); } mode = TYPE_MODE (TREE_TYPE (expr)); @@ -3292,16 +3307,16 @@ force_expr_to_var_cost (tree expr) case POINTER_PLUS_EXPR: case PLUS_EXPR: case MINUS_EXPR: - cost = new_cost (add_cost (mode), 0); + cost = new_cost (add_cost (mode, speed), 0); break; case MULT_EXPR: if (cst_and_fits_in_hwi (op0)) - cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0); - else if (cst_and_fits_in_hwi (op1)) - cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0); + cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0); + else if (cst_and_fits_in_hwi (op1)) + cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0); else - return new_cost (target_spill_cost, 0); + return new_cost (target_spill_cost [speed], 0); break; default: @@ -3315,8 +3330,8 @@ force_expr_to_var_cost (tree expr) computations often are either loop invariant or at least can be shared between several iv uses, so letting this grow without limits would not give reasonable results. */ - if (cost.cost > target_spill_cost) - cost.cost = target_spill_cost; + if (cost.cost > target_spill_cost [speed]) + cost.cost = target_spill_cost [speed]; return cost; } @@ -3334,7 +3349,7 @@ force_var_cost (struct ivopts_data *data, walk_tree (&expr, find_depends, depends_on, NULL); } - return force_expr_to_var_cost (expr); + return force_expr_to_var_cost (expr, data->speed); } /* Estimates cost of expressing address ADDR as var + symbol + offset. 
The @@ -3365,7 +3380,7 @@ split_address_cost (struct ivopts_data *data, *var_present = true; fd_ivopts_data = data; walk_tree (&addr, find_depends, depends_on, NULL); - return new_cost (target_spill_cost, 0); + return new_cost (target_spill_cost[data->speed], 0); } *offset += bitpos / BITS_PER_UNIT; @@ -3395,6 +3410,7 @@ ptr_difference_cost (struct ivopts_data *data, { HOST_WIDE_INT diff = 0; comp_cost cost; + bool speed = optimize_loop_for_speed_p (data->current_loop); gcc_assert (TREE_CODE (e1) == ADDR_EXPR); @@ -3415,7 +3431,7 @@ ptr_difference_cost (struct ivopts_data *data, cost = force_var_cost (data, e1, depends_on); cost = add_costs (cost, force_var_cost (data, e2, depends_on)); - cost.cost += add_cost (Pmode); + cost.cost += add_cost (Pmode, speed); return cost; } @@ -3459,14 +3475,14 @@ difference_cost (struct ivopts_data *data, if (integer_zerop (e1)) { cost = force_var_cost (data, e2, depends_on); - cost.cost += multiply_by_cost (-1, mode); + cost.cost += multiply_by_cost (-1, mode, data->speed); return cost; } cost = force_var_cost (data, e1, depends_on); cost = add_costs (cost, force_var_cost (data, e2, depends_on)); - cost.cost += add_cost (mode); + cost.cost += add_cost (mode, data->speed); return cost; } @@ -3491,6 +3507,7 @@ get_computation_cost_at (struct ivopts_data *data, comp_cost cost; unsigned n_sums; double_int rat; + bool speed = optimize_bb_for_speed_p (gimple_bb (at)); *depends_on = NULL; @@ -3571,7 +3588,7 @@ get_computation_cost_at (struct ivopts_data *data, else { cost = force_var_cost (data, cbase, depends_on); - cost.cost += add_cost (TYPE_MODE (ctype)); + cost.cost += add_cost (TYPE_MODE (ctype), data->speed); cost = add_costs (cost, difference_cost (data, ubase, build_int_cst (utype, 0), @@ -3590,20 +3607,20 @@ get_computation_cost_at (struct ivopts_data *data, if (address_p) return add_costs (cost, get_address_cost (symbol_present, var_present, offset, ratio, - TYPE_MODE (TREE_TYPE (*use->op_p)))); + TYPE_MODE (TREE_TYPE 
(*use->op_p)), speed)); /* Otherwise estimate the costs for computing the expression. */ aratio = ratio > 0 ? ratio : -ratio; if (!symbol_present && !var_present && !offset) { if (ratio != 1) - cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype)); + cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed); return cost; } if (aratio != 1) - cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype)); + cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed); n_sums = 1; if (var_present @@ -3616,7 +3633,7 @@ get_computation_cost_at (struct ivopts_data *data, if (offset) cost.complexity++; - cost.cost += n_sums * add_cost (TYPE_MODE (ctype)); + cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed); return cost; fallback: @@ -3630,7 +3647,7 @@ fallback: if (address_p) comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp); - return new_cost (computation_cost (comp), 0); + return new_cost (computation_cost (comp, speed), 0); } } @@ -4008,7 +4025,7 @@ determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) base = cand->iv->base; cost_base = force_var_cost (data, base, NULL); - cost_step = add_cost (TYPE_MODE (TREE_TYPE (base))); + cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed); cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop); @@ -4062,7 +4079,7 @@ ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size) { /* We add size to the cost, so that we prefer eliminating ivs if possible. */ - return size + estimate_reg_pressure_cost (size, data->regs_used); + return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed); } /* For each size of the induction variable set determine the penalty. 
*/ @@ -4101,8 +4118,8 @@ determine_set_costs (struct ivopts_data *data) { fprintf (dump_file, "Global costs:\n"); fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); - fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost); - fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost); + fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); + fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); } n = 0; @@ -5255,7 +5272,7 @@ rewrite_use_address (struct ivopts_data *data, gcc_assert (ok); unshare_aff_combination (&aff); - ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff); + ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed); copy_ref_info (ref, *use->op_p); *use->op_p = ref; } @@ -5469,6 +5486,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) gcc_assert (!data->niters); data->current_loop = loop; + data->speed = optimize_loop_for_speed_p (loop); if (dump_file && (dump_flags & TDF_DETAILS)) {