diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 58a30ed75f0..31ae6a1416a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2007-07-16 Sandra Loosemore + Nigel Stephens + + * config/mips/mips.md: Include 20kc.md. + * config/mips/20kc.md: New file. + * config/mips/mips.c (mips_rtx_cost_data): Fill in 20Kc costs. + (mips_adjust_cost): Tweak for 20Kc. + (mips_issue_rate): Likewise. + * config/mips/mips.h (TUNE_20KC): Define. + 2007-07-16 David Edelsohn * config/rs6000/rs6000.c (struct processor_cost): Add diff --git a/gcc/config/mips/20kc.md b/gcc/config/mips/20kc.md new file mode 100644 index 00000000000..32442393632 --- /dev/null +++ b/gcc/config/mips/20kc.md @@ -0,0 +1,266 @@ +;; ......................... +;; +;; DFA-based pipeline description for MIPS64 model R20Kc. +;; Contributed by Jason Eckhardt (jle@cygnus.com). +;; +;; The R20Kc is a dual-issue processor that can generally bundle +;; instructions as follows: +;; 1. integer with integer +;; 2. integer with fp +;; 3. fp with fpload/fpstore +;; +;; Of course, there are various restrictions. +;; Reference: +;; "Ruby (R20K) Technical Specification Rev. 1.2, December 28, 1999." +;; +;; ......................... + +;; Use three automata to isolate long-latency operations, reducing space. +(define_automaton "r20kc_other, r20kc_fdiv, r20kc_idiv") + +;; +;; Describe the resources. +;; + +;; Global: the two instruction issue slots. +(define_cpu_unit "r20kc_iss0, r20kc_iss1" "r20kc_other") + +;; Integer execution unit (pipeline A). +(define_cpu_unit "r20kc_ixua_addsub_agen" "r20kc_other") +(define_cpu_unit "r20kc_ixua_shift" "r20kc_other") + +(exclusion_set "r20kc_ixua_addsub_agen" "r20kc_ixua_shift") + +;; Integer execution unit (pipeline B). 
+(define_cpu_unit "r20kc_ixub_addsub" "r20kc_other") +(define_cpu_unit "r20kc_ixub_branch" "r20kc_other") +(define_cpu_unit "r20kc_ixub_mpydiv" "r20kc_other") +(define_cpu_unit "r20kc_ixub_mpydiv_iter" "r20kc_idiv") + +(exclusion_set "r20kc_ixub_addsub" "r20kc_ixub_branch, r20kc_ixub_mpydiv") +(exclusion_set "r20kc_ixub_branch" "r20kc_ixub_mpydiv") + +;; Cache / memory interface. +(define_cpu_unit "r20kc_cache" "r20kc_other") + +;; Floating-point unit. +(define_cpu_unit "r20kc_fpu_add" "r20kc_other") +(define_cpu_unit "r20kc_fpu_mpy" "r20kc_other") +(define_cpu_unit "r20kc_fpu_mpy_iter" "r20kc_fdiv") +(define_cpu_unit "r20kc_fpu_divsqrt" "r20kc_other") +(define_cpu_unit "r20kc_fpu_divsqrt_iter" "r20kc_fdiv") + +(exclusion_set "r20kc_fpu_add" "r20kc_fpu_mpy, r20kc_fpu_divsqrt") +(exclusion_set "r20kc_fpu_mpy" "r20kc_fpu_divsqrt") + +;; Once a branch has issued, no other insn can be issued in that cycle. +(absence_set "r20kc_iss0,r20kc_iss1" "r20kc_ixub_branch") + +;; +;; Define reservations for unit name mnemonics or combinations. 
+;; + +(define_reservation "r20kc_iss" + "r20kc_iss0|r20kc_iss1") +(define_reservation "r20kc_single_dispatch" + "r20kc_iss0+r20kc_iss1") +(define_reservation "r20kc_iaddsub" + "r20kc_iss+(r20kc_ixua_addsub_agen|r20kc_ixub_addsub)") +(define_reservation "r20kc_ishift" + "r20kc_iss+r20kc_ixua_shift") +(define_reservation "r20kc_fpmove" + "r20kc_iss+r20kc_ixua_addsub_agen") +(define_reservation "r20kc_imem" + "r20kc_iss+r20kc_ixua_addsub_agen+r20kc_cache") +(define_reservation "r20kc_icache" + "r20kc_cache") +(define_reservation "r20kc_impydiv" + "r20kc_iss+r20kc_ixub_mpydiv") +(define_reservation "r20kc_impydiv_iter" + "r20kc_ixub_mpydiv_iter") +(define_reservation "r20kc_ibranch" + "r20kc_iss+r20kc_ixub_branch") + +(define_reservation "r20kc_fpadd" + "r20kc_iss+r20kc_fpu_add") +(define_reservation "r20kc_fpmpy" + "r20kc_iss+r20kc_fpu_mpy") +(define_reservation "r20kc_fpmpy_iter" + "r20kc_fpu_mpy_iter") +(define_reservation "r20kc_fpdivsqrt" + "r20kc_iss+r20kc_fpu_divsqrt") +(define_reservation "r20kc_fpdivsqrt_iter" + "r20kc_fpu_divsqrt_iter") + +;; +;; Describe instruction reservations for integer operations. +;; + +;; Conditional moves always force single-dispatch (both issue slots). +(define_insn_reservation "r20kc_cond_move_int" 1 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "condmove") + (eq_attr "mode" "!SF,DF"))) + "r20kc_single_dispatch") + +(define_insn_reservation "r20kc_cond_move_fp" 4 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "condmove") + (eq_attr "mode" "SF,DF"))) + "r20kc_single_dispatch") + +(define_insn_reservation "r20kc_int_other" 1 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "move,arith,const,nop")) + "r20kc_iaddsub") + +;; Shifts can only execute on integer execution unit pipeline A. 
+(define_insn_reservation "r20kc_int_shift" 1 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "shift")) + "r20kc_ishift") + +(define_insn_reservation "r20kc_ld" 2 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "load,prefetch,prefetchx")) + "r20kc_imem") + + +;; A load immediately following a store will stall, so +;; say that a store uses the cache for an extra cycle. +(define_insn_reservation "r20kc_st" 2 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "store")) + "r20kc_imem,r20kc_icache") + +(define_insn_reservation "r20kc_fld" 3 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "fpload")) + "r20kc_imem") + +(define_insn_reservation "r20kc_ffst" 3 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "fpstore")) + "r20kc_imem,r20kc_icache*2") + +;; Integer divide latency is between 13 and 42 cycles for DIV[U] and between +;; 13 and 72 cycles for DDIV[U]. This depends on the values of the inputs, +;; so we just choose the worst-case latency. +(define_insn_reservation "r20kc_idiv_si" 42 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "SI"))) + "r20kc_impydiv+(r20kc_impydiv_iter*42)") + +(define_insn_reservation "r20kc_idiv_di" 72 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "DI"))) + "r20kc_impydiv+(r20kc_impydiv_iter*72)") + +;; Integer multiply latency is 4 or 7 cycles for word and double-word +;; operands, respectively. +(define_insn_reservation "r20kc_impy_si" 4 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "imadd,imul,imul3") + (eq_attr "mode" "SI"))) + "r20kc_impydiv+(r20kc_impydiv_iter*2)") + +(define_insn_reservation "r20kc_impy_di" 7 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "imadd,imul,imul3") + (eq_attr "mode" "DI"))) + "r20kc_impydiv+(r20kc_impydiv_iter*7)") + +;; Move to/from HI/LO. +;; Moving to HI/LO has a 3-cycle latency, while moving from HI/LO has only +;; a 1-cycle latency. The repeat rate is 3 for both. 
+(define_insn_reservation "r20kc_imthilo" 3 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "mthilo")) + "r20kc_impydiv+(r20kc_impydiv_iter*3)") + +(define_insn_reservation "r20kc_imfhilo" 1 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "mfhilo")) + "r20kc_impydiv+(r20kc_impydiv_iter*3)") + +;; Move to the FP coprocessor. +(define_insn_reservation "r20kc_ixfer_mt" 3 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "mtc")) + "r20kc_fpmove") + +;; Move from the FP coprocessor. +(define_insn_reservation "r20kc_ixfer_mf" 2 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "mfc")) + "r20kc_fpmove") + +;; Assume the branch is predicted correctly. +(define_insn_reservation "r20kc_ibr" 1 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "branch,jump,call")) + "r20kc_ibranch") + +;; +;; Describe instruction reservations for the floating-point operations. +;; +(define_insn_reservation "r20kc_fp_other" 4 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "fmove,fadd,fabs,fneg,fcmp")) + "r20kc_fpadd") + +(define_insn_reservation "r20kc_fp_cvt_a" 4 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "fcvt") + (eq_attr "cnv_mode" "I2S,I2D,S2D"))) + "r20kc_fpadd") + +(define_insn_reservation "r20kc_fp_cvt_b" 5 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "fcvt") + (eq_attr "cnv_mode" "D2S,S2I"))) + "r20kc_fpadd") + +(define_insn_reservation "r20kc_fp_divsqrt_df" 32 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "fdiv,fsqrt") + (eq_attr "mode" "DF"))) + "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*32)") + +(define_insn_reservation "r20kc_fp_divsqrt_sf" 17 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "fdiv,fsqrt") + (eq_attr "mode" "SF"))) + "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*17)") + +(define_insn_reservation "r20kc_fp_rsqrt_df" 35 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "frsqrt") + (eq_attr "mode" "DF"))) + "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*35)") + +(define_insn_reservation "r20kc_fp_rsqrt_sf" 17 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "frsqrt") + 
(eq_attr "mode" "SF"))) + "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*17)") + +(define_insn_reservation "r20kc_fp_mpy_sf" 4 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "fmul,fmadd") + (eq_attr "mode" "SF"))) + "r20kc_fpmpy+r20kc_fpmpy_iter") + +(define_insn_reservation "r20kc_fp_mpy_df" 5 + (and (eq_attr "cpu" "20kc") + (and (eq_attr "type" "fmul,fmadd") + (eq_attr "mode" "DF"))) + "r20kc_fpmpy+(r20kc_fpmpy_iter*2)") + +;; Force single-dispatch for insns of type "unknown" or "multi". +(define_insn_reservation "r20kc_unknown" 1 + (and (eq_attr "cpu" "20kc") + (eq_attr "type" "unknown,multi")) + "r20kc_single_dispatch") diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index d94dfa9f287..b47f59fed2a 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -911,7 +911,17 @@ static struct mips_rtx_cost_data const mips_rtx_cost_data[PROCESSOR_MAX] = 4 /* memory_latency */ }, { /* 20KC */ - DEFAULT_COSTS + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult_sf */ + COSTS_N_INSNS (5), /* fp_mult_df */ + COSTS_N_INSNS (17), /* fp_div_sf */ + COSTS_N_INSNS (32), /* fp_div_df */ + COSTS_N_INSNS (4), /* int_mult_si */ + COSTS_N_INSNS (7), /* int_mult_di */ + COSTS_N_INSNS (42), /* int_div_si */ + COSTS_N_INSNS (72), /* int_div_di */ + 1, /* branch_cost */ + 4 /* memory_latency */ }, { /* 24KC */ SOFT_FP_COSTS, @@ -10866,12 +10876,16 @@ mips_variable_issue (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, } /* Implement TARGET_SCHED_ADJUST_COST. We assume that anti and output - dependencies have no cost. */ + dependencies have no cost, except on the 20Kc where output-dependence + is treated like input-dependence. 
*/ static int mips_adjust_cost (rtx insn ATTRIBUTE_UNUSED, rtx link, rtx dep ATTRIBUTE_UNUSED, int cost) { + if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT + && TUNE_20KC) + return cost; if (REG_NOTE_KIND (link) != 0) return 0; return cost; @@ -10894,6 +10908,7 @@ mips_issue_rate (void) floating point load/stores also require a slot in the AGEN pipe. */ return 4; + case PROCESSOR_20KC: case PROCESSOR_R4130: case PROCESSOR_R5400: case PROCESSOR_R5500: diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 6ad6cf74be6..05392218527 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -254,6 +254,7 @@ extern const struct mips_rtx_cost_data *mips_cost; || mips_tune == PROCESSOR_74KF2_1 \ || mips_tune == PROCESSOR_74KF1_1 \ || mips_tune == PROCESSOR_74KF3_2) +#define TUNE_20KC (mips_tune == PROCESSOR_20KC) /* True if the pre-reload scheduler should try to create chains of multiply-add or multiply-subtract instructions. For example, diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index df4f80d3ff3..5d679083a31 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -640,6 +640,7 @@ (include "4k.md") (include "5k.md") +(include "20kc.md") (include "24k.md") (include "74k.md") (include "3000.md")