diff --git a/gcc/ChangeLog b/gcc/ChangeLog index fa92fdbf065..196c1aef134 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,44 @@ +2008-06-15 Maxim Kuvyrkov + + * config/mips/loongson2ef.md: New file. + * config/mips/mips.md (UNSPEC_LOONGSON_ALU1_TURN_ENABLED_INSN) + (UNSPEC_LOONGSON_ALU2_TURN_ENABLED_INSN) + (UNSPEC_LOONGSON_FALU1_TURN_ENABLED_INSN) + (UNSPEC_LOONGSON_FALU2_TURN_ENABLED_INSN): New constants. + (define_attr "cpu"): Rename loongson2e and loongson2f to loongson_2e + and loongson_2f. + (loongson2ef.md): New include. + * config/mips/loongson.md (vec_pack_ssat_, vec_pack_usat_) + (add3, paddd, ssadd3, usadd3) + (loongson_and_not_, loongson_average_, loongson_eq_) + (loongson_gt_, loongson_extract_halfword) + (loongson_insert_halfword_0, loongson_insert_halfword_2) + (loongson_insert_halfword_3, loongson_mult_add, smax3) + (umax3, smin3, umin3, loongson_move_byte_mask) + (umul3_highpart, smul3_highpart, loongson_smul_lowpart) + (loongson_umul_word, loongson_pasubub, reduc_uplus_) + (loongson_psadbh, loongson_pshufh, loongson_psll) + (loongson_psra, loongson_psrl, sub3, psubd) + (sssub3, ussub3, vec_interleave_high) + (vec_interleave_low): Define type attribute. + * config/mips/mips.c (mips_ls2): New static variable. + (mips_issue_rate): Update to handle tuning for Loongson 2E/2F. + (mips_ls2_init_dfa_post_cycle_insn, mips_init_dfa_post_cycle_insn) + (mips_ls2_dfa_post_advance_cycle, mips_dfa_post_advance_cycle): + Implement target scheduling hooks. + (mips_multipass_dfa_lookahead): Update to handle tuning for + Loongson 2E/2F. + (mips_sched_init): Initialize data for Loongson scheduling. + (mips_ls2_variable_issue): New static function. + (mips_variable_issue): Update to handle tuning for Loongson 2E/2F. + Add sanity check. + (TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN) + (TARGET_SCHED_DFA_POST_ADVANCE_CYCLE): Override target hooks. + * config/mips/mips.h (TUNE_LOONGSON_2EF): New macro. 
+ (ISA_HAS_XFER_DELAY, ISA_HAS_FCMP_DELAY, ISA_HAS_HILO_INTERLOCKS): + Handle ST Loongson 2E/2F cores. + (CPU_UNITS_QUERY): Define macro to enable querying of DFA units. + 2008-06-15 Ralf Wildenhues * omp-low.c (extract_omp_for_data): Fix comment typo. diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md index 5177ae477dc..73d183ac95c 100644 --- a/gcc/config/mips/loongson.md +++ b/gcc/config/mips/loongson.md @@ -108,7 +108,8 @@ (ss_truncate: (match_operand:VWH 2 "register_operand" "f"))))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "packss\t%0,%1,%2") + "packss\t%0,%1,%2" + [(set_attr "type" "fmul")]) ;; Pack with unsigned saturation. (define_insn "vec_pack_usat_" @@ -119,7 +120,8 @@ (us_truncate: (match_operand:VH 2 "register_operand" "f"))))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "packus\t%0,%1,%2") + "packus\t%0,%1,%2" + [(set_attr "type" "fmul")]) ;; Addition, treating overflow by wraparound. (define_insn "add3" @@ -127,7 +129,8 @@ (plus:VWHB (match_operand:VWHB 1 "register_operand" "f") (match_operand:VWHB 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "padd\t%0,%1,%2") + "padd\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Addition of doubleword integers stored in FP registers. ;; Overflow is treated by wraparound. @@ -141,7 +144,8 @@ (match_operand:DI 2 "register_operand" "f")] UNSPEC_LOONGSON_PADDD))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "paddd\t%0,%1,%2") + "paddd\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Addition, treating overflow by signed saturation. (define_insn "ssadd3" @@ -149,7 +153,8 @@ (ss_plus:VHB (match_operand:VHB 1 "register_operand" "f") (match_operand:VHB 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "padds\t%0,%1,%2") + "padds\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Addition, treating overflow by unsigned saturation. 
(define_insn "usadd3" @@ -157,7 +162,8 @@ (us_plus:VHB (match_operand:VHB 1 "register_operand" "f") (match_operand:VHB 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "paddus\t%0,%1,%2") + "paddus\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Logical AND NOT. (define_insn "loongson_pandn_" @@ -166,7 +172,8 @@ (not:VWHBDI (match_operand:VWHBDI 1 "register_operand" "f")) (match_operand:VWHBDI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pandn\t%0,%1,%2") + "pandn\t%0,%1,%2" + [(set_attr "type" "fmul")]) ;; Average. (define_insn "loongson_pavg" @@ -175,7 +182,8 @@ (match_operand:VHB 2 "register_operand" "f")] UNSPEC_LOONGSON_PAVG))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pavg\t%0,%1,%2") + "pavg\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Equality test. (define_insn "loongson_pcmpeq" @@ -184,7 +192,8 @@ (match_operand:VWHB 2 "register_operand" "f")] UNSPEC_LOONGSON_PCMPEQ))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pcmpeq\t%0,%1,%2") + "pcmpeq\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Greater-than test. (define_insn "loongson_pcmpgt" @@ -193,7 +202,8 @@ (match_operand:VWHB 2 "register_operand" "f")] UNSPEC_LOONGSON_PCMPGT))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pcmpgt\t%0,%1,%2") + "pcmpgt\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Extract halfword. (define_insn "loongson_pextr" @@ -202,7 +212,8 @@ (match_operand:SI 2 "register_operand" "f")] UNSPEC_LOONGSON_PEXTR))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pextr\t%0,%1,%2") + "pextr\t%0,%1,%2" + [(set_attr "type" "fmul")]) ;; Insert halfword. 
(define_insn "loongson_pinsr_0" @@ -211,7 +222,8 @@ (match_operand:VH 2 "register_operand" "f")] UNSPEC_LOONGSON_PINSR_0))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr_0\t%0,%1,%2") + "pinsr_0\t%0,%1,%2" + [(set_attr "type" "fdiv")]) (define_insn "loongson_pinsr_1" [(set (match_operand:VH 0 "register_operand" "=f") @@ -219,7 +231,8 @@ (match_operand:VH 2 "register_operand" "f")] UNSPEC_LOONGSON_PINSR_1))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr_1\t%0,%1,%2") + "pinsr_1\t%0,%1,%2" + [(set_attr "type" "fdiv")]) (define_insn "loongson_pinsr_2" [(set (match_operand:VH 0 "register_operand" "=f") @@ -227,7 +240,8 @@ (match_operand:VH 2 "register_operand" "f")] UNSPEC_LOONGSON_PINSR_2))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr_2\t%0,%1,%2") + "pinsr_2\t%0,%1,%2" + [(set_attr "type" "fdiv")]) (define_insn "loongson_pinsr_3" [(set (match_operand:VH 0 "register_operand" "=f") @@ -235,7 +249,8 @@ (match_operand:VH 2 "register_operand" "f")] UNSPEC_LOONGSON_PINSR_3))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr_3\t%0,%1,%2") + "pinsr_3\t%0,%1,%2" + [(set_attr "type" "fdiv")]) ;; Multiply and add packed integers. (define_insn "loongson_pmadd" @@ -244,7 +259,8 @@ (match_operand:VH 2 "register_operand" "f")] UNSPEC_LOONGSON_PMADD))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmadd\t%0,%1,%2") + "pmadd\t%0,%1,%2" + [(set_attr "type" "fmul")]) ;; Maximum of signed halfwords. (define_insn "smax3" @@ -252,7 +268,8 @@ (smax:VH (match_operand:VH 1 "register_operand" "f") (match_operand:VH 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmaxs\t%0,%1,%2") + "pmaxs\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Maximum of unsigned bytes. 
(define_insn "umax3" @@ -260,7 +277,8 @@ (umax:VB (match_operand:VB 1 "register_operand" "f") (match_operand:VB 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmaxu\t%0,%1,%2") + "pmaxu\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Minimum of signed halfwords. (define_insn "smin3" @@ -268,7 +286,8 @@ (smin:VH (match_operand:VH 1 "register_operand" "f") (match_operand:VH 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmins\t%0,%1,%2") + "pmins\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Minimum of unsigned bytes. (define_insn "umin3" @@ -276,7 +295,8 @@ (umin:VB (match_operand:VB 1 "register_operand" "f") (match_operand:VB 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pminu\t%0,%1,%2") + "pminu\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Move byte mask. (define_insn "loongson_pmovmsk" @@ -284,7 +304,8 @@ (unspec:VB [(match_operand:VB 1 "register_operand" "f")] UNSPEC_LOONGSON_PMOVMSK))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmovmsk\t%0,%1") + "pmovmsk\t%0,%1" + [(set_attr "type" "fabs")]) ;; Multiply unsigned integers and store high result. (define_insn "umul3_highpart" @@ -293,7 +314,8 @@ (match_operand:VH 2 "register_operand" "f")] UNSPEC_LOONGSON_PMULHU))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmulhu\t%0,%1,%2") + "pmulhu\t%0,%1,%2" + [(set_attr "type" "fmul")]) ;; Multiply signed integers and store high result. (define_insn "smul3_highpart" @@ -302,7 +324,8 @@ (match_operand:VH 2 "register_operand" "f")] UNSPEC_LOONGSON_PMULH))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmulh\t%0,%1,%2") + "pmulh\t%0,%1,%2" + [(set_attr "type" "fmul")]) ;; Multiply signed integers and store low result. 
(define_insn "loongson_pmull" @@ -311,7 +334,8 @@ (match_operand:VH 2 "register_operand" "f")] UNSPEC_LOONGSON_PMULL))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmull\t%0,%1,%2") + "pmull\t%0,%1,%2" + [(set_attr "type" "fmul")]) ;; Multiply unsigned word integers. (define_insn "loongson_pmulu" @@ -320,7 +344,8 @@ (match_operand:VW 2 "register_operand" "f")] UNSPEC_LOONGSON_PMULU))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmulu\t%0,%1,%2") + "pmulu\t%0,%1,%2" + [(set_attr "type" "fmul")]) ;; Absolute difference. (define_insn "loongson_pasubub" @@ -329,7 +354,8 @@ (match_operand:VB 2 "register_operand" "f")] UNSPEC_LOONGSON_PASUBUB))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pasubub\t%0,%1,%2") + "pasubub\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Sum of unsigned byte integers. (define_insn "reduc_uplus_" @@ -337,7 +363,8 @@ (unspec: [(match_operand:VB 1 "register_operand" "f")] UNSPEC_LOONGSON_BIADD))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "biadd\t%0,%1") + "biadd\t%0,%1" + [(set_attr "type" "fabs")]) ;; Sum of absolute differences. (define_insn "loongson_psadbh" @@ -346,7 +373,8 @@ (match_operand:VB 2 "register_operand" "f")] UNSPEC_LOONGSON_PSADBH))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pasubub\t%0,%1,%2;biadd\t%0,%0") + "pasubub\t%0,%1,%2;biadd\t%0,%0" + [(set_attr "type" "fadd")]) ;; Shuffle halfwords. (define_insn "loongson_pshufh" @@ -356,7 +384,8 @@ (match_operand:SI 3 "register_operand" "f")] UNSPEC_LOONGSON_PSHUFH))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pshufh\t%0,%2,%3") + "pshufh\t%0,%2,%3" + [(set_attr "type" "fmul")]) ;; Shift left logical. (define_insn "loongson_psll" @@ -364,7 +393,8 @@ (ashift:VWH (match_operand:VWH 1 "register_operand" "f") (match_operand:SI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "psll\t%0,%1,%2") + "psll\t%0,%1,%2" + [(set_attr "type" "fmul")]) ;; Shift right arithmetic. 
(define_insn "loongson_psra" @@ -372,7 +402,8 @@ (ashiftrt:VWH (match_operand:VWH 1 "register_operand" "f") (match_operand:SI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "psra\t%0,%1,%2") + "psra\t%0,%1,%2" + [(set_attr "type" "fdiv")]) ;; Shift right logical. (define_insn "loongson_psrl" @@ -380,7 +411,8 @@ (lshiftrt:VWH (match_operand:VWH 1 "register_operand" "f") (match_operand:SI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "psrl\t%0,%1,%2") + "psrl\t%0,%1,%2" + [(set_attr "type" "fdiv")]) ;; Subtraction, treating overflow by wraparound. (define_insn "sub3" @@ -388,7 +420,8 @@ (minus:VWHB (match_operand:VWHB 1 "register_operand" "f") (match_operand:VWHB 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "psub\t%0,%1,%2") + "psub\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Subtraction of doubleword integers stored in FP registers. ;; Overflow is treated by wraparound. @@ -400,7 +433,8 @@ (match_operand:DI 2 "register_operand" "f")] UNSPEC_LOONGSON_PSUBD))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "psubd\t%0,%1,%2") + "psubd\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Subtraction, treating overflow by signed saturation. (define_insn "sssub3" @@ -408,7 +442,8 @@ (ss_minus:VHB (match_operand:VHB 1 "register_operand" "f") (match_operand:VHB 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "psubs\t%0,%1,%2") + "psubs\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Subtraction, treating overflow by unsigned saturation. (define_insn "ussub3" @@ -416,7 +451,8 @@ (us_minus:VHB (match_operand:VHB 1 "register_operand" "f") (match_operand:VHB 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "psubus\t%0,%1,%2") + "psubus\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Unpack high data. 
(define_insn "vec_interleave_high" @@ -425,7 +461,8 @@ (match_operand:VWHB 2 "register_operand" "f")] UNSPEC_LOONGSON_PUNPCKH))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "punpckh\t%0,%1,%2") + "punpckh\t%0,%1,%2" + [(set_attr "type" "fdiv")]) ;; Unpack low data. (define_insn "vec_interleave_low" @@ -434,4 +471,5 @@ (match_operand:VWHB 2 "register_operand" "f")] UNSPEC_LOONGSON_PUNPCKL))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "punpckl\t%0,%1,%2") + "punpckl\t%0,%1,%2" + [(set_attr "type" "fdiv")]) diff --git a/gcc/config/mips/loongson2ef.md b/gcc/config/mips/loongson2ef.md new file mode 100644 index 00000000000..8d294822102 --- /dev/null +++ b/gcc/config/mips/loongson2ef.md @@ -0,0 +1,240 @@ +;; Pipeline model for ST Microelectronics Loongson-2E/2F cores. + +;; Copyright (C) 2008 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Automaton for integer instructions. +(define_automaton "ls2_alu") + +;; ALU1 and ALU2. +;; We need to query these units to adjust round-robin counter. +(define_query_cpu_unit "ls2_alu1_core,ls2_alu2_core" "ls2_alu") + +;; Pseudo units to help modeling of ALU1/2 round-robin dispatch strategy. +(define_cpu_unit "ls2_alu1_turn,ls2_alu2_turn" "ls2_alu") + +;; Pseudo units to enable/disable ls2_alu[12]_turn units. 
+;; ls2_alu[12]_turn unit can be subscribed only after ls2_alu[12]_turn_enabled +;; unit is subscribed. +(define_cpu_unit "ls2_alu1_turn_enabled,ls2_alu2_turn_enabled" "ls2_alu") +(presence_set "ls2_alu1_turn" "ls2_alu1_turn_enabled") +(presence_set "ls2_alu2_turn" "ls2_alu2_turn_enabled") + +;; Reservations for ALU1 (ALU2) instructions. +;; Instruction goes to ALU1 (ALU2) and makes next ALU1/2 instruction to +;; be dispatched to ALU2 (ALU1). +(define_reservation "ls2_alu1" + "(ls2_alu1_core+ls2_alu2_turn_enabled)|ls2_alu1_core") +(define_reservation "ls2_alu2" + "(ls2_alu2_core+ls2_alu1_turn_enabled)|ls2_alu2_core") + +;; Reservation for ALU1/2 instructions. +;; Instruction will go to ALU1 iff ls2_alu1_turn_enabled is subscribed and +;; switch the turn to ALU2 by subscribing ls2_alu2_turn_enabled. +;; Or to ALU2 otherwise. +(define_reservation "ls2_alu" + "(ls2_alu1_core+ls2_alu1_turn+ls2_alu2_turn_enabled) + |(ls2_alu1_core+ls2_alu1_turn) + |(ls2_alu2_core+ls2_alu2_turn+ls2_alu1_turn_enabled) + |(ls2_alu2_core+ls2_alu2_turn)") + +;; Automaton for floating-point instructions. +(define_automaton "ls2_falu") + +;; FALU1 and FALU2. +;; We need to query these units to adjust round-robin counter. +(define_query_cpu_unit "ls2_falu1_core,ls2_falu2_core" "ls2_falu") + +;; Pseudo units to help modeling of FALU1/2 round-robin dispatch strategy. +(define_cpu_unit "ls2_falu1_turn,ls2_falu2_turn" "ls2_falu") + +;; Pseudo units to enable/disable ls2_falu[12]_turn units. +;; ls2_falu[12]_turn unit can be subscribed only after +;; ls2_falu[12]_turn_enabled unit is subscribed. +(define_cpu_unit "ls2_falu1_turn_enabled,ls2_falu2_turn_enabled" "ls2_falu") +(presence_set "ls2_falu1_turn" "ls2_falu1_turn_enabled") +(presence_set "ls2_falu2_turn" "ls2_falu2_turn_enabled") + +;; Reservations for FALU1 (FALU2) instructions. +;; Instruction goes to FALU1 (FALU2) and makes next FALU1/2 instruction to +;; be dispatched to FALU2 (FALU1). 
+(define_reservation "ls2_falu1" + "(ls2_falu1_core+ls2_falu2_turn_enabled)|ls2_falu1_core") +(define_reservation "ls2_falu2" + "(ls2_falu2_core+ls2_falu1_turn_enabled)|ls2_falu2_core") + +;; Reservation for FALU1/2 instructions. +;; Instruction will go to FALU1 iff ls2_falu1_turn_enabled is subscribed and +;; switch the turn to FALU2 by subscribing ls2_falu2_turn_enabled. +;; Or to FALU2 otherwise. +(define_reservation "ls2_falu" + "(ls2_falu1+ls2_falu1_turn+ls2_falu2_turn_enabled) + |(ls2_falu1+ls2_falu1_turn) + |(ls2_falu2+ls2_falu2_turn+ls2_falu1_turn_enabled) + |(ls2_falu2+ls2_falu2_turn)") + +;; The following 4 instructions each subscribe one of +;; ls2_[f]alu{1,2}_turn_enabled units according to this attribute. +;; These instructions are used in mips.c: mips_ls2_dfa_post_advance_cycle. + +(define_attr "ls2_turn_type" "alu1,alu2,falu1,falu2,unknown" + (const_string "unknown")) + +;; Subscribe ls2_alu1_turn_enabled. +(define_insn "ls2_alu1_turn_enabled_insn" + [(unspec [(const_int 0)] UNSPEC_LOONGSON_ALU1_TURN_ENABLED_INSN)] + "TUNE_LOONGSON_2EF" + { gcc_unreachable (); } + [(set_attr "ls2_turn_type" "alu1")]) + +(define_insn_reservation "ls2_alu1_turn_enabled" 0 + (eq_attr "ls2_turn_type" "alu1") + "ls2_alu1_turn_enabled") + +;; Subscribe ls2_alu2_turn_enabled. +(define_insn "ls2_alu2_turn_enabled_insn" + [(unspec [(const_int 0)] UNSPEC_LOONGSON_ALU2_TURN_ENABLED_INSN)] + "TUNE_LOONGSON_2EF" + { gcc_unreachable (); } + [(set_attr "ls2_turn_type" "alu2")]) + +(define_insn_reservation "ls2_alu2_turn_enabled" 0 + (eq_attr "ls2_turn_type" "alu2") + "ls2_alu2_turn_enabled") + +;; Subscribe ls2_falu1_turn_enabled. 
+(define_insn "ls2_falu1_turn_enabled_insn" + [(unspec [(const_int 0)] UNSPEC_LOONGSON_FALU1_TURN_ENABLED_INSN)] + "TUNE_LOONGSON_2EF" + { gcc_unreachable (); } + [(set_attr "ls2_turn_type" "falu1")]) + +(define_insn_reservation "ls2_falu1_turn_enabled" 0 + (eq_attr "ls2_turn_type" "falu1") + "ls2_falu1_turn_enabled") + +;; Subscribe ls2_falu2_turn_enabled. +(define_insn "ls2_falu2_turn_enabled_insn" + [(unspec [(const_int 0)] UNSPEC_LOONGSON_FALU2_TURN_ENABLED_INSN)] + "TUNE_LOONGSON_2EF" + { gcc_unreachable (); } + [(set_attr "ls2_turn_type" "falu2")]) + +(define_insn_reservation "ls2_falu2_turn_enabled" 0 + (eq_attr "ls2_turn_type" "falu2") + "ls2_falu2_turn_enabled") + +;; Automaton for memory operations. +(define_automaton "ls2_mem") + +;; Memory unit. +(define_query_cpu_unit "ls2_mem" "ls2_mem") + +;; Reservation for integer instructions. +(define_insn_reservation "ls2_alu" 2 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "arith,condmove,const,logical,mfhilo,move, + mthilo,nop,shift,signext,slt")) + "ls2_alu") + +;; Reservation for branch instructions. +(define_insn_reservation "ls2_branch" 2 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "branch,jump,call,trap")) + "ls2_alu1") + +;; Reservation for integer multiplication instructions. +(define_insn_reservation "ls2_imult" 5 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "imul,imul3")) + "ls2_alu2,ls2_alu2_core") + +;; Reservation for integer division / remainder instructions. +;; These instructions use the SRT algorithm and hence take 2-38 cycles. +(define_insn_reservation "ls2_idiv" 20 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "idiv")) + "ls2_alu2,ls2_alu2_core*18") + +;; Reservation for memory load instructions. +(define_insn_reservation "ls2_load" 5 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "load,fpload,mfc,mtc")) + "ls2_mem") + +;; Reservation for memory store instructions. 
+;; With stores we assume they don't alias with dependent loads. +;; Therefore we set the latency to zero. +(define_insn_reservation "ls2_store" 0 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "store,fpstore")) + "ls2_mem") + +;; Reservation for floating-point instructions of latency 3. +(define_insn_reservation "ls2_fp3" 3 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "fabs,fneg,fcmp,fmove")) + "ls2_falu1") + +;; Reservation for floating-point instructions of latency 5. +(define_insn_reservation "ls2_fp5" 5 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "fcvt")) + "ls2_falu1") + +;; Reservation for floating-point instructions that can go +;; to either of FALU1/2 units. +(define_insn_reservation "ls2_falu" 7 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "fadd,fmul,fmadd")) + "ls2_falu") + +;; Reservation for floating-point division / remainder instructions. +;; These instructions use the SRT algorithm and hence take a variable amount +;; of cycles: +;; div.s takes 5-11 cycles +;; div.d takes 5-18 cycles +(define_insn_reservation "ls2_fdiv" 9 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "fdiv")) + "ls2_falu2,ls2_falu2_core*7") + +;; Reservation for floating-point sqrt instructions. +;; These instructions use the SRT algorithm and hence take a variable amount +;; of cycles: +;; sqrt.s takes 5-17 cycles +;; sqrt.d takes 5-32 cycles +(define_insn_reservation "ls2_fsqrt" 15 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "fsqrt")) + "ls2_falu2,ls2_falu2_core*13") + +;; Two consecutive ALU instructions. +(define_insn_reservation "ls2_multi" 4 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "multi")) + "(ls2_alu1,ls2_alu2_core)|(ls2_alu2,ls2_alu1_core)") + +;; Reservation for everything else. Normally, this reservation +;; will only be used to handle cases like compiling for non-loongson +;; CPUs with -mtune=loongson2?. 
+;; +;; This reservation depends upon the fact that DFA will check +;; reservations in the same order as they appear in the file. +(define_insn_reservation "ls2_unknown" 1 + (eq_attr "cpu" "loongson_2e,loongson_2f") + "ls2_alu1_core+ls2_alu2_core+ls2_falu1_core+ls2_falu2_core+ls2_mem") diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index cd94ac15e5a..3ef0b331373 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -9778,6 +9778,41 @@ mips_store_data_bypass_p (rtx out_insn, rtx in_insn) return !store_data_bypass_p (out_insn, in_insn); } + +/* Variables and flags used in scheduler hooks when tuning for + Loongson 2E/2F. */ +static struct +{ + /* Variables to support Loongson 2E/2F round-robin [F]ALU1/2 dispatch + strategy. */ + + /* If true, then next ALU1/2 instruction will go to ALU1. */ + bool alu1_turn_p; + + /* If true, then next FALU1/2 instruction will go to FALU1. */ + bool falu1_turn_p; + + /* Codes to query if [f]alu{1,2}_core units are subscribed or not. */ + int alu1_core_unit_code; + int alu2_core_unit_code; + int falu1_core_unit_code; + int falu2_core_unit_code; + + /* True if current cycle has a multi instruction. + This flag is used in mips_ls2_dfa_post_advance_cycle. */ + bool cycle_has_multi_p; + + /* Instructions to subscribe ls2_[f]alu{1,2}_turn_enabled units. + These are used in mips_ls2_dfa_post_advance_cycle to initialize + DFA state. + E.g., when alu1_turn_enabled_insn is issued it makes next ALU1/2 + instruction go to ALU1. */ + rtx alu1_turn_enabled_insn; + rtx alu2_turn_enabled_insn; + rtx falu1_turn_enabled_insn; + rtx falu2_turn_enabled_insn; +} mips_ls2; + /* Implement TARGET_SCHED_ADJUST_COST. We assume that anti and output dependencies have no cost, except on the 20Kc where output-dependence is treated like input-dependence. */ @@ -9828,11 +9863,124 @@ mips_issue_rate (void) reach the theoretical max of 4. 
*/ return 3; + case PROCESSOR_LOONGSON_2E: + case PROCESSOR_LOONGSON_2F: + return 4; + default: return 1; } } +/* Implement TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN hook for Loongson2. */ + +static void +mips_ls2_init_dfa_post_cycle_insn (void) +{ + start_sequence (); + emit_insn (gen_ls2_alu1_turn_enabled_insn ()); + mips_ls2.alu1_turn_enabled_insn = get_insns (); + end_sequence (); + + start_sequence (); + emit_insn (gen_ls2_alu2_turn_enabled_insn ()); + mips_ls2.alu2_turn_enabled_insn = get_insns (); + end_sequence (); + + start_sequence (); + emit_insn (gen_ls2_falu1_turn_enabled_insn ()); + mips_ls2.falu1_turn_enabled_insn = get_insns (); + end_sequence (); + + start_sequence (); + emit_insn (gen_ls2_falu2_turn_enabled_insn ()); + mips_ls2.falu2_turn_enabled_insn = get_insns (); + end_sequence (); + + mips_ls2.alu1_core_unit_code = get_cpu_unit_code ("ls2_alu1_core"); + mips_ls2.alu2_core_unit_code = get_cpu_unit_code ("ls2_alu2_core"); + mips_ls2.falu1_core_unit_code = get_cpu_unit_code ("ls2_falu1_core"); + mips_ls2.falu2_core_unit_code = get_cpu_unit_code ("ls2_falu2_core"); +} + +/* Implement TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN hook. + Init data used in mips_dfa_post_advance_cycle. */ + +static void +mips_init_dfa_post_cycle_insn (void) +{ + if (TUNE_LOONGSON_2EF) + mips_ls2_init_dfa_post_cycle_insn (); +} + +/* Initialize STATE when scheduling for Loongson 2E/2F. + Support round-robin dispatch scheme by enabling only one of + ALU1/ALU2 and one of FALU1/FALU2 units for ALU1/2 and FALU1/2 instructions + respectively. */ + +static void +mips_ls2_dfa_post_advance_cycle (state_t state) +{ + if (cpu_unit_reservation_p (state, mips_ls2.alu1_core_unit_code)) + { + /* Though there are no non-pipelined ALU1 insns, + we can get an instruction of type 'multi' before reload. 
*/ + gcc_assert (mips_ls2.cycle_has_multi_p); + mips_ls2.alu1_turn_p = false; + } + + mips_ls2.cycle_has_multi_p = false; + + if (cpu_unit_reservation_p (state, mips_ls2.alu2_core_unit_code)) + /* We have a non-pipelined alu instruction in the core, + adjust round-robin counter. */ + mips_ls2.alu1_turn_p = true; + + if (mips_ls2.alu1_turn_p) + { + if (state_transition (state, mips_ls2.alu1_turn_enabled_insn) >= 0) + gcc_unreachable (); + } + else + { + if (state_transition (state, mips_ls2.alu2_turn_enabled_insn) >= 0) + gcc_unreachable (); + } + + if (cpu_unit_reservation_p (state, mips_ls2.falu1_core_unit_code)) + { + /* There are no non-pipelined FALU1 insns. */ + gcc_unreachable (); + mips_ls2.falu1_turn_p = false; + } + + if (cpu_unit_reservation_p (state, mips_ls2.falu2_core_unit_code)) + /* We have a non-pipelined falu instruction in the core, + adjust round-robin counter. */ + mips_ls2.falu1_turn_p = true; + + if (mips_ls2.falu1_turn_p) + { + if (state_transition (state, mips_ls2.falu1_turn_enabled_insn) >= 0) + gcc_unreachable (); + } + else + { + if (state_transition (state, mips_ls2.falu2_turn_enabled_insn) >= 0) + gcc_unreachable (); + } +} + +/* Implement TARGET_SCHED_DFA_POST_ADVANCE_CYCLE. + This hook is being called at the start of each cycle. */ + +static void +mips_dfa_post_advance_cycle (void) +{ + if (TUNE_LOONGSON_2EF) + mips_ls2_dfa_post_advance_cycle (curr_state); +} + /* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. This should be as wide as the scheduling freedom in the DFA. 
*/ @@ -9843,6 +9991,9 @@ mips_multipass_dfa_lookahead (void) if (TUNE_SB1) return 4; + if (TUNE_LOONGSON_2EF) + return 4; + return 0; } @@ -10103,6 +10254,12 @@ mips_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, mips_macc_chains_last_hilo = 0; vr4130_last_insn = 0; mips_74k_agen_init (NULL_RTX); + + /* When scheduling for Loongson2, branch instructions go to ALU1, + therefore basic block is most likely to start with round-robin counter + pointed to ALU2. */ + mips_ls2.alu1_turn_p = false; + mips_ls2.falu1_turn_p = true; } /* Implement TARGET_SCHED_REORDER and TARGET_SCHED_REORDER2. */ @@ -10128,6 +10285,37 @@ mips_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, return mips_issue_rate (); } +/* Update round-robin counters for ALU1/2 and FALU1/2. */ + +static void +mips_ls2_variable_issue (rtx insn) +{ + if (mips_ls2.alu1_turn_p) + { + if (cpu_unit_reservation_p (curr_state, mips_ls2.alu1_core_unit_code)) + mips_ls2.alu1_turn_p = false; + } + else + { + if (cpu_unit_reservation_p (curr_state, mips_ls2.alu2_core_unit_code)) + mips_ls2.alu1_turn_p = true; + } + + if (mips_ls2.falu1_turn_p) + { + if (cpu_unit_reservation_p (curr_state, mips_ls2.falu1_core_unit_code)) + mips_ls2.falu1_turn_p = false; + } + else + { + if (cpu_unit_reservation_p (curr_state, mips_ls2.falu2_core_unit_code)) + mips_ls2.falu1_turn_p = true; + } + + if (recog_memoized (insn) >= 0) + mips_ls2.cycle_has_multi_p |= (get_attr_type (insn) == TYPE_MULTI); +} + /* Implement TARGET_SCHED_VARIABLE_ISSUE. */ static int @@ -10143,7 +10331,16 @@ mips_variable_issue (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, vr4130_last_insn = insn; if (TUNE_74K) mips_74k_agen_init (insn); + else if (TUNE_LOONGSON_2EF) + mips_ls2_variable_issue (insn); } + + /* Instructions of type 'multi' should all be split before + the second scheduling pass. 
*/ + gcc_assert (!reload_completed + || recog_memoized (insn) < 0 + || get_attr_type (insn) != TYPE_MULTI); + return more; } @@ -12881,6 +13078,10 @@ mips_order_regs_for_local_alloc (void) #define TARGET_SCHED_ADJUST_COST mips_adjust_cost #undef TARGET_SCHED_ISSUE_RATE #define TARGET_SCHED_ISSUE_RATE mips_issue_rate +#undef TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN +#define TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN mips_init_dfa_post_cycle_insn +#undef TARGET_SCHED_DFA_POST_ADVANCE_CYCLE +#define TARGET_SCHED_DFA_POST_ADVANCE_CYCLE mips_dfa_post_advance_cycle #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ mips_multipass_dfa_lookahead diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 74c079a7d4a..5bcebd845e0 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -266,6 +266,8 @@ enum mips_code_readable_setting { || mips_tune == PROCESSOR_74KF1_1 \ || mips_tune == PROCESSOR_74KF3_2) #define TUNE_20KC (mips_tune == PROCESSOR_20KC) +#define TUNE_LOONGSON_2EF (mips_tune == PROCESSOR_LOONGSON_2E \ + || mips_tune == PROCESSOR_LOONGSON_2F) /* Whether vector modes and intrinsics for ST Microelectronics Loongson-2E/2F processors should be enabled. In o32 pairs of @@ -891,10 +893,12 @@ enum mips_code_readable_setting { && !TARGET_MIPS16) /* Likewise mtc1 and mfc1. */ -#define ISA_HAS_XFER_DELAY (mips_isa <= 3) +#define ISA_HAS_XFER_DELAY (mips_isa <= 3 \ + && !TARGET_LOONGSON_2EF) /* Likewise floating-point comparisons. */ -#define ISA_HAS_FCMP_DELAY (mips_isa <= 3) +#define ISA_HAS_FCMP_DELAY (mips_isa <= 3 \ + && !TARGET_LOONGSON_2EF) /* True if mflo and mfhi can be immediately followed by instructions which write to the HI and LO registers. @@ -911,7 +915,8 @@ enum mips_code_readable_setting { #define ISA_HAS_HILO_INTERLOCKS (ISA_MIPS32 \ || ISA_MIPS32R2 \ || ISA_MIPS64 \ - || TARGET_MIPS5500) + || TARGET_MIPS5500 \ + || TARGET_LOONGSON_2EF) /* ISA includes synci, jr.hb and jalr.hb. 
*/ #define ISA_HAS_SYNCI (ISA_MIPS32R2 && !TARGET_MIPS16) @@ -3212,3 +3217,6 @@ extern const struct mips_cpu_info *mips_tune_info; extern const struct mips_rtx_cost_data *mips_cost; extern enum mips_code_readable_setting mips_code_readable; #endif + +/* Enable querying of DFA units. */ +#define CPU_UNITS_QUERY 1 diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index ac1dc60dc81..d75ea8cd3b0 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -239,6 +239,12 @@ (UNSPEC_LOONGSON_PUNPCKL 519) (UNSPEC_LOONGSON_PADDD 520) (UNSPEC_LOONGSON_PSUBD 521) + + ;; Used in loongson2ef.md + (UNSPEC_LOONGSON_ALU1_TURN_ENABLED_INSN 530) + (UNSPEC_LOONGSON_ALU2_TURN_ENABLED_INSN 531) + (UNSPEC_LOONGSON_FALU1_TURN_ENABLED_INSN 532) + (UNSPEC_LOONGSON_FALU2_TURN_ENABLED_INSN 533) ] ) @@ -441,7 +447,7 @@ ;; Attribute describing the processor. This attribute must match exactly ;; with the processor_type enumeration in mips.h. (define_attr "cpu" - "r3000,4kc,4kp,5kc,5kf,20kc,24kc,24kf2_1,24kf1_1,74kc,74kf2_1,74kf1_1,74kf3_2,loongson2e,loongson2f,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,sb1,sb1a,sr71000,xlr" + "r3000,4kc,4kp,5kc,5kf,20kc,24kc,24kf2_1,24kf1_1,74kc,74kf2_1,74kf1_1,74kf3_2,loongson_2e,loongson_2f,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,sb1,sb1a,sr71000,xlr" (const (symbol_ref "mips_tune"))) ;; The type of hardware hazard associated with this instruction. @@ -793,6 +799,7 @@ (include "sb1.md") (include "sr71k.md") (include "xlr.md") +(include "loongson2ef.md") (include "generic.md") ;;