diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 147c28cd54d..e3a24355f37 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,72 @@ +2006-11-01 Pete Steinmetz + Peter Bergner + + * doc/invoke.texi: Add cpu_type power6x. + (RS/6000 and PowerPC Options): Add -mmfpgpr. + * config.gcc: Add cpu_type power6x. + * configure.ac: Add test for mf{t,f}gpr instructions. + (HAVE_AS_MFPGPR): New. + * config.in: Regenerate. + * configure: Regenerate. + * config/rs6000/aix52.h (ASM_CPU_SPEC): Add power6x. + * config/rs6000/rs6000.md (define_attr "type"): Add insert_dword, + shift,trap,var_shift_rotate,cntlz,exts, var_delayed_compare, mffgpr + and mftgpr attributes. + (define_attr "cpu"): Add power6. + Change instruction sequences to use new attributes. + (floatsidf2,fix_truncdfsi2): Use TARGET_MFPGPR. + (fix_truncdfsi2_mfpgpr): New. + (floatsidf_ppc64_mfpgpr): New. + (floatsidf_ppc64): Added !TARGET_MFPGPR condition. + (movdf_hardfloat64_mfpgpr,movdi_mfpgpr): New. + (movdf_hardfloat64): Added !TARGET_MFPGPR condition. + (movdi_internal64): Added !TARGET_MFPGPR and related conditions. + (fix_truncdfsi2): Use gpc_reg_operand constraint. + * config/rs6000/{6xx.md,power4.md,8540.md,603.md,mpc.md, + 7xx.md,rios2.md,7450.md,440.md,rios1.md,rs64.md,power5.md,40x.md}: + Add descriptions for insert_dword, shift,trap,var_shift_rotate, + cntlz,exts and var_delayed_compare. + * config/rs6000/rs6000-c.c (rs6000_cpu_cpp_builtins): Define + _ARCH_PWR6X, if features enabled. + * config/rs6000/rs6000.opt (mmfpgpr): New. + * config/rs6000/rs6000.c (rs6000_align_branch_targets): New variable. + (cached_can_issue_more): New variable. + (processor_costs): Add power6_cost. + (rs6000_sched_init): New function. + (is_dispatch_slot_restricted): Deleted. + (set_to_load_agen): New function. + (is_load_insn,is_store_insn): New functions. + (adjacent_mem_locations): New function. + (insn_must_be_first_in_group): New function. + (insn_must_be_last_in_group): New function. 
+ (rs6000_sched_reorder): New function. + (rs6000_sched_reorder2): New function. + (TARGET_SCHED_INIT,TARGET_SCHED_REORDER, + TARGET_SCHED_REORDER2): Define. + (processor_target_table): Use PROCESSOR_POWER6 for power6. + Add power6x. Add MASK_MFPGPR for power6x. + (POWERPC_MASKS): Add MASK_MFPGPR. + (rs6000_override_options): Set rs6000_always_hint to false + for power6. Set rs6000_align_branch_targets. Replace + rs6000_sched_groups check with rs6000_align_branch_targets. + Use PROCESSOR_POWER6. + (last_scheduled_insn): New variable. + (load_store_pendulum): New variable. + (rs6000_variable_issue): Set last_scheduled_insn and + cached_can_issue_more. + (rs6000_adjust_cost): Add power6 cost adjustments. + (rs6000_adjust_priority): Replace is_dispatch_slot_restricted + with insn_must_be_first_in_group. Add power6 priority adjustments. + (rs6000_issue_rate): Add CPU_POWER6. + (insn_terminates_group_p): Use insn_must_be_{first,last}_in_group. + * config/rs6000/rs6000.h (processor_type): Add PROCESSOR_POWER6. + (TARGET_MFPGPR): New. + (SECONDARY_MEMORY_NEEDED): Use TARGET_MFPGPR. + (ASM_CPU_SPEC): Add power6x. + (SECONDARY_MEMORY_NEEDED): Added mode!=DFmode and mode!=DImode + conditions. + * config/rs6000/power6.md: New file. 
+ 2006-11-01 Adam Nemet * tree-pretty-print.c (dump_generic_node) : Use diff --git a/gcc/config.gcc b/gcc/config.gcc index 9ab3a659c6c..fcdb89c8e71 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -303,7 +303,7 @@ powerpc*-*-*) extra_headers="ppc-asm.h altivec.h spe.h" need_64bit_hwint=yes case x$with_cpu in - xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456]|xrs64a) + xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456]|xpower6x|xrs64a) cpu_is_64bit=yes ;; esac @@ -2870,7 +2870,7 @@ case "${target}" in eval "with_$which=405" ;; "" | common \ - | power | power[23456] | powerpc | powerpc64 \ + | power | power[23456] | power6x | powerpc | powerpc64 \ | rios | rios1 | rios2 | rsc | rsc1 | rs64a \ | 401 | 403 | 405 | 405fp | 440 | 440fp | 505 \ | 601 | 602 | 603 | 603e | ec603e | 604 \ diff --git a/gcc/config.in b/gcc/config.in index 2fc11775dde..0f5f0116455 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -233,6 +233,12 @@ #endif +/* Define if your assembler supports mffgpr and mftgpr. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_MFPGPR +#endif + + /* Define if your assembler supports the -no-mul-bug-abort option. 
*/ #ifndef USED_FOR_TARGET #undef HAVE_AS_NO_MUL_BUG_ABORT_OPTION diff --git a/gcc/config/rs6000/40x.md b/gcc/config/rs6000/40x.md index 94b6c459ae1..5f963e93721 100644 --- a/gcc/config/rs6000/40x.md +++ b/gcc/config/rs6000/40x.md @@ -38,7 +38,8 @@ "iu_40x") (define_insn_reservation "ppc403-integer" 1 - (and (eq_attr "type" "integer,insert_word") + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,cntlz,exts") (eq_attr "cpu" "ppc403,ppc405")) "iu_40x") @@ -53,7 +54,8 @@ "iu_40x,iu_40x,iu_40x") (define_insn_reservation "ppc403-compare" 3 - (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare") + (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\ + var_delayed_compare") (eq_attr "cpu" "ppc403,ppc405")) "iu_40x,nothing,bpu_40x") diff --git a/gcc/config/rs6000/440.md b/gcc/config/rs6000/440.md index 60e0f72dc16..38ce1660ab5 100644 --- a/gcc/config/rs6000/440.md +++ b/gcc/config/rs6000/440.md @@ -55,7 +55,8 @@ "ppc440_issue,ppc440_l_pipe") (define_insn_reservation "ppc440-integer" 1 - (and (eq_attr "type" "integer,insert_word") + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,\ + trap,var_shift_rotate,cntlz,exts") (eq_attr "cpu" "ppc440")) "ppc440_issue,ppc440_i_pipe|ppc440_j_pipe") diff --git a/gcc/config/rs6000/603.md b/gcc/config/rs6000/603.md index 4721aca798c..c35b872e08f 100644 --- a/gcc/config/rs6000/603.md +++ b/gcc/config/rs6000/603.md @@ -59,7 +59,8 @@ "lsu_603") (define_insn_reservation "ppc603-integer" 1 - (and (eq_attr "type" "integer,insert_word") + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,cntlz,exts") (eq_attr "cpu" "ppc603")) "iu_603") @@ -90,7 +91,8 @@ "iu_603*37") (define_insn_reservation "ppc603-compare" 3 - (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare") + (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\ + var_delayed_compare") (eq_attr "cpu" "ppc603")) "iu_603,nothing,bpu_603") diff --git 
a/gcc/config/rs6000/6xx.md b/gcc/config/rs6000/6xx.md index 31aa6062070..9c7ce9ae9a9 100644 --- a/gcc/config/rs6000/6xx.md +++ b/gcc/config/rs6000/6xx.md @@ -74,7 +74,8 @@ "lsu_6xx") (define_insn_reservation "ppc604-integer" 1 - (and (eq_attr "type" "integer,insert_word") + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,cntlz,exts") (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) "iu1_6xx|iu2_6xx") @@ -139,7 +140,8 @@ "mciu_6xx*36") (define_insn_reservation "ppc604-compare" 3 - (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare") + (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\ + var_delayed_compare") (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) "(iu1_6xx|iu2_6xx)") diff --git a/gcc/config/rs6000/7450.md b/gcc/config/rs6000/7450.md index 99e87125fb9..61893e0b37e 100644 --- a/gcc/config/rs6000/7450.md +++ b/gcc/config/rs6000/7450.md @@ -75,7 +75,8 @@ "ppc7450_du,lsu_7450") (define_insn_reservation "ppc7450-integer" 1 - (and (eq_attr "type" "integer,insert_word") + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,\ + trap,var_shift_rotate,cntlz,exts") (eq_attr "cpu" "ppc7450")) "ppc7450_du,iu1_7450|iu2_7450|iu3_7450") @@ -106,7 +107,8 @@ "ppc7450_du,mciu_7450*23") (define_insn_reservation "ppc7450-compare" 2 - (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare") + (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\ + var_delayed_compare") (eq_attr "cpu" "ppc7450")) "ppc7450_du,(iu1_7450|iu2_7450|iu3_7450)") diff --git a/gcc/config/rs6000/7xx.md b/gcc/config/rs6000/7xx.md index 77e58a3cb6c..68542c7ead9 100644 --- a/gcc/config/rs6000/7xx.md +++ b/gcc/config/rs6000/7xx.md @@ -65,7 +65,8 @@ "ppc750_du,lsu_7xx") (define_insn_reservation "ppc750-integer" 1 - (and (eq_attr "type" "integer,insert_word") + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,\ + trap,var_shift_rotate,cntlz,exts") (eq_attr "cpu" "ppc750,ppc7400")) "ppc750_du,iu1_7xx|iu2_7xx") 
@@ -100,7 +101,8 @@ "ppc750_du,iu1_7xx*19") (define_insn_reservation "ppc750-compare" 2 - (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare") + (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\ + var_delayed_compare") (eq_attr "cpu" "ppc750,ppc7400")) "ppc750_du,(iu1_7xx|iu2_7xx)") diff --git a/gcc/config/rs6000/8540.md b/gcc/config/rs6000/8540.md index b42e2472a41..91781af7f57 100644 --- a/gcc/config/rs6000/8540.md +++ b/gcc/config/rs6000/8540.md @@ -85,7 +85,9 @@ ;; Simple SU insns (define_insn_reservation "ppc8540_su" 1 - (and (eq_attr "type" "integer,insert_word,cmp,compare,delayed_compare,fast_compare") + (and (eq_attr "type" "integer,insert_word,insert_dword,cmp,compare,\ + delayed_compare,var_delayed_compare,fast_compare,\ + shift,trap,var_shift_rotate,cntlz,exts") (eq_attr "cpu" "ppc8540")) "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire") diff --git a/gcc/config/rs6000/aix52.h b/gcc/config/rs6000/aix52.h index 87d2157c61e..8a2ae8ab21d 100644 --- a/gcc/config/rs6000/aix52.h +++ b/gcc/config/rs6000/aix52.h @@ -69,6 +69,7 @@ do { \ %{mcpu=power5: -m620} \ %{mcpu=power5+: -m620} \ %{mcpu=power6: -m620} \ +%{mcpu=power6x: -m620} \ %{mcpu=powerpc: -mppc} \ %{mcpu=rs64a: -mppc} \ %{mcpu=603: -m603} \ diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h index f52b2451abe..5c62a1d133b 100644 --- a/gcc/config/rs6000/linux64.h +++ b/gcc/config/rs6000/linux64.h @@ -58,9 +58,9 @@ extern int dot_symbols; #endif #undef PROCESSOR_DEFAULT -#define PROCESSOR_DEFAULT PROCESSOR_POWER4 +#define PROCESSOR_DEFAULT PROCESSOR_POWER6 #undef PROCESSOR_DEFAULT64 -#define PROCESSOR_DEFAULT64 PROCESSOR_POWER4 +#define PROCESSOR_DEFAULT64 PROCESSOR_POWER6 /* We don't need to generate entries in .fixup, except when -mrelocatable or -mrelocatable-lib is given. 
*/ diff --git a/gcc/config/rs6000/mpc.md b/gcc/config/rs6000/mpc.md index 75e475208d3..20064de8af6 100644 --- a/gcc/config/rs6000/mpc.md +++ b/gcc/config/rs6000/mpc.md @@ -43,7 +43,8 @@ "lsu_mpc") (define_insn_reservation "mpccore-integer" 1 - (and (eq_attr "type" "integer,insert_word") + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,cntlz,exts") (eq_attr "cpu" "mpccore")) "iu_mpc") @@ -69,7 +70,8 @@ "mciu_mpc*6") (define_insn_reservation "mpccore-compare" 3 - (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare") + (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\ + var_delayed_compare") (eq_attr "cpu" "mpccore")) "iu_mpc,nothing,bpu_mpc") diff --git a/gcc/config/rs6000/power4.md b/gcc/config/rs6000/power4.md index 53ac066d856..d379ed998c5 100644 --- a/gcc/config/rs6000/power4.md +++ b/gcc/config/rs6000/power4.md @@ -183,7 +183,8 @@ ; Integer latency is 2 cycles (define_insn_reservation "power4-integer" 2 - (and (eq_attr "type" "integer") + (and (eq_attr "type" "integer,insert_dword,shift,trap,\ + var_shift_rotate,cntlz,exts") (eq_attr "cpu" "power4")) "iq_power4") @@ -220,7 +221,7 @@ "iq_power4") (define_insn_reservation "power4-compare" 2 - (and (eq_attr "type" "compare,delayed_compare") + (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare") (eq_attr "cpu" "power4")) "(du1_power4+du2_power4,iu1_power4,iu2_power4)\ |(du2_power4+du3_power4,iu2_power4,iu2_power4)\ diff --git a/gcc/config/rs6000/power5.md b/gcc/config/rs6000/power5.md index ce6892605d1..d765a2795ad 100644 --- a/gcc/config/rs6000/power5.md +++ b/gcc/config/rs6000/power5.md @@ -144,7 +144,8 @@ ; Integer latency is 2 cycles (define_insn_reservation "power5-integer" 2 - (and (eq_attr "type" "integer") + (and (eq_attr "type" "integer,insert_dword,shift,trap,\ + var_shift_rotate,cntlz,exts") (eq_attr "cpu" "power5")) "iq_power5") @@ -179,7 +180,7 @@ "iq_power5") (define_insn_reservation "power5-compare" 2 - (and (eq_attr 
"type" "compare,delayed_compare") + (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare") (eq_attr "cpu" "power5")) "du1_power5+du2_power5,iu1_power5,iu2_power5") diff --git a/gcc/config/rs6000/power6.md b/gcc/config/rs6000/power6.md new file mode 100644 index 00000000000..0768f67fe1a --- /dev/null +++ b/gcc/config/rs6000/power6.md @@ -0,0 +1,569 @@ +;; Scheduling description for IBM POWER6 processor. +;; Copyright (C) 2006 Free Software Foundation, Inc. +;; Contributed by Peter Steinmetz (steinmtz@us.ibm.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 2, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to the +;; Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, +;; MA 02110-1301, USA. + +;; Sources: + +;; The POWER6 has 2 iu, 2 fpu, 2 lsu, and 1 bu/cru unit per engine +;; (2 engines per chip). The chip can issue up to 5 internal ops +;; per cycle. 
+ +(define_automaton "power6iu,power6lsu,power6fpu,power6bu") + +(define_cpu_unit "iu1_power6,iu2_power6" "power6iu") +(define_cpu_unit "lsu1_power6,lsu2_power6" "power6lsu") +(define_cpu_unit "bpu_power6" "power6bu") +(define_cpu_unit "fpu1_power6,fpu2_power6" "power6fpu") + +(define_reservation "LS2_power6" + "lsu1_power6+lsu2_power6") + +(define_reservation "FPU_power6" + "fpu1_power6|fpu2_power6") + +(define_reservation "BRU_power6" + "bpu_power6") + +(define_reservation "LSU_power6" + "lsu1_power6|lsu2_power6") + +(define_reservation "LSF_power6" + "(lsu1_power6+fpu1_power6)\ + |(lsu1_power6+fpu2_power6)\ + |(lsu2_power6+fpu1_power6)\ + |(lsu2_power6+fpu2_power6)") + +(define_reservation "LX2_power6" + "(iu1_power6+iu2_power6+lsu1_power6)\ + |(iu1_power6+iu2_power6+lsu2_power6)") + +(define_reservation "FX2_power6" + "iu1_power6+iu2_power6") + +(define_reservation "X2F_power6" + "(iu1_power6+iu2_power6+fpu1_power6)\ + |(iu1_power6+iu2_power6+fpu2_power6)") + +(define_reservation "BX2_power6" + "iu1_power6+iu2_power6+bpu_power6") + +(define_reservation "LSX_power6" + "(iu1_power6+lsu1_power6)\ + |(iu1_power6+lsu2_power6)\ + |(iu2_power6+lsu1_power6)\ + |(iu2_power6+lsu2_power6)") + +(define_reservation "FXU_power6" + "iu1_power6|iu2_power6") + +(define_reservation "XLF_power6" + "(iu1_power6+lsu1_power6+fpu1_power6)\ + |(iu1_power6+lsu1_power6+fpu2_power6)\ + |(iu1_power6+lsu2_power6+fpu1_power6)\ + |(iu1_power6+lsu2_power6+fpu2_power6)\ + |(iu2_power6+lsu1_power6+fpu1_power6)\ + |(iu2_power6+lsu1_power6+fpu2_power6)\ + |(iu2_power6+lsu2_power6+fpu1_power6)\ + |(iu2_power6+lsu2_power6+fpu2_power6)") + +(define_reservation "BRX_power6" + "(bpu_power6+iu1_power6)\ + |(bpu_power6+iu2_power6)") + +; Load/store + +; The default for a value written by a fixed point load +; that is read/written by a subsequent fixed point op. 
+(define_insn_reservation "power6-load" 2 ; fx + (and (eq_attr "type" "load") + (eq_attr "cpu" "power6")) + "LSU_power6") + +; define the bypass for the case where the value written +; by a fixed point load is used as the source value on +; a store. +(define_bypass 1 "power6-load,\ + power6-load-update,\ + power6-load-update-indexed" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "store_data_bypass_p") + +(define_insn_reservation "power6-load-ext" 4 ; fx + (and (eq_attr "type" "load_ext") + (eq_attr "cpu" "power6")) + "LSU_power6") + +; define the bypass for the case where the value written +; by a fixed point load ext is used as the source value on +; a store. +(define_bypass 1 "power6-load-ext,\ + power6-load-ext-update,\ + power6-load-ext-update-indexed" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "store_data_bypass_p") + +(define_insn_reservation "power6-load-update" 2 ; fx + (and (eq_attr "type" "load_u") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-load-update-indexed" 2 ; fx + (and (eq_attr "type" "load_ux") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-load-ext-update" 4 ; fx + (and (eq_attr "type" "load_ext_u") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-load-ext-update-indexed" 4 ; fx + (and (eq_attr "type" "load_ext_ux") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-fpload" 1 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "power6")) + "LSU_power6") + +(define_insn_reservation "power6-fpload-update" 1 + (and (eq_attr "type" "fpload_u,fpload_ux") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-store" 14 + (and (eq_attr "type" "store") + (eq_attr "cpu" "power6")) + "LSU_power6") + +(define_insn_reservation "power6-store-update" 14 + (and 
(eq_attr "type" "store_u") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-store-update-indexed" 14 + (and (eq_attr "type" "store_ux") + (eq_attr "cpu" "power6")) + "LX2_power6") + +(define_insn_reservation "power6-fpstore" 14 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "power6")) + "LSF_power6") + +(define_insn_reservation "power6-fpstore-update" 14 + (and (eq_attr "type" "fpstore_u,fpstore_ux") + (eq_attr "cpu" "power6")) + "XLF_power6") + +(define_insn_reservation "power6-larx" 3 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power6")) + "LS2_power6") + +(define_insn_reservation "power6-stcx" 10 ; best case + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-sync" 11 ; N/A + (and (eq_attr "type" "sync") + (eq_attr "cpu" "power6")) + "LSU_power6") + +(define_insn_reservation "power6-integer" 1 + (and (eq_attr "type" "integer") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-exts" 1 + (and (eq_attr "type" "exts") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-shift" 1 + (and (eq_attr "type" "shift") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-insert" 1 + (and (eq_attr "type" "insert_word") + (eq_attr "cpu" "power6")) + "FX2_power6") + +(define_insn_reservation "power6-insert-dword" 1 + (and (eq_attr "type" "insert_dword") + (eq_attr "cpu" "power6")) + "FX2_power6") + +; define the bypass for the case where the value written +; by a fixed point op is used as the source value on a +; store. 
+(define_bypass 1 "power6-integer,\ + power6-exts,\ + power6-shift,\ + power6-insert,\ + power6-insert-dword" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "store_data_bypass_p") + +(define_insn_reservation "power6-cntlz" 2 + (and (eq_attr "type" "cntlz") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_bypass 1 "power6-cntlz" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "store_data_bypass_p") + +(define_insn_reservation "power6-var-rotate" 4 + (and (eq_attr "type" "var_shift_rotate") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-trap" 1 ; N/A + (and (eq_attr "type" "trap") + (eq_attr "cpu" "power6")) + "BRX_power6") + +(define_insn_reservation "power6-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power6")) + "(iu1_power6,iu1_power6)\ + |(iu1_power6+iu2_power6,nothing)\ + |(iu1_power6,iu2_power6)\ + |(iu2_power6,iu1_power6)\ + |(iu2_power6,iu2_power6)") + +(define_insn_reservation "power6-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power6")) + "(iu1_power6,iu1_power6,iu1_power6)\ + |(iu1_power6,iu1_power6,iu2_power6)\ + |(iu1_power6,iu2_power6,iu1_power6)\ + |(iu1_power6,iu2_power6,iu2_power6)\ + |(iu2_power6,iu1_power6,iu1_power6)\ + |(iu2_power6,iu1_power6,iu2_power6)\ + |(iu2_power6,iu2_power6,iu1_power6)\ + |(iu2_power6,iu2_power6,iu2_power6)\ + |(iu1_power6+iu2_power6,iu1_power6)\ + |(iu1_power6+iu2_power6,iu2_power6)\ + |(iu1_power6,iu1_power6+iu2_power6)\ + |(iu2_power6,iu1_power6+iu2_power6)") + +(define_insn_reservation "power6-cmp" 1 + (and (eq_attr "type" "cmp") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-compare" 1 + (and (eq_attr "type" "compare") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-fast-compare" 1 + (and (eq_attr "type" "fast_compare") + (eq_attr "cpu" "power6")) 
+ "FXU_power6") + +; define the bypass for the case where the value written +; by a fixed point rec form op is used as the source value +; on a store. +(define_bypass 1 "power6-compare,\ + power6-fast-compare" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "store_data_bypass_p") + +(define_insn_reservation "power6-delayed-compare" 2 ; N/A + (and (eq_attr "type" "delayed_compare") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-var-delayed-compare" 4 + (and (eq_attr "type" "var_delayed_compare") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-lmul-cmp" 16 + (and (eq_attr "type" "lmul_compare") + (eq_attr "cpu" "power6")) + "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\ + |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)"); + +(define_insn_reservation "power6-imul-cmp" 16 + (and (eq_attr "type" "imul_compare") + (eq_attr "cpu" "power6")) + "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\ + |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)"); + +(define_insn_reservation "power6-lmul" 16 + (and (eq_attr "type" "lmul") + (eq_attr "cpu" "power6")) + "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\ + |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)"); + +(define_insn_reservation "power6-imul" 16 + (and (eq_attr "type" "imul") + (eq_attr "cpu" "power6")) + "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\ + |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)"); + +(define_insn_reservation "power6-imul3" 16 + (and (eq_attr "type" "imul2,imul3") + (eq_attr "cpu" "power6")) + "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\ + |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)"); + +(define_bypass 9 "power6-imul,\ + power6-lmul,\ + power6-imul-cmp,\ + power6-lmul-cmp,\ + power6-imul3" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "store_data_bypass_p") + +(define_insn_reservation "power6-idiv" 44 + 
(and (eq_attr "type" "idiv") + (eq_attr "cpu" "power6")) + "(iu1_power6*44+iu2_power6*44+fpu1_power6*44)\ + |(iu1_power6*44+iu2_power6*44+fpu2_power6*44)"); + +; The latency for this bypass is yet to be defined +;(define_bypass ? "power6-idiv" +; "power6-store,\ +; power6-store-update,\ +; power6-store-update-indexed,\ +; power6-fpstore,\ +; power6-fpstore-update" +; "store_data_bypass_p") + +(define_insn_reservation "power6-ldiv" 56 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "power6")) + "(iu1_power6*56+iu2_power6*56+fpu1_power6*56)\ + |(iu1_power6*56+iu2_power6*56+fpu2_power6*56)"); + +; The latency for this bypass is yet to be defined +;(define_bypass ? "power6-ldiv" +; "power6-store,\ +; power6-store-update,\ +; power6-store-update-indexed,\ +; power6-fpstore,\ +; power6-fpstore-update" +; "store_data_bypass_p") + +(define_insn_reservation "power6-mtjmpr" 2 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "power6")) + "BX2_power6") + +(define_bypass 5 "power6-mtjmpr" "power6-branch") + +(define_insn_reservation "power6-branch" 2 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power6")) + "BRU_power6") + +(define_bypass 5 "power6-branch" "power6-mtjmpr") + +(define_insn_reservation "power6-crlogical" 3 + (and (eq_attr "type" "cr_logical") + (eq_attr "cpu" "power6")) + "BRU_power6") + +(define_bypass 3 "power6-crlogical" "power6-branch") + +(define_insn_reservation "power6-delayedcr" 3 + (and (eq_attr "type" "delayed_cr") + (eq_attr "cpu" "power6")) + "BRU_power6") + +(define_insn_reservation "power6-mfcr" 6 ; N/A + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power6")) + "BX2_power6") + +; mfcrf (1 field) +(define_insn_reservation "power6-mfcrf" 3 ; N/A + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power6")) + "BX2_power6") ; + +; mtcrf (1 field) +(define_insn_reservation "power6-mtcr" 4 ; N/A + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power6")) + "BX2_power6") + +(define_bypass 9 "power6-mtcr" "power6-branch") + 
+(define_insn_reservation "power6-fp" 6 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "power6")) + "FPU_power6") + +; Any fp instruction that updates a CR has a latency +; of 6 to a dependent branch +(define_bypass 6 "power6-fp" "power6-branch") + +(define_bypass 1 "power6-fp" + "power6-fpstore,power6-fpstore-update" + "store_data_bypass_p") + +(define_insn_reservation "power6-fpcompare" 8 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 12 "power6-fpcompare" + "power6-branch,power6-crlogical") + +(define_insn_reservation "power6-sdiv" 26 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_insn_reservation "power6-ddiv" 32 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_insn_reservation "power6-sqrt" 30 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_insn_reservation "power6-dsqrt" 42 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_insn_reservation "power6-isync" 2 ; N/A + (and (eq_attr "type" "isync") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-vecload" 1 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "power6")) + "LSU_power6") + +(define_insn_reservation "power6-vecstore" 1 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power6")) + "LSF_power6") + +(define_insn_reservation "power6-vecsimple" 3 + (and (eq_attr "type" "vecsimple") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 6 "power6-vecsimple" "power6-veccomplex,\ + power6-vecperm") + +(define_bypass 5 "power6-vecsimple" "power6-vecfloat") + +(define_bypass 4 "power6-vecsimple" "power6-vecstore" ) + +(define_insn_reservation "power6-veccmp" 1 + (and (eq_attr "type" "veccmp") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 10 "power6-veccmp" "power6-branch") + +(define_insn_reservation "power6-vecfloat" 7 + (and (eq_attr "type" 
"vecfloat") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 10 "power6-vecfloat" "power6-vecsimple") + +(define_bypass 11 "power6-vecfloat" "power6-veccomplex,\ + power6-vecperm") + +(define_bypass 9 "power6-vecfloat" "power6-vecstore" ) + +(define_insn_reservation "power6-veccomplex" 7 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 10 "power6-veccomplex" "power6-vecsimple,\ + power6-vecfloat" ) + +(define_bypass 9 "power6-veccomplex" "power6-vecperm" ) + +(define_bypass 8 "power6-veccomplex" "power6-vecstore" ) + +(define_insn_reservation "power6-vecperm" 4 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 7 "power6-vecperm" "power6-vecsimple,\ + power6-vecfloat" ) + +(define_bypass 6 "power6-vecperm" "power6-veccomplex" ) + +(define_bypass 5 "power6-vecperm" "power6-vecstore" ) + +(define_insn_reservation "power6-mftgpr" 8 + (and (eq_attr "type" "mftgpr") + (eq_attr "cpu" "power6")) + "X2F_power6") + +(define_insn_reservation "power6-mffgpr" 14 + (and (eq_attr "type" "mffgpr") + (eq_attr "cpu" "power6")) + "LX2_power6") + +(define_bypass 4 "power6-mftgpr" "power6-imul,\ + power6-lmul,\ + power6-imul-cmp,\ + power6-lmul-cmp,\ + power6-imul3,\ + power6-idiv,\ + power6-ldiv" ) diff --git a/gcc/config/rs6000/rios1.md b/gcc/config/rs6000/rios1.md index 59b34c5f8d7..028d2739a46 100644 --- a/gcc/config/rs6000/rios1.md +++ b/gcc/config/rs6000/rios1.md @@ -52,7 +52,8 @@ "iu_rios1+fpu_rios1") (define_insn_reservation "rios1-integer" 1 - (and (eq_attr "type" "integer,insert_word") + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,\ + trap,var_shift_rotate,cntlz,exts") (eq_attr "cpu" "rios1,ppc601")) "iu_rios1") @@ -104,12 +105,13 @@ "iu_rios1,nothing*2,bpu_rios1") (define_insn_reservation "rios1-delayed_compare" 5 - (and (eq_attr "type" "delayed_compare") + (and (eq_attr "type" "delayed_compare,var_delayed_compare") (eq_attr "cpu" "rios1")) 
"iu_rios1,nothing*3,bpu_rios1") (define_insn_reservation "ppc601-compare" 3 - (and (eq_attr "type" "cmp,compare,delayed_compare") + (and (eq_attr "type" "cmp,compare,delayed_compare,\ + var_delayed_compare") (eq_attr "cpu" "ppc601")) "iu_rios1,nothing,bpu_rios1") diff --git a/gcc/config/rs6000/rios2.md b/gcc/config/rs6000/rios2.md index b2f5cb282ab..baaab892ac2 100644 --- a/gcc/config/rs6000/rios2.md +++ b/gcc/config/rs6000/rios2.md @@ -40,7 +40,8 @@ "iu1_rios2|iu2_rios2") (define_insn_reservation "rios2-integer" 1 - (and (eq_attr "type" "integer,insert_word") + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,cntlz,exts") (eq_attr "cpu" "rios2")) "iu1_rios2|iu2_rios2") @@ -67,7 +68,8 @@ ; compare executes on integer unit, but feeds insns which ; execute on the branch unit. (define_insn_reservation "rios2-compare" 3 - (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare") + (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\ + var_delayed_compare") (eq_attr "cpu" "rios2")) "(iu1_rios2|iu2_rios2),nothing,bpu_rios2") diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 67416c20e62..94d4a6a8c3b 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -106,6 +106,8 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile) builtin_define ("_ARCH_PWR5"); if (TARGET_FPRND) builtin_define ("_ARCH_PWR5X"); + if (TARGET_MFPGPR) + builtin_define ("_ARCH_PWR6X"); if (! TARGET_POWER && ! TARGET_POWER2 && ! TARGET_POWERPC) builtin_define ("_ARCH_COM"); if (TARGET_ALTIVEC) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index e82b6d59581..9d784aeb50d 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -145,6 +145,9 @@ static GTY(()) bool rs6000_always_hint; /* Schedule instructions for group formation. */ static GTY(()) bool rs6000_sched_groups; +/* Align branch targets. 
*/ +static GTY(()) bool rs6000_align_branch_targets; + /* Support for -msched-costly-dep option. */ const char *rs6000_sched_costly_dep_str; enum rs6000_dependence_cost rs6000_sched_costly_dep; @@ -235,6 +238,10 @@ static enum { int toc_initialized; char toc_label_name[10]; +/* Cached value of rs6000_variable_issue. This is cached in + rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */ +static short cached_can_issue_more; + static GTY(()) section *read_only_data_section; static GTY(()) section *private_data_section; static GTY(()) section *read_only_private_data_section; @@ -572,6 +579,21 @@ struct processor_costs power4_cost = { COSTS_N_INSNS (17), /* ddiv */ }; +/* Instruction costs on POWER6 processors. */ +static const +struct processor_costs power6_cost = { + COSTS_N_INSNS (8), /* mulsi */ + COSTS_N_INSNS (8), /* mulsi_const */ + COSTS_N_INSNS (8), /* mulsi_const9 */ + COSTS_N_INSNS (8), /* muldi */ + COSTS_N_INSNS (22), /* divsi */ + COSTS_N_INSNS (28), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (16), /* ddiv */ +}; + static bool rs6000_function_ok_for_sibcall (tree, tree); static const char *rs6000_invalid_within_doloop (rtx); @@ -647,20 +669,28 @@ static void rs6000_xcoff_file_end (void); static int rs6000_variable_issue (FILE *, int, rtx, int); static bool rs6000_rtx_costs (rtx, int, int, int *); static int rs6000_adjust_cost (rtx, rtx, rtx, int); +static void rs6000_sched_init (FILE *, int, int); static bool is_microcoded_insn (rtx); -static int is_dispatch_slot_restricted (rtx); static bool is_cracked_insn (rtx); static bool is_branch_slot_insn (rtx); +static bool is_load_insn (rtx); +static bool is_store_insn (rtx); +static bool set_to_load_agen (rtx,rtx); +static bool adjacent_mem_locations (rtx,rtx); static int rs6000_adjust_priority (rtx, int); static int rs6000_issue_rate (void); static bool rs6000_is_costly_dependence (rtx, rtx, rtx, int, int); 
static rtx get_next_active_insn (rtx, rtx); static bool insn_terminates_group_p (rtx , enum group_termination); +static bool insn_must_be_first_in_group (rtx); +static bool insn_must_be_last_in_group (rtx); static bool is_costly_group (rtx *, rtx); static int force_new_group (int, FILE *, rtx *, rtx, bool *, int, int *); static int redefine_groups (FILE *, int, rtx, rtx); static int pad_groups (FILE *, int, rtx, rtx); static void rs6000_sched_finish (FILE *, int); +static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int); +static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int); static int rs6000_use_sched_lookahead (void); static tree rs6000_builtin_mask_for_load (void); @@ -908,8 +938,14 @@ static const char alt_reg_names[][8] = #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT rs6000_sched_init #undef TARGET_SCHED_FINISH #define TARGET_SCHED_FINISH rs6000_sched_finish +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER rs6000_sched_reorder +#undef TARGET_SCHED_REORDER2 +#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead @@ -1194,9 +1230,12 @@ rs6000_override_options (const char *default_cpu) {"power5+", PROCESSOR_POWER5, POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND}, - {"power6", PROCESSOR_POWER5, + {"power6", PROCESSOR_POWER6, POWERPC_7400_MASK | MASK_POWERPC64 | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND}, + {"power6x", PROCESSOR_POWER6, + POWERPC_7400_MASK | MASK_POWERPC64 | MASK_MFCRF | MASK_POPCNTB + | MASK_FPRND | MASK_MFPGPR}, {"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK}, {"powerpc64", PROCESSOR_POWERPC64, POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64}, @@ -1223,7 
+1262,7 @@ rs6000_override_options (const char *default_cpu) POWERPC_MASKS = (POWERPC_BASE_MASK | MASK_PPC_GPOPT | MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_ALTIVEC | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_MULHW - | MASK_DLMZB) + | MASK_DLMZB | MASK_MFPGPR) }; rs6000_init_hard_regno_mode_ok (); @@ -1399,9 +1438,13 @@ rs6000_override_options (const char *default_cpu) } rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4 - && rs6000_cpu != PROCESSOR_POWER5); + && rs6000_cpu != PROCESSOR_POWER5 + && rs6000_cpu != PROCESSOR_POWER6); rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5); + rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4 + || rs6000_cpu == PROCESSOR_POWER5 + || rs6000_cpu == PROCESSOR_POWER6); rs6000_sched_restricted_insns_priority = (rs6000_sched_groups ? 1 : 0); @@ -1470,7 +1513,7 @@ rs6000_override_options (const char *default_cpu) /* Set branch target alignment, if not optimizing for size. */ if (!optimize_size) { - if (rs6000_sched_groups) + if (rs6000_align_branch_targets) { if (align_functions <= 0) align_functions = 16; @@ -1569,6 +1612,10 @@ rs6000_override_options (const char *default_cpu) rs6000_cost = &power4_cost; break; + case PROCESSOR_POWER6: + rs6000_cost = &power6_cost; + break; + default: gcc_unreachable (); } @@ -16461,6 +16508,16 @@ output_function_profiler (FILE *file, int labelno) } + +/* The following variable value is the last issued insn. */ + +static rtx last_scheduled_insn; + +/* The following variable helps to balance issuing of load and + store instructions */ + +static int load_store_pendulum; + /* Power4 load update and store update instructions are cracked into a load or store and an integer insn which are executed in the same cycle. 
Branches have their own dispatch slot which does not count against the @@ -16472,19 +16529,34 @@ rs6000_variable_issue (FILE *stream ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, rtx insn, int more) { + last_scheduled_insn = insn; if (GET_CODE (PATTERN (insn)) == USE || GET_CODE (PATTERN (insn)) == CLOBBER) - return more; + { + cached_can_issue_more = more; + return cached_can_issue_more; + } + + if (insn_terminates_group_p (insn, current_group)) + { + cached_can_issue_more = 0; + return cached_can_issue_more; + } if (rs6000_sched_groups) { if (is_microcoded_insn (insn)) - return 0; + cached_can_issue_more = 0; else if (is_cracked_insn (insn)) - return more > 2 ? more - 2 : 0; + cached_can_issue_more = more > 2 ? more - 2 : 0; + else + cached_can_issue_more = more - 1; + + return cached_can_issue_more; } - return more - 1; + cached_can_issue_more = more - 1; + return cached_can_issue_more; } /* Adjust the cost of a scheduling dependency. Return the new cost of @@ -16493,64 +16565,285 @@ rs6000_variable_issue (FILE *stream ATTRIBUTE_UNUSED, static int rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) { + enum attr_type attr_type; + if (! recog_memoized (insn)) return 0; - if (REG_NOTE_KIND (link) != 0) - return 0; - - if (REG_NOTE_KIND (link) == 0) + switch (REG_NOTE_KIND (link)) { - /* Data dependency; DEP_INSN writes a register that INSN reads - some cycles later. */ + case REG_DEP_TRUE: + { + /* Data dependency; DEP_INSN writes a register that INSN reads + some cycles later. */ - /* Separate a load from a narrower, dependent store. */ - if (rs6000_sched_groups - && GET_CODE (PATTERN (insn)) == SET - && GET_CODE (PATTERN (dep_insn)) == SET - && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM - && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM - && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1))) - > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0))))) - return cost + 14; + /* Separate a load from a narrower, dependent store. 
*/ + if (rs6000_sched_groups + && GET_CODE (PATTERN (insn)) == SET + && GET_CODE (PATTERN (dep_insn)) == SET + && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM + && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM + && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1))) + > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0))))) + return cost + 14; + + attr_type = get_attr_type (insn); + + switch (attr_type) + { + case TYPE_JMPREG: + /* Tell the first scheduling pass about the latency between + a mtctr and bctr (and mtlr and br/blr). The first + scheduling pass will not know about this latency since + the mtctr instruction, which has the latency associated + to it, will be generated by reload. */ + return TARGET_POWER ? 5 : 4; + case TYPE_BRANCH: + /* Leave some extra cycles between a compare and its + dependent branch, to inhibit expensive mispredicts. */ + if ((rs6000_cpu_attr == CPU_PPC603 + || rs6000_cpu_attr == CPU_PPC604 + || rs6000_cpu_attr == CPU_PPC604E + || rs6000_cpu_attr == CPU_PPC620 + || rs6000_cpu_attr == CPU_PPC630 + || rs6000_cpu_attr == CPU_PPC750 + || rs6000_cpu_attr == CPU_PPC7400 + || rs6000_cpu_attr == CPU_PPC7450 + || rs6000_cpu_attr == CPU_POWER4 + || rs6000_cpu_attr == CPU_POWER5) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + + switch (get_attr_type (dep_insn)) + { + case TYPE_CMP: + case TYPE_COMPARE: + case TYPE_DELAYED_COMPARE: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + case TYPE_FPCOMPARE: + case TYPE_CR_LOGICAL: + case TYPE_DELAYED_CR: + return cost + 2; + default: + break; + } + break; + + case TYPE_STORE: + case TYPE_STORE_U: + case TYPE_STORE_UX: + case TYPE_FPSTORE: + case TYPE_FPSTORE_U: + case TYPE_FPSTORE_UX: + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + { + + if (GET_CODE (PATTERN (insn)) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return default for now. 
*/ + return cost; + + /* Adjust the cost for the case where the value written + by a fixed point operation is used as the address + gen value on a store. */ + switch (get_attr_type (dep_insn)) + { + case TYPE_LOAD: + case TYPE_LOAD_U: + case TYPE_LOAD_UX: + case TYPE_CNTLZ: + { + if (! store_data_bypass_p (dep_insn, insn)) + return 4; + break; + } + case TYPE_LOAD_EXT: + case TYPE_LOAD_EXT_U: + case TYPE_LOAD_EXT_UX: + case TYPE_VAR_SHIFT_ROTATE: + case TYPE_VAR_DELAYED_COMPARE: + { + if (! store_data_bypass_p (dep_insn, insn)) + return 6; + break; + } + case TYPE_INTEGER: + case TYPE_COMPARE: + case TYPE_FAST_COMPARE: + case TYPE_EXTS: + case TYPE_SHIFT: + case TYPE_INSERT_WORD: + case TYPE_INSERT_DWORD: + case TYPE_FPLOAD_U: + case TYPE_FPLOAD_UX: + case TYPE_STORE_U: + case TYPE_STORE_UX: + case TYPE_FPSTORE_U: + case TYPE_FPSTORE_UX: + { + if (! store_data_bypass_p (dep_insn, insn)) + return 3; + break; + } + case TYPE_IMUL: + case TYPE_IMUL2: + case TYPE_IMUL3: + case TYPE_LMUL: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + { + if (! store_data_bypass_p (dep_insn, insn)) + return 17; + break; + } + case TYPE_IDIV: + { + if (! store_data_bypass_p (dep_insn, insn)) + return 45; + break; + } + case TYPE_LDIV: + { + if (! 
store_data_bypass_p (dep_insn, insn)) + return 57; + break; + } + default: + break; + } + } + break; + + case TYPE_LOAD: + case TYPE_LOAD_U: + case TYPE_LOAD_UX: + case TYPE_LOAD_EXT: + case TYPE_LOAD_EXT_U: + case TYPE_LOAD_EXT_UX: + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + { + + /* Adjust the cost for the case where the value written + by a fixed point instruction is used within the address + gen portion of a subsequent load(u)(x) */ + switch (get_attr_type (dep_insn)) + { + case TYPE_LOAD: + case TYPE_LOAD_U: + case TYPE_LOAD_UX: + case TYPE_CNTLZ: + { + if (set_to_load_agen (dep_insn, insn)) + return 4; + break; + } + case TYPE_LOAD_EXT: + case TYPE_LOAD_EXT_U: + case TYPE_LOAD_EXT_UX: + case TYPE_VAR_SHIFT_ROTATE: + case TYPE_VAR_DELAYED_COMPARE: + { + if (set_to_load_agen (dep_insn, insn)) + return 6; + break; + } + case TYPE_INTEGER: + case TYPE_COMPARE: + case TYPE_FAST_COMPARE: + case TYPE_EXTS: + case TYPE_SHIFT: + case TYPE_INSERT_WORD: + case TYPE_INSERT_DWORD: + case TYPE_FPLOAD_U: + case TYPE_FPLOAD_UX: + case TYPE_STORE_U: + case TYPE_STORE_UX: + case TYPE_FPSTORE_U: + case TYPE_FPSTORE_UX: + { + if (set_to_load_agen (dep_insn, insn)) + return 3; + break; + } + case TYPE_IMUL: + case TYPE_IMUL2: + case TYPE_IMUL3: + case TYPE_LMUL: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + { + if (set_to_load_agen (dep_insn, insn)) + return 17; + break; + } + case TYPE_IDIV: + { + if (set_to_load_agen (dep_insn, insn)) + return 45; + break; + } + case TYPE_LDIV: + { + if (set_to_load_agen (dep_insn, insn)) + return 57; + break; + } + default: + break; + } + } + break; + + case TYPE_FPLOAD: + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0) + && (get_attr_type (dep_insn) == TYPE_MFFGPR)) + return 2; + + default: + break; + } - switch (get_attr_type (insn)) - { - case TYPE_JMPREG: - /* Tell the first scheduling pass about the latency between - 
a mtctr and bctr (and mtlr and br/blr). The first - scheduling pass will not know about this latency since - the mtctr instruction, which has the latency associated - to it, will be generated by reload. */ - return TARGET_POWER ? 5 : 4; - case TYPE_BRANCH: - /* Leave some extra cycles between a compare and its - dependent branch, to inhibit expensive mispredicts. */ - if ((rs6000_cpu_attr == CPU_PPC603 - || rs6000_cpu_attr == CPU_PPC604 - || rs6000_cpu_attr == CPU_PPC604E - || rs6000_cpu_attr == CPU_PPC620 - || rs6000_cpu_attr == CPU_PPC630 - || rs6000_cpu_attr == CPU_PPC750 - || rs6000_cpu_attr == CPU_PPC7400 - || rs6000_cpu_attr == CPU_PPC7450 - || rs6000_cpu_attr == CPU_POWER4 - || rs6000_cpu_attr == CPU_POWER5) - && recog_memoized (dep_insn) - && (INSN_CODE (dep_insn) >= 0) - && (get_attr_type (dep_insn) == TYPE_CMP - || get_attr_type (dep_insn) == TYPE_COMPARE - || get_attr_type (dep_insn) == TYPE_DELAYED_COMPARE - || get_attr_type (dep_insn) == TYPE_IMUL_COMPARE - || get_attr_type (dep_insn) == TYPE_LMUL_COMPARE - || get_attr_type (dep_insn) == TYPE_FPCOMPARE - || get_attr_type (dep_insn) == TYPE_CR_LOGICAL - || get_attr_type (dep_insn) == TYPE_DELAYED_CR)) - return cost + 2; - default: - break; - } /* Fall out to return default cost. */ + } + break; + + case REG_DEP_OUTPUT: + /* Output dependency; DEP_INSN writes a register that INSN writes some + cycles later. */ + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + { + attr_type = get_attr_type (insn); + + switch (attr_type) + { + case TYPE_FP: + if (get_attr_type (dep_insn) == TYPE_FP) + return 1; + break; + case TYPE_FPLOAD: + if (get_attr_type (dep_insn) == TYPE_MFFGPR) + return 2; + break; + default: + break; + } + } + case REG_DEP_ANTI: + /* Anti dependency; DEP_INSN reads a register that INSN writes some + cycles later. 
*/ + return 0; + + default: + gcc_unreachable (); } return cost; @@ -16581,55 +16874,6 @@ is_microcoded_insn (rtx insn) return false; } -/* The function returns a nonzero value if INSN can be scheduled only - as the first insn in a dispatch group ("dispatch-slot restricted"). - In this case, the returned value indicates how many dispatch slots - the insn occupies (at the beginning of the group). - Return 0 otherwise. */ - -static int -is_dispatch_slot_restricted (rtx insn) -{ - enum attr_type type; - - if (!rs6000_sched_groups) - return 0; - - if (!insn - || insn == NULL_RTX - || GET_CODE (insn) == NOTE - || GET_CODE (PATTERN (insn)) == USE - || GET_CODE (PATTERN (insn)) == CLOBBER) - return 0; - - type = get_attr_type (insn); - - switch (type) - { - case TYPE_MFCR: - case TYPE_MFCRF: - case TYPE_MTCR: - case TYPE_DELAYED_CR: - case TYPE_CR_LOGICAL: - case TYPE_MTJMPR: - case TYPE_MFJMPR: - return 1; - case TYPE_IDIV: - case TYPE_LDIV: - return 2; - case TYPE_LOAD_L: - case TYPE_STORE_C: - case TYPE_ISYNC: - case TYPE_SYNC: - return 4; - default: - if (rs6000_cpu == PROCESSOR_POWER5 - && is_cracked_insn (insn)) - return 2; - return 0; - } -} - /* The function returns true if INSN is cracked into 2 instructions by the processor (and therefore occupies 2 issue slots). */ @@ -16680,6 +16924,74 @@ is_branch_slot_insn (rtx insn) return false; } +/* The function returns true if out_inst sets a value that is + used in the address generation computation of in_insn */ +static bool +set_to_load_agen (rtx out_insn, rtx in_insn) +{ + rtx out_set, in_set; + + /* For performance reasons, only handle the simple case where + both loads are a single_set. 
*/ + out_set = single_set (out_insn); + if (out_set) + { + in_set = single_set (in_insn); + if (in_set) + return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set)); + } + + return false; +} + +/* The function returns true if the target storage location of + out_insn is adjacent to the target storage location of in_insn */ +/* Return 1 if memory locations are adjacent. */ + +static bool +adjacent_mem_locations (rtx insn1, rtx insn2) +{ + + rtx a = SET_DEST (PATTERN (insn1)); + rtx b = SET_DEST (PATTERN (insn2)); + + if ((GET_CODE (XEXP (a, 0)) == REG + || (GET_CODE (XEXP (a, 0)) == PLUS + && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT)) + && (GET_CODE (XEXP (b, 0)) == REG + || (GET_CODE (XEXP (b, 0)) == PLUS + && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT))) + { + HOST_WIDE_INT val0 = 0, val1 = 0; + rtx reg0, reg1; + int val_diff; + + if (GET_CODE (XEXP (a, 0)) == PLUS) + { + reg0 = XEXP (XEXP (a, 0), 0); + val0 = INTVAL (XEXP (XEXP (a, 0), 1)); + } + else + reg0 = XEXP (a, 0); + + if (GET_CODE (XEXP (b, 0)) == PLUS) + { + reg1 = XEXP (XEXP (b, 0), 0); + val1 = INTVAL (XEXP (XEXP (b, 0), 1)); + } + else + reg1 = XEXP (b, 0); + + val_diff = val1 - val0; + + return ((REGNO (reg0) == REGNO (reg1)) + && (val_diff == INTVAL (MEM_SIZE (a)) + || val_diff == -INTVAL (MEM_SIZE (b)))); + } + + return false; +} + /* A C statement (sans semicolon) to update the integer scheduling priority INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier, reduce the priority to execute INSN later. 
Do not @@ -16719,7 +17031,7 @@ rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority) } #endif - if (is_dispatch_slot_restricted (insn) + if (insn_must_be_first_in_group (insn) && reload_completed && current_sched_info->sched_max_insns_priority && rs6000_sched_restricted_insns_priority) @@ -16739,6 +17051,15 @@ rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority) return (priority + 1); } + if (rs6000_cpu == PROCESSOR_POWER6 + && ((load_store_pendulum == -2 && is_load_insn (insn)) + || (load_store_pendulum == 2 && is_store_insn (insn)))) + /* Attach highest priority to insn if the scheduler has just issued two + stores and this instruction is a load, or two loads and this instruction + is a store. Power6 wants loads and stores scheduled alternately + when possible */ + return current_sched_info->sched_max_insns_priority; + return priority; } @@ -16771,6 +17092,7 @@ rs6000_issue_rate (void) return 4; case CPU_POWER4: case CPU_POWER5: + case CPU_POWER6: return 5; default: return 1; @@ -16955,6 +17277,221 @@ get_next_active_insn (rtx insn, rtx tail) return insn; } +/* We are about to begin issuing insns for this clock cycle. */ + +static int +rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose, + rtx *ready ATTRIBUTE_UNUSED, + int *pn_ready ATTRIBUTE_UNUSED, + int clock_var ATTRIBUTE_UNUSED) +{ + if (sched_verbose) + fprintf (dump, "// rs6000_sched_reorder :\n"); + + if (rs6000_cpu == PROCESSOR_POWER6) + load_store_pendulum = 0; + + return rs6000_issue_rate (); +} + +/* Like rs6000_sched_reorder, but called after issuing each insn. */ + +static int +rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready, + int *pn_ready, int clock_var ATTRIBUTE_UNUSED) +{ + if (sched_verbose) + fprintf (dump, "// rs6000_sched_reorder2 :\n"); + + /* For Power6, we need to handle some special cases to try and keep the + store queue from overflowing and triggering expensive flushes. 
+ + This code monitors how load and store instructions are being issued + and skews the ready list one way or the other to increase the likelihood + that a desired instruction is issued at the proper time. + + A couple of things are done. First, we maintain a "load_store_pendulum" + to track the current state of load/store issue. + + - If the pendulum is at zero, then no loads or stores have been + issued in the current cycle so we do nothing. + + - If the pendulum is 1, then a single load has been issued in this + cycle and we attempt to locate another load in the ready list to + issue with it. + + - If the pendulum is -2, then two stores have already been + issued in this cycle, so we increase the priority of the first load + in the ready list to increase its likelihood of being chosen first + in the next cycle. + + - If the pendulum is -1, then a single store has been issued in this + cycle and we attempt to locate another store in the ready list to + issue with it, preferring a store to an adjacent memory location to + facilitate store pairing in the store queue. + + - If the pendulum is 2, then two loads have already been + issued in this cycle, so we increase the priority of the first store + in the ready list to increase its likelihood of being chosen first + in the next cycle. + + - If the pendulum < -2 or > 2, then do nothing. + + Note: This code covers the most common scenarios. There exist non + load/store instructions which make use of the LSU and which + would need to be accounted for to strictly model the behavior + of the machine. Those instructions are currently unaccounted + for to help minimize compile time overhead of this code. 
+ */ + if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn) + { + int pos; + int i; + rtx tmp; + + if (is_store_insn (last_scheduled_insn)) + /* Issuing a store, swing the load_store_pendulum to the left */ + load_store_pendulum--; + else if (is_load_insn (last_scheduled_insn)) + /* Issuing a load, swing the load_store_pendulum to the right */ + load_store_pendulum++; + else + return cached_can_issue_more; + + /* If the pendulum is balanced, or there is only one instruction on + the ready list, then all is well, so return. */ + if ((load_store_pendulum == 0) || (*pn_ready <= 1)) + return cached_can_issue_more; + + if (load_store_pendulum == 1) + { + /* A load has been issued in this cycle. Scan the ready list + for another load to issue with it */ + pos = *pn_ready-1; + + while (pos >= 0) + { + if (is_load_insn (ready[pos])) + { + /* Found a load. Move it to the head of the ready list, + and adjust it's priority so that it is more likely to + stay there */ + tmp = ready[pos]; + for (i=pos; i<*pn_ready-1; i++) + ready[i] = ready[i + 1]; + ready[*pn_ready-1] = tmp; + if INSN_PRIORITY_KNOWN (tmp) + INSN_PRIORITY (tmp)++; + break; + } + pos--; + } + } + else if (load_store_pendulum == -2) + { + /* Two stores have been issued in this cycle. Increase the + priority of the first load in the ready list to favor it for + issuing in the next cycle. */ + pos = *pn_ready-1; + + while (pos >= 0) + { + if (is_load_insn (ready[pos]) + && INSN_PRIORITY_KNOWN (ready[pos])) + { + INSN_PRIORITY (ready[pos])++; + + /* Adjust the pendulum to account for the fact that a load + was found and increased in priority. This is to prevent + increasing the priority of multiple loads */ + load_store_pendulum--; + + break; + } + pos--; + } + } + else if (load_store_pendulum == -1) + { + /* A store has been issued in this cycle. 
Scan the ready list for + another store to issue with it, preferring a store to an adjacent + memory location */ + int first_store_pos = -1; + + pos = *pn_ready-1; + + while (pos >= 0) + { + if (is_store_insn (ready[pos])) + { + /* Maintain the index of the first store found on the + list */ + if (first_store_pos == -1) + first_store_pos = pos; + + if (is_store_insn (last_scheduled_insn) + && adjacent_mem_locations (last_scheduled_insn,ready[pos])) + { + /* Found an adjacent store. Move it to the head of the + ready list, and adjust it's priority so that it is + more likely to stay there */ + tmp = ready[pos]; + for (i=pos; i<*pn_ready-1; i++) + ready[i] = ready[i + 1]; + ready[*pn_ready-1] = tmp; + if INSN_PRIORITY_KNOWN (tmp) + INSN_PRIORITY (tmp)++; + first_store_pos = -1; + + break; + }; + } + pos--; + } + + if (first_store_pos >= 0) + { + /* An adjacent store wasn't found, but a non-adjacent store was, + so move the non-adjacent store to the front of the ready + list, and adjust its priority so that it is more likely to + stay there. */ + tmp = ready[first_store_pos]; + for (i=first_store_pos; i<*pn_ready-1; i++) + ready[i] = ready[i + 1]; + ready[*pn_ready-1] = tmp; + if INSN_PRIORITY_KNOWN (tmp) + INSN_PRIORITY (tmp)++; + } + } + else if (load_store_pendulum == 2) + { + /* Two loads have been issued in this cycle. Increase the priority + of the first store in the ready list to favor it for issuing in + the next cycle. */ + pos = *pn_ready-1; + + while (pos >= 0) + { + if (is_store_insn (ready[pos]) + && INSN_PRIORITY_KNOWN (ready[pos])) + { + INSN_PRIORITY (ready[pos])++; + + /* Adjust the pendulum to account for the fact that a store + was found and increased in priority. This is to prevent + increasing the priority of multiple stores */ + load_store_pendulum++; + + break; + } + pos--; + } + } + } + + return cached_can_issue_more; +} + /* Return whether the presence of INSN causes a dispatch group termination of group WHICH_GROUP. 
@@ -16971,32 +17508,183 @@ get_next_active_insn (rtx insn, rtx tail) static bool insn_terminates_group_p (rtx insn, enum group_termination which_group) { - enum attr_type type; + bool first, last; if (! insn) return false; - type = get_attr_type (insn); + first = insn_must_be_first_in_group (insn); + last = insn_must_be_last_in_group (insn); - if (is_microcoded_insn (insn)) + if (first && last) return true; if (which_group == current_group) - { - if (is_branch_slot_insn (insn)) - return true; - return false; - } + return last; else if (which_group == previous_group) + return first; + + return false; +} + + +static bool +insn_must_be_first_in_group (rtx insn) +{ + enum attr_type type; + + if (!insn + || insn == NULL_RTX + || GET_CODE (insn) == NOTE + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + switch (rs6000_cpu) { - if (is_dispatch_slot_restricted (insn)) - return true; - return false; + case PROCESSOR_POWER5: + if (is_cracked_insn (insn)) + return true; + case PROCESSOR_POWER4: + if (is_microcoded_insn (insn)) + return true; + + if (!rs6000_sched_groups) + return false; + + type = get_attr_type (insn); + + switch (type) + { + case TYPE_MFCR: + case TYPE_MFCRF: + case TYPE_MTCR: + case TYPE_DELAYED_CR: + case TYPE_CR_LOGICAL: + case TYPE_MTJMPR: + case TYPE_MFJMPR: + case TYPE_IDIV: + case TYPE_LDIV: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_ISYNC: + case TYPE_SYNC: + return true; + default: + break; + } + break; + case PROCESSOR_POWER6: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_INSERT_DWORD: + case TYPE_EXTS: + case TYPE_CNTLZ: + case TYPE_SHIFT: + case TYPE_VAR_SHIFT_ROTATE: + case TYPE_TRAP: + case TYPE_IMUL: + case TYPE_IMUL2: + case TYPE_IMUL3: + case TYPE_LMUL: + case TYPE_IDIV: + case TYPE_INSERT_WORD: + case TYPE_DELAYED_COMPARE: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + case TYPE_FPCOMPARE: + case TYPE_MFCR: + case TYPE_MTCR: + case TYPE_MFJMPR: + case 
TYPE_MTJMPR: + case TYPE_ISYNC: + case TYPE_SYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_LOAD_U: + case TYPE_LOAD_UX: + case TYPE_LOAD_EXT_UX: + case TYPE_STORE_U: + case TYPE_STORE_UX: + case TYPE_FPLOAD_U: + case TYPE_FPLOAD_UX: + case TYPE_FPSTORE_U: + case TYPE_FPSTORE_UX: + return true; + default: + break; + } + break; + default: + break; } return false; } +static bool +insn_must_be_last_in_group (rtx insn) +{ + enum attr_type type; + + if (!insn + || insn == NULL_RTX + || GET_CODE (insn) == NOTE + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + switch (rs6000_cpu) { + case PROCESSOR_POWER4: + case PROCESSOR_POWER5: + if (is_microcoded_insn (insn)) + return true; + + if (is_branch_slot_insn (insn)) + return true; + + break; + case PROCESSOR_POWER6: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_EXTS: + case TYPE_CNTLZ: + case TYPE_SHIFT: + case TYPE_VAR_SHIFT_ROTATE: + case TYPE_TRAP: + case TYPE_IMUL: + case TYPE_IMUL2: + case TYPE_IMUL3: + case TYPE_LMUL: + case TYPE_IDIV: + case TYPE_DELAYED_COMPARE: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + case TYPE_FPCOMPARE: + case TYPE_MFCR: + case TYPE_MTCR: + case TYPE_MFJMPR: + case TYPE_MTJMPR: + case TYPE_ISYNC: + case TYPE_SYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + return true; + default: + break; + } + break; + default: + break; + } + + return false; +} + /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */ @@ -17317,6 +18005,17 @@ pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail) return group_count; } +/* We're beginning a new block. Initialize data structures as necessary. 
*/ + +static void +rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) +{ + last_scheduled_insn = NULL_RTX; + load_store_pendulum = 0; +} + /* The following function is called at the end of scheduling BB. After reload, it inserts nops at insn group bundling. */ diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 1edb09703f6..c9856d12aca 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -75,6 +75,7 @@ %{mcpu=power5: -mpower4} \ %{mcpu=power5+: -mpower4} \ %{mcpu=power6: -mpower4 -maltivec} \ +%{mcpu=power6x: -mpower4 -maltivec} \ %{mcpu=powerpc: -mppc} \ %{mcpu=rios: -mpwr} \ %{mcpu=rios1: -mpwr} \ @@ -162,6 +163,14 @@ #define TARGET_FPRND 0 #endif +/* Define TARGET_MFPGPR if the target assembler does not support the + mffgpr and mftgpr instructions. */ + +#ifndef HAVE_AS_MFPGPR +#undef TARGET_MFPGPR +#define TARGET_MFPGPR 0 +#endif + #ifndef TARGET_SECURE_PLT #define TARGET_SECURE_PLT 0 #endif @@ -212,7 +221,8 @@ enum processor_type PROCESSOR_PPC7450, PROCESSOR_PPC8540, PROCESSOR_POWER4, - PROCESSOR_POWER5 + PROCESSOR_POWER5, + PROCESSOR_POWER6 }; extern enum processor_type rs6000_cpu; @@ -1109,12 +1119,18 @@ enum reg_class rs6000_secondary_reload_class (CLASS, MODE, IN) /* If we are copying between FP or AltiVec registers and anything - else, we need a memory location. */ + else, we need a memory location. 
The exception is when we are + targeting ppc64 and the move to/from fpr to gpr instructions + are available.*/ -#define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \ - ((CLASS1) != (CLASS2) && ((CLASS1) == FLOAT_REGS \ - || (CLASS2) == FLOAT_REGS \ - || (CLASS1) == ALTIVEC_REGS \ +#define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \ + ((CLASS1) != (CLASS2) && (((CLASS1) == FLOAT_REGS \ + && (!TARGET_MFPGPR || !TARGET_POWERPC64 \ + || ((MODE != DFmode) && (MODE != DImode)))) \ + || ((CLASS2) == FLOAT_REGS \ + && (!TARGET_MFPGPR || !TARGET_POWERPC64 \ + || ((MODE != DFmode) && (MODE != DImode)))) \ + || (CLASS1) == ALTIVEC_REGS \ || (CLASS2) == ALTIVEC_REGS)) /* Return the maximum number of consecutive registers diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index db60b8ef944..8ddf26d924d 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -87,7 +87,7 @@ ;; Define an insn type attribute. This is used in function unit delay ;; computations. 
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,isync,sync,load_l,store_c" +(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr" (const_string "integer")) ;; Length (in bytes). @@ -106,7 +106,7 @@ ;; Processor type -- this attribute must exactly match the processor_type ;; enumeration in rs6000.h. 
-(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4,power5" +(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4,power5,power6" (const (symbol_ref "rs6000_cpu_attr"))) (automata_option "ndfa") @@ -124,6 +124,7 @@ (include "8540.md") (include "power4.md") (include "power5.md") +(include "power6.md") (include "predicates.md") (include "constraints.md") @@ -241,7 +242,8 @@ [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))] "TARGET_POWERPC64" - "extsb %0,%1") + "extsb %0,%1" + [(set_attr "type" "exts")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") @@ -308,7 +310,7 @@ "@ lha%U1%X1 %0,%1 extsh %0,%1" - [(set_attr "type" "load_ext,*")]) + [(set_attr "type" "load_ext,exts")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") @@ -375,7 +377,7 @@ "@ lwa%U1%X1 %0,%1 extsw %0,%1" - [(set_attr "type" "load_ext,*")]) + [(set_attr "type" "load_ext,exts")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") @@ -515,7 +517,8 @@ [(set (match_operand:SI 0 "gpc_reg_operand" "=r") (sign_extend:SI (match_operand:QI 1 "gpc_reg_operand" "r")))] "TARGET_POWERPC" - "extsb %0,%1") + "extsb %0,%1" + [(set_attr "type" "exts")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") @@ -681,7 +684,8 @@ [(set (match_operand:HI 0 "gpc_reg_operand" "=r") (sign_extend:HI (match_operand:QI 1 "gpc_reg_operand" "r")))] "TARGET_POWERPC" - "extsb %0,%1") + "extsb %0,%1" + [(set_attr "type" "exts")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") @@ -843,7 +847,7 @@ "@ lha%U1%X1 %0,%1 {exts|extsh} %0,%1" - [(set_attr "type" "load_ext,*")]) + [(set_attr "type" "load_ext,exts")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") @@ -2100,7 +2104,8 
@@ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (clz:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))] "" - "{cntlz|cntlz} %0,%1") + "{cntlz|cntlz} %0,%1" + [(set_attr "type" "cntlz")]) (define_expand "ctz2" [(set (match_dup 2) @@ -2397,7 +2402,11 @@ (match_operand:GPR 2 "gpc_reg_operand" "r")))] "TARGET_POWERPC && ! TARGET_POWER" "divu %0,%1,%2" - [(set_attr "type" "idiv")]) + [(set (attr "type") + (cond [(match_operand:SI 0 "" "") + (const_string "idiv")] + (const_string "ldiv")))]) + ;; For powers of two we can do srai/aze for divide and then adjust for ;; modulus. If it isn't a power of two, FAIL on POWER so divmodsi4 will be @@ -2450,7 +2459,10 @@ (match_operand:GPR 2 "gpc_reg_operand" "r")))] "TARGET_POWERPC && ! TARGET_POWER" "div %0,%1,%2" - [(set_attr "type" "idiv")]) + [(set (attr "type") + (cond [(match_operand:SI 0 "" "") + (const_string "idiv")] + (const_string "ldiv")))]) (define_expand "mod3" [(use (match_operand:GPR 0 "gpc_reg_operand" "")) @@ -3549,7 +3561,8 @@ operands[1] = GEN_INT (64 - start - size); return \"rldimi %0,%3,%H1,%H2\"; -}") +}" + [(set_attr "type" "insert_dword")]) (define_insn "*insvdi_internal2" [(set (zero_extract:DI (match_operand:DI 0 "gpc_reg_operand" "+r") @@ -3670,7 +3683,7 @@ operands[3] = GEN_INT (start + size); return \"{rlinm.|rlwinm.} %4,%1,%3,%s2,31\"; }" - [(set_attr "type" "compare") + [(set_attr "type" "delayed_compare") (set_attr "length" "4,8")]) (define_split @@ -3721,7 +3734,7 @@ operands[3] = GEN_INT (start + size); return \"{rlinm.|rlwinm.} %0,%1,%3,%s2,31\"; }" - [(set_attr "type" "compare") + [(set_attr "type" "delayed_compare") (set_attr "length" "4,8")]) (define_split @@ -3805,24 +3818,29 @@ [(set_attr "type" "compare")]) (define_insn "rotlsi3" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "reg_or_cint_operand" "ri")))] - "" - "{rl%I2nm|rlw%I2nm} %0,%1,%h2,0xffffffff") - -(define_insn "*rotlsi3_internal2" - [(set 
(match_operand:CC 0 "cc_reg_operand" "=x,?y") - (compare:CC (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) - (const_int 0))) - (clobber (match_scratch:SI 3 "=r,r"))] + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i")))] "" "@ - {rl%I2nm.|rlw%I2nm.} %3,%1,%h2,0xffffffff + {rlnm|rlwnm} %0,%1,%2,0xffffffff + {rlinm|rlwinm} %0,%1,%h2,0xffffffff" + [(set_attr "type" "var_shift_rotate,integer")]) + +(define_insn "*rotlsi3_internal2" + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) + (const_int 0))) + (clobber (match_scratch:SI 3 "=r,r,r,r"))] + "" + "@ + {rlnm.|rlwnm.} %3,%1,%2,0xffffffff + {rlinm.|rlwinm.} %3,%1,%h2,0xffffffff + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -3839,18 +3857,20 @@ "") (define_insn "*rotlsi3_internal3" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") - (compare:CC (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r") (rotate:SI (match_dup 1) (match_dup 2)))] "" "@ - {rl%I2nm.|rlw%I2nm.} %0,%1,%h2,0xffffffff + {rlnm.|rlwnm.} %0,%1,%2,0xffffffff + {rlinm.|rlwinm.} %0,%1,%h2,0xffffffff + # #" - [(set_attr "type" "delayed_compare") - 
(set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -3868,27 +3888,32 @@ "") (define_insn "*rotlsi3_internal4" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (and:SI (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "reg_or_cint_operand" "ri")) - (match_operand:SI 3 "mask_operand" "n")))] - "" - "{rl%I2nm|rlw%I2nm} %0,%1,%h2,%m3,%M3") - -(define_insn "*rotlsi3_internal5" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") - (compare:CC (and:SI - (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) - (match_operand:SI 3 "mask_operand" "n,n")) - (const_int 0))) - (clobber (match_scratch:SI 4 "=r,r"))] + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (and:SI (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i")) + (match_operand:SI 3 "mask_operand" "n,n")))] "" "@ - {rl%I2nm.|rlw%I2nm.} %4,%1,%h2,%m3,%M3 + {rlnm|rlwnm} %0,%1,%2,%m3,%M3 + {rlinm|rlwinm} %0,%1,%h2,%m3,%M3" + [(set_attr "type" "var_shift_rotate,integer")]) + +(define_insn "*rotlsi3_internal5" + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (and:SI + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) + (match_operand:SI 3 "mask_operand" "n,n,n,n")) + (const_int 0))) + (clobber (match_scratch:SI 4 "=r,r,r,r"))] + "" + "@ + {rlnm.|rlwnm.} %4,%1,%2,%m3,%M3 + {rlinm.|rlwinm.} %4,%1,%h2,%m3,%M3 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -3909,20 +3934,22 @@ "") 
(define_insn "*rotlsi3_internal6" - [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (and:SI - (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) - (match_operand:SI 3 "mask_operand" "n,n")) + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) + (match_operand:SI 3 "mask_operand" "n,n,n,n")) (const_int 0))) - (set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r") (and:SI (rotate:SI (match_dup 1) (match_dup 2)) (match_dup 3)))] "" "@ - {rl%I2nm.|rlw%I2nm.} %0,%1,%h2,%m3,%M3 + {rlnm.|rlwnm.} %0,%1,%2,%m3,%M3 + {rlinm.|rlwinm.} %0,%1,%h2,%m3,%M3 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "") @@ -3951,19 +3978,21 @@ "{rl%I2nm|rlw%I2nm} %0,%1,%h2,0xff") (define_insn "*rotlsi3_internal8" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (zero_extend:SI (subreg:QI - (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) 0)) + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 0)) (const_int 0))) - (clobber (match_scratch:SI 3 "=r,r"))] + (clobber (match_scratch:SI 3 "=r,r,r,r"))] "" "@ - {rl%I2nm.|rlw%I2nm.} %3,%1,%h2,0xff + {rlnm.|rlwnm.} %3,%1,%2,0xff + {rlinm.|rlwinm.} %3,%1,%h2,0xff + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set 
(match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -3984,20 +4013,22 @@ "") (define_insn "*rotlsi3_internal9" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (zero_extend:SI (subreg:QI - (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) 0)) + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 0)) (const_int 0))) - (set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r") (zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) (match_dup 2)) 0)))] "" "@ - {rl%I2nm.|rlw%I2nm.} %0,%1,%h2,0xff + {rlnm.|rlwnm.} %0,%1,%2,0xff + {rlinm.|rlwinm.} %0,%1,%h2,0xff + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -4017,28 +4048,34 @@ "") (define_insn "*rotlsi3_internal10" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r") (zero_extend:SI (subreg:HI - (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "reg_or_cint_operand" "ri")) 0)))] - "" - "{rl%I2nm|rlw%I2nm} %0,%1,%h2,0xffff") - -(define_insn "*rotlsi3_internal11" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") - (compare:CC (zero_extend:SI - (subreg:HI - (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) 0)) - (const_int 0))) - (clobber (match_scratch:SI 3 "=r,r"))] + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i")) 0)))] "" "@ - {rl%I2nm.|rlw%I2nm.} %3,%1,%h2,0xffff + {rlnm|rlwnm} %0,%1,%2,0xffff + {rlinm|rlwinm} %0,%1,%h2,0xffff" + [(set_attr "type" 
"var_shift_rotate,integer")]) + + +(define_insn "*rotlsi3_internal11" + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (zero_extend:SI + (subreg:HI + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 0)) + (const_int 0))) + (clobber (match_scratch:SI 3 "=r,r,r,r"))] + "" + "@ + {rlnm.|rlwnm.} %3,%1,%2,0xffff + {rlinm.|rlwinm.} %3,%1,%h2,0xffff + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -4059,20 +4096,22 @@ "") (define_insn "*rotlsi3_internal12" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (zero_extend:SI (subreg:HI - (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) 0)) + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 0)) (const_int 0))) - (set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r") (zero_extend:SI (subreg:HI (rotate:SI (match_dup 1) (match_dup 2)) 0)))] "" "@ - {rl%I2nm.|rlw%I2nm.} %0,%1,%h2,0xffff + {rlnm.|rlwnm.} %0,%1,%2,0xffff + {rlinm.|rlwinm.} %0,%1,%h2,0xffff + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -4119,11 +4158,14 @@ {sli|slwi} %0,%1,%h2") (define_insn "ashlsi3_no_power" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "reg_or_cint_operand" "ri")))] + [(set 
(match_operand:SI 0 "gpc_reg_operand" "=r,r") + (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i")))] "! TARGET_POWER" - "{sl|slw}%I2 %0,%1,%h2") + "@ + {sl|slw} %0,%1,%2 + {sli|slwi} %0,%1,%h2" + [(set_attr "type" "var_shift_rotate,shift")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") @@ -4158,17 +4200,19 @@ "") (define_insn "" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") - (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (clobber (match_scratch:SI 3 "=r,r"))] + (clobber (match_scratch:SI 3 "=r,r,r,r"))] "! TARGET_POWER && TARGET_32BIT" "@ - {sl|slw}%I2. %3,%1,%h2 + {sl.|slw.} %3,%1,%2 + {sli.|slwi.} %3,%1,%h2 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -4219,18 +4263,20 @@ "") (define_insn "" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") - (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r") (ashift:SI (match_dup 1) (match_dup 2)))] "! TARGET_POWER && TARGET_32BIT" "@ - {sl|slw}%I2. 
%0,%1,%h2 + {sl.|slw.} %0,%1,%2 + {sli.|slwi.} %0,%1,%h2 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -4348,13 +4394,15 @@ {s%A2i|s%A2wi} %0,%1,%h2") (define_insn "lshrsi3_no_power" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r") - (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "O,ri")))] + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r") + (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "O,r,i")))] "! TARGET_POWER" "@ mr %0,%1 - {sr|srw}%I2 %0,%1,%h2") + {sr|srw} %0,%1,%2 + {sri|srwi} %0,%1,%h2" + [(set_attr "type" "integer,var_shift_rotate,shift")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,x,?y,?y,?y") @@ -4391,19 +4439,21 @@ "") (define_insn "" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") - (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") - (match_operand:SI 2 "reg_or_cint_operand" "O,ri,O,ri")) + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,x,?y,?y,?y") + (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "O,r,i,O,r,i")) (const_int 0))) - (clobber (match_scratch:SI 3 "=X,r,X,r"))] + (clobber (match_scratch:SI 3 "=X,r,r,X,r,r"))] "! TARGET_POWER && TARGET_32BIT" "@ mr. %1,%1 - {sr|srw}%I2. 
%3,%1,%h2 + {sr.|srw.} %3,%1,%2 + {sri.|srwi.} %3,%1,%h2 + # # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,4,8,8")]) + [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,4,8,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -4456,20 +4506,22 @@ "") (define_insn "" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") - (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") - (match_operand:SI 2 "reg_or_cint_operand" "O,ri,O,ri")) + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,x,?y,?y,?y") + (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "O,r,i,O,r,i")) (const_int 0))) - (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r") + (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r,r,r") (lshiftrt:SI (match_dup 1) (match_dup 2)))] "! TARGET_POWER && TARGET_32BIT" "@ mr. %0,%1 - {sr|srw}%I2. %0,%1,%h2 + {sr.|srw.} %0,%1,%2 + {sri.|srwi.} %0,%1,%h2 + # # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,4,8,8")]) + [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,4,8,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -4767,14 +4819,18 @@ "TARGET_POWER" "@ srea %0,%1,%2 - {srai|srawi} %0,%1,%h2") + {srai|srawi} %0,%1,%h2" + [(set_attr "type" "shift")]) (define_insn "ashrsi3_no_power" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "reg_or_cint_operand" "ri")))] + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i")))] "! 
TARGET_POWER" - "{sra|sraw}%I2 %0,%1,%h2") + "@ + {sra|sraw} %0,%1,%2 + {srai|srawi} %0,%1,%h2" + [(set_attr "type" "var_shift_rotate,shift")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") @@ -4809,17 +4865,19 @@ "") (define_insn "" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") - (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (clobber (match_scratch:SI 3 "=r,r"))] + (clobber (match_scratch:SI 3 "=r,r,r,r"))] "! TARGET_POWER" "@ - {sra|sraw}%I2. %3,%1,%h2 + {sra.|sraw.} %3,%1,%2 + {srai.|srawi.} %3,%1,%h2 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -4870,18 +4928,20 @@ "") (define_insn "" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") - (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r") (ashiftrt:SI (match_dup 1) (match_dup 2)))] "! TARGET_POWER" "@ - {sra|sraw}%I2. 
%0,%1,%h2 + {sra.|sraw.} %0,%1,%2 + {srai.|srawi.} %0,%1,%h2 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -5657,6 +5717,12 @@ emit_insn (gen_spe_floatsidf2 (operands[0], operands[1])); DONE; } + if (TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS) + { + rtx t1 = gen_reg_rtx (DImode); + emit_insn (gen_floatsidf_ppc64_mfpgpr (operands[0], operands[1], t1)); + DONE; + } if (TARGET_POWERPC64) { rtx mem = assign_stack_temp (DImode, GET_MODE_SIZE (DImode), 0); @@ -5790,6 +5856,14 @@ DONE; } operands[2] = gen_reg_rtx (DImode); + if (TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS + && gpc_reg_operand(operands[0], GET_MODE (operands[0]))) + { + operands[3] = gen_reg_rtx (DImode); + emit_insn (gen_fix_truncdfsi2_mfpgpr (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } if (TARGET_PPC_GFXOPT) { rtx orig_dest = operands[0]; @@ -5843,6 +5917,20 @@ }" [(set_attr "length" "16")]) +(define_insn_and_split "fix_truncdfsi2_mfpgpr" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (fix:SI (match_operand:DF 1 "gpc_reg_operand" "f"))) + (clobber (match_operand:DI 2 "gpc_reg_operand" "=f")) + (clobber (match_operand:DI 3 "gpc_reg_operand" "=r"))] + "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS" + "#" + "&& 1" + [(set (match_dup 2) (unspec:DI [(fix:SI (match_dup 1))] UNSPEC_FCTIWZ)) + (set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (subreg:SI (match_dup 3) 4))] + "" + [(set_attr "length" "12")]) + ; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ)) ; rather than (set (subreg:SI (reg)) (fix:SI ...)) ; because the first makes it clear that operand 0 is not live @@ -5933,13 +6021,24 @@ "fcfid %0,%1" [(set_attr "type" "fp")]) +(define_insn_and_split 
"floatsidf_ppc64_mfpgpr" + [(set (match_operand:DF 0 "gpc_reg_operand" "=f") + (float:DF (match_operand:SI 1 "gpc_reg_operand" "r"))) + (clobber (match_operand:DI 2 "gpc_reg_operand" "=r"))] + "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS" + "#" + "&& 1" + [(set (match_dup 2) (sign_extend:DI (match_dup 1))) + (set (match_dup 0) (float:DF (match_dup 2)))] + "") + (define_insn_and_split "floatsidf_ppc64" [(set (match_operand:DF 0 "gpc_reg_operand" "=f") (float:DF (match_operand:SI 1 "gpc_reg_operand" "r"))) (clobber (match_operand:DI 2 "memory_operand" "=o")) (clobber (match_operand:DI 3 "gpc_reg_operand" "=r")) (clobber (match_operand:DI 4 "gpc_reg_operand" "=f"))] - "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS" + "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS" "#" "&& 1" [(set (match_dup 3) (sign_extend:DI (match_dup 1))) @@ -6364,7 +6463,8 @@ "@ {srai|srawi} %0,%1,31\;{srai|srawi} %L0,%1,%h2 sraiq %0,%1,%h2\;srliq %L0,%L1,%h2" - [(set_attr "length" "8")]) + [(set_attr "type" "shift") + (set_attr "length" "8")]) (define_insn "ashrdi3_no_power" [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,&r") @@ -6514,24 +6614,29 @@ [(set_attr "type" "lmul")]) (define_insn "rotldi3" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r") - (match_operand:DI 2 "reg_or_cint_operand" "ri")))] + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i")))] "TARGET_POWERPC64" - "rld%I2cl %0,%1,%H2,0") + "@ + rldcl %0,%1,%2,0 + rldicl %0,%1,%H2,0" + [(set_attr "type" "var_shift_rotate,integer")]) (define_insn "*rotldi3_internal2" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") - (compare:CC (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:DI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") + 
(compare:CC (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (clobber (match_scratch:DI 3 "=r,r"))] + (clobber (match_scratch:DI 3 "=r,r,r,r"))] "TARGET_64BIT" "@ - rld%I2cl. %3,%1,%H2,0 + rldcl. %3,%1,%2,0 + rldicl. %3,%1,%H2,0 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -6548,18 +6653,20 @@ "") (define_insn "*rotldi3_internal3" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") - (compare:CC (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:DI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r") (rotate:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT" "@ - rld%I2cl. %0,%1,%H2,0 + rldcl. %0,%1,%2,0 + rldicl. 
%0,%1,%H2,0 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -6577,27 +6684,32 @@ "") (define_insn "*rotldi3_internal4" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") - (and:DI (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r") - (match_operand:DI 2 "reg_or_cint_operand" "ri")) - (match_operand:DI 3 "mask64_operand" "n")))] + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (and:DI (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i")) + (match_operand:DI 3 "mask64_operand" "n,n")))] "TARGET_POWERPC64" - "rld%I2c%B3 %0,%1,%H2,%S3") + "@ + rldc%B3 %0,%1,%2,%S3 + rldic%B3 %0,%1,%H2,%S3" + [(set_attr "type" "var_shift_rotate,integer")]) (define_insn "*rotldi3_internal5" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (and:DI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:DI 2 "reg_or_cint_operand" "ri,ri")) - (match_operand:DI 3 "mask64_operand" "n,n")) + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) + (match_operand:DI 3 "mask64_operand" "n,n,n,n")) (const_int 0))) - (clobber (match_scratch:DI 4 "=r,r"))] + (clobber (match_scratch:DI 4 "=r,r,r,r"))] "TARGET_64BIT" "@ - rld%I2c%B3. %4,%1,%H2,%S3 + rldc%B3. %4,%1,%2,%S3 + rldic%B3. 
%4,%1,%H2,%S3 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -6618,20 +6730,22 @@ "") (define_insn "*rotldi3_internal6" - [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (and:DI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:DI 2 "reg_or_cint_operand" "ri,ri")) - (match_operand:DI 3 "mask64_operand" "n,n")) + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) + (match_operand:DI 3 "mask64_operand" "n,n,n,n")) (const_int 0))) - (set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r") (and:DI (rotate:DI (match_dup 1) (match_dup 2)) (match_dup 3)))] "TARGET_64BIT" "@ - rld%I2c%B3. %0,%1,%H2,%S3 + rldc%B3. %0,%1,%2,%S3 + rldic%B3. 
%0,%1,%H2,%S3 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "") @@ -6651,28 +6765,33 @@ "") (define_insn "*rotldi3_internal7" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (zero_extend:DI (subreg:QI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r") - (match_operand:DI 2 "reg_or_cint_operand" "ri")) 0)))] + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i")) 0)))] "TARGET_POWERPC64" - "rld%I2cl %0,%1,%H2,56") + "@ + rldcl %0,%1,%2,56 + rldicl %0,%1,%H2,56" + [(set_attr "type" "var_shift_rotate,integer")]) (define_insn "*rotldi3_internal8" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (zero_extend:DI (subreg:QI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:DI 2 "reg_or_cint_operand" "ri,ri")) 0)) + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0)) (const_int 0))) - (clobber (match_scratch:DI 3 "=r,r"))] + (clobber (match_scratch:DI 3 "=r,r,r,r"))] "TARGET_64BIT" "@ - rld%I2cl. %3,%1,%H2,56 + rldcl. %3,%1,%2,56 + rldicl. 
%3,%1,%H2,56 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -6693,20 +6812,22 @@ "") (define_insn "*rotldi3_internal9" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (zero_extend:DI (subreg:QI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:DI 2 "reg_or_cint_operand" "ri,ri")) 0)) + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0)) (const_int 0))) - (set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r") (zero_extend:DI (subreg:QI (rotate:DI (match_dup 1) (match_dup 2)) 0)))] "TARGET_64BIT" "@ - rld%I2cl. %0,%1,%H2,56 + rldcl. %0,%1,%2,56 + rldicl. 
%0,%1,%H2,56 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -6726,28 +6847,33 @@ "") (define_insn "*rotldi3_internal10" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (zero_extend:DI (subreg:HI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r") - (match_operand:DI 2 "reg_or_cint_operand" "ri")) 0)))] + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i")) 0)))] "TARGET_POWERPC64" - "rld%I2cl %0,%1,%H2,48") + "@ + rldcl %0,%1,%2,48 + rldicl %0,%1,%H2,48" + [(set_attr "type" "var_shift_rotate,integer")]) (define_insn "*rotldi3_internal11" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (zero_extend:DI (subreg:HI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:DI 2 "reg_or_cint_operand" "ri,ri")) 0)) + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0)) (const_int 0))) - (clobber (match_scratch:DI 3 "=r,r"))] + (clobber (match_scratch:DI 3 "=r,r,r,r"))] "TARGET_64BIT" "@ - rld%I2cl. %3,%1,%H2,48 + rldcl. %3,%1,%2,48 + rldicl. 
%3,%1,%H2,48 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -6768,20 +6894,22 @@ "") (define_insn "*rotldi3_internal12" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (zero_extend:DI (subreg:HI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:DI 2 "reg_or_cint_operand" "ri,ri")) 0)) + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0)) (const_int 0))) - (set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r") (zero_extend:DI (subreg:HI (rotate:DI (match_dup 1) (match_dup 2)) 0)))] "TARGET_64BIT" "@ - rld%I2cl. %0,%1,%H2,48 + rldcl. %0,%1,%2,48 + rldicl. 
%0,%1,%H2,48 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -6801,28 +6929,33 @@ "") (define_insn "*rotldi3_internal13" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (zero_extend:DI (subreg:SI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r") - (match_operand:DI 2 "reg_or_cint_operand" "ri")) 0)))] + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i")) 0)))] "TARGET_POWERPC64" - "rld%I2cl %0,%1,%H2,32") + "@ + rldcl %0,%1,%2,32 + rldicl %0,%1,%H2,32" + [(set_attr "type" "var_shift_rotate,integer")]) (define_insn "*rotldi3_internal14" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (zero_extend:DI (subreg:SI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:DI 2 "reg_or_cint_operand" "ri,ri")) 0)) + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0)) (const_int 0))) - (clobber (match_scratch:DI 3 "=r,r"))] + (clobber (match_scratch:DI 3 "=r,r,r,r"))] "TARGET_64BIT" "@ - rld%I2cl. %3,%1,%H2,32 + rldcl. %3,%1,%2,32 + rldicl. 
%3,%1,%H2,32 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -6843,20 +6976,22 @@ "") (define_insn "*rotldi3_internal15" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") (compare:CC (zero_extend:DI (subreg:SI - (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:DI 2 "reg_or_cint_operand" "ri,ri")) 0)) + (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0)) (const_int 0))) - (set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r") (zero_extend:DI (subreg:SI (rotate:DI (match_dup 1) (match_dup 2)) 0)))] "TARGET_64BIT" "@ - rld%I2cl. %0,%1,%H2,32 + rldcl. %0,%1,%2,32 + rldicl. 
%0,%1,%H2,32 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -6894,24 +7029,29 @@ }") (define_insn "*ashldi3_internal1" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") - (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "reg_or_cint_operand" "ri")))] + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i")))] "TARGET_POWERPC64" - "sld%I2 %0,%1,%H2") + "@ + sld %0,%1,%2 + sldi %0,%1,%H2" + [(set_attr "type" "var_shift_rotate,shift")]) (define_insn "*ashldi3_internal2" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") - (compare:CC (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (clobber (match_scratch:DI 3 "=r,r"))] + (clobber (match_scratch:DI 3 "=r,r,r,r"))] "TARGET_64BIT" "@ - sld%I2. %3,%1,%H2 + sld. %3,%1,%2 + sldi. 
%3,%1,%H2 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -6928,18 +7068,20 @@ "") (define_insn "*ashldi3_internal3" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") - (compare:CC (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r") (ashift:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT" "@ - sld%I2. %0,%1,%H2 + sld. %0,%1,%2 + sldi. %0,%1,%H2 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -7127,24 +7269,29 @@ }") (define_insn "*lshrdi3_internal1" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") - (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "reg_or_cint_operand" "ri")))] + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i")))] "TARGET_POWERPC64" - "srd%I2 %0,%1,%H2") + "@ + srd %0,%1,%2 + srdi %0,%1,%H2" + [(set_attr "type" "var_shift_rotate,shift")]) (define_insn "*lshrdi3_internal2" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") - (compare:CC (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 0 
"cc_reg_operand" "=x,x,?y,?y") + (compare:CC (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (clobber (match_scratch:DI 3 "=r,r"))] + (clobber (match_scratch:DI 3 "=r,r,r,r"))] "TARGET_64BIT " "@ - srd%I2. %3,%1,%H2 + srd. %3,%1,%2 + srdi. %3,%1,%H2 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -7161,18 +7308,20 @@ "") (define_insn "*lshrdi3_internal3" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") - (compare:CC (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r") (lshiftrt:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT" "@ - srd%I2. %0,%1,%H2 + srd. %0,%1,%2 + srdi. 
%0,%1,%H2 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -7214,24 +7363,29 @@ }") (define_insn "*ashrdi3_internal1" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") - (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "reg_or_cint_operand" "ri")))] + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i")))] "TARGET_POWERPC64" - "srad%I2 %0,%1,%H2") + "@ + srad %0,%1,%2 + sradi %0,%1,%H2" + [(set_attr "type" "var_shift_rotate,shift")]) (define_insn "*ashrdi3_internal2" - [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") - (compare:CC (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (clobber (match_scratch:DI 3 "=r,r"))] + (clobber (match_scratch:DI 3 "=r,r,r,r"))] "TARGET_64BIT" "@ - srad%I2. %3,%1,%H2 + srad. %3,%1,%2 + sradi. 
%3,%1,%H2 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") @@ -7248,18 +7402,20 @@ "") (define_insn "*ashrdi3_internal3" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") - (compare:CC (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r") - (match_operand:SI 2 "reg_or_cint_operand" "ri,ri")) + [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y") + (compare:CC (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r") + (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) (const_int 0))) - (set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r") (ashiftrt:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT" "@ - srad%I2. %0,%1,%H2 + srad. %0,%1,%2 + sradi. %0,%1,%H2 + # #" - [(set_attr "type" "delayed_compare") - (set_attr "length" "4,8")]) + [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + (set_attr "length" "4,4,8,8")]) (define_split [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "") @@ -7334,7 +7490,7 @@ # # #" - [(set_attr "type" "compare,compare,compare,compare,compare,compare,compare,compare,compare,compare,compare,compare") + [(set_attr "type" "compare,compare,delayed_compare,compare,compare,compare,compare,compare,compare,compare,compare,compare") (set_attr "length" "4,4,4,4,4,8,8,8,8,8,8,12")]) (define_split @@ -7385,7 +7541,7 @@ # # #" - [(set_attr "type" "compare,compare,compare,compare,compare,compare,compare,compare,compare,compare,compare,compare") + [(set_attr "type" "compare,compare,delayed_compare,compare,compare,compare,compare,compare,compare,compare,compare,compare") (set_attr "length" "4,4,4,4,4,8,8,8,8,8,8,12")]) (define_split @@ -8269,12 +8425,38 @@ [(set_attr "type" "two,load,store,*,*,*") (set_attr "length" 
"8,8,8,8,12,16")]) +; ld/std require word-aligned displacements -> 'Y' constraint. +; List Y->r and r->Y before r->r for reload. +(define_insn "*movdf_hardfloat64_mfpgpr" + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,f,f,m,*c*l,!r,*h,!r,!r,!r,r,f") + (match_operand:DF 1 "input_operand" "r,Y,r,f,m,f,r,h,0,G,H,F,f,r"))] + "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS + && (gpc_reg_operand (operands[0], DFmode) + || gpc_reg_operand (operands[1], DFmode))" + "@ + std%U0%X0 %1,%0 + ld%U1%X1 %0,%1 + mr %0,%1 + fmr %0,%1 + lfd%U1%X1 %0,%1 + stfd%U0%X0 %1,%0 + mt%0 %1 + mf%1 %0 + {cror 0,0,0|nop} + # + # + # + mftgpr %0,%1 + mffgpr %0,%1" + [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr") + (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16,4,4")]) + ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. (define_insn "*movdf_hardfloat64" [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,f,f,m,*c*l,!r,*h,!r,!r,!r") (match_operand:DF 1 "input_operand" "r,Y,r,f,m,f,r,h,0,G,H,F"))] - "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS + "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS && (gpc_reg_operand (operands[0], DFmode) || gpc_reg_operand (operands[1], DFmode))" "@ @@ -8590,10 +8772,35 @@ [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) +(define_insn "*movdi_mfpgpr" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m,r,r,r,r,*f,*f,m,r,*h,*h,r,*f") + (match_operand:DI 1 "input_operand" "r,m,r,I,L,nF,R,f,m,f,*h,r,0,*f,r"))] + "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS + && (gpc_reg_operand (operands[0], DImode) + || gpc_reg_operand (operands[1], DImode))" + "@ + mr %0,%1 + ld%U1%X1 %0,%1 + std%U0%X0 %1,%0 + li %0,%1 + lis %0,%v1 + # + {cal|la} %0,%a1 + fmr %0,%1 + lfd%U1%X1 %0,%1 + stfd%U0%X0 %1,%0 + mf%1 %0 + mt%0 %1 + {cror 0,0,0|nop} + mftgpr %0,%1 + 
mffgpr %0,%1" + [(set_attr "type" "*,load,store,*,*,*,*,fp,fpload,fpstore,mfjmpr,mtjmpr,*,mftgpr,mffgpr") + (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4")]) + (define_insn "*movdi_internal64" [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m,r,r,r,r,*f,*f,m,r,*h,*h") (match_operand:DI 1 "input_operand" "r,m,r,I,L,nF,R,f,m,f,*h,r,0"))] - "TARGET_POWERPC64 + "TARGET_POWERPC64 && (!TARGET_MFPGPR || !TARGET_HARD_FLOAT || !TARGET_FPRS) && (gpc_reg_operand (operands[0], DImode) || gpc_reg_operand (operands[1], DImode))" "@ @@ -13971,7 +14178,8 @@ (define_insn "trap" [(trap_if (const_int 1) (const_int 0))] "" - "{t 31,0,0|trap}") + "{t 31,0,0|trap}" + [(set_attr "type" "trap")]) (define_expand "conditional_trap" [(trap_if (match_operator 0 "trap_comparison_operator" @@ -13988,7 +14196,8 @@ (match_operand:GPR 2 "reg_or_short_operand" "rI")]) (const_int 0))] "" - "{t|t}%V0%I2 %1,%2") + "{t|t}%V0%I2 %1,%2" + [(set_attr "type" "trap")]) ;; Insns related to generating the function prologue and epilogue. 
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 8ff390d6edc..bbf6235b8e3 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -64,6 +64,10 @@ mfprnd Target Report Mask(FPRND) Use PowerPC V2.02 floating point rounding instructions +mmfpgpr +Target Report Mask(MFPGPR) +Use extended PowerPC V2.05 move floating point to/from GPR instructions + maltivec Target Report Mask(ALTIVEC) Use AltiVec instructions diff --git a/gcc/config/rs6000/rs64.md b/gcc/config/rs6000/rs64.md index 71ec61de59b..3af39bf75ff 100644 --- a/gcc/config/rs6000/rs64.md +++ b/gcc/config/rs6000/rs64.md @@ -47,7 +47,8 @@ "lsu_rs64") (define_insn_reservation "rs64a-integer" 1 - (and (eq_attr "type" "integer,insert_word") + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,cntlz,exts") (eq_attr "cpu" "rs64a")) "iu_rs64") @@ -92,7 +93,8 @@ "mciu_rs64*66") (define_insn_reservation "rs64a-compare" 3 - (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare") + (and (eq_attr "type" "cmp,fast_compare,compare,\ + delayed_compare,var_delayed_compare") (eq_attr "cpu" "rs64a")) "iu_rs64,nothing,bpu_rs64") diff --git a/gcc/configure b/gcc/configure index 49535d590f7..2e2b0bc1b75 100755 --- a/gcc/configure +++ b/gcc/configure @@ -15510,6 +15510,52 @@ cat >>confdefs.h <<\_ACEOF #define HAVE_AS_FPRND 1 _ACEOF +fi + + case $target in + *-*-aix*) conftest_s=' .machine "pwr6" + .csect .text[PR] + mffgpr 1,3';; + *) conftest_s=' .machine power6 + .text + mffgpr 1,3';; + esac + + echo "$as_me:$LINENO: checking assembler for move fp gpr support" >&5 +echo $ECHO_N "checking assembler for move fp gpr support... 
$ECHO_C" >&6 +if test "${gcc_cv_as_powerpc_mfpgpr+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + gcc_cv_as_powerpc_mfpgpr=no + if test $in_tree_gas = yes; then + if test $gcc_cv_gas_vers -ge `expr \( \( 9 \* 1000 \) + 99 \) \* 1000 + 0` + then gcc_cv_as_powerpc_mfpgpr=yes +fi + elif test x$gcc_cv_as != x; then + echo "$conftest_s" > conftest.s + if { ac_try='$gcc_cv_as -o conftest.o conftest.s >&5' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } + then + gcc_cv_as_powerpc_mfpgpr=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +echo "$as_me:$LINENO: result: $gcc_cv_as_powerpc_mfpgpr" >&5 +echo "${ECHO_T}$gcc_cv_as_powerpc_mfpgpr" >&6 +if test $gcc_cv_as_powerpc_mfpgpr = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_AS_MFPGPR 1 +_ACEOF + fi case $target in diff --git a/gcc/configure.ac b/gcc/configure.ac index 7b571385ce4..fca9fedaf07 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -2933,6 +2933,21 @@ foo: nop [AC_DEFINE(HAVE_AS_FPRND, 1, [Define if your assembler supports fprnd.])]) + case $target in + *-*-aix*) conftest_s=' .machine "pwr6" + .csect .text[[PR]] + mffgpr 1,3';; + *) conftest_s=' .machine power6 + .text + mffgpr 1,3';; + esac + + gcc_GAS_CHECK_FEATURE([move fp gpr support], + gcc_cv_as_powerpc_mfpgpr, [9,99,0],, + [$conftest_s],, + [AC_DEFINE(HAVE_AS_MFPGPR, 1, + [Define if your assembler supports mffgpr and mftgpr.])]) + case $target in *-*-aix*) conftest_s=' .csect .text[[PR]] LCF..0: diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c62c02c31cd..bb9ce267a72 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -656,6 +656,7 @@ See RS/6000 and PowerPC Options. 
-mpowerpc-gpopt -mno-powerpc-gpopt @gol -mpowerpc-gfxopt -mno-powerpc-gfxopt @gol -mmfcrf -mno-mfcrf -mpopcntb -mno-popcntb -mfprnd -mno-fprnd @gol +-mmfpgpr -mno-mfpgpr @gol -mnew-mnemonics -mold-mnemonics @gol -mfull-toc -mminimal-toc -mno-fp-in-toc -mno-sum-in-toc @gol -m64 -m32 -mxl-compat -mno-xl-compat -mpe @gol @@ -11253,6 +11254,8 @@ These @samp{-m} options are defined for the IBM RS/6000 and PowerPC: @itemx -mno-popcntb @itemx -mfprnd @itemx -mno-fprnd +@itemx -mmfpgpr +@itemx -mno-mfpgpr @opindex mpower @opindex mno-power @opindex mpower2 @@ -11271,6 +11274,8 @@ These @samp{-m} options are defined for the IBM RS/6000 and PowerPC: @opindex mno-popcntb @opindex mfprnd @opindex mno-fprnd +@opindex mmfpgpr +@opindex mno-mfpgpr GCC supports two related instruction set architectures for the RS/6000 and PowerPC@. The @dfn{POWER} instruction set are those instructions supported by the @samp{rios} chip set used in the original @@ -11315,6 +11320,10 @@ architecture. The @option{-mfprnd} option allows GCC to generate the FP round to integer instructions implemented on the POWER5+ processor and other processors that support the PowerPC V2.03 architecture. +The @option{-mmfpgpr} option allows GCC to generate the FP move to/from +general purpose register instructions implemented on the POWER6X +processor and other processors that support the extended PowerPC V2.05 +architecture. 
The @option{-mpowerpc64} option allows GCC to generate the additional 64-bit instructions that are found in the full PowerPC64 architecture @@ -11357,7 +11366,7 @@ Supported values for @var{cpu_type} are @samp{401}, @samp{403}, @samp{860}, @samp{970}, @samp{8540}, @samp{ec603e}, @samp{G3}, @samp{G4}, @samp{G5}, @samp{power}, @samp{power2}, @samp{power3}, @samp{power4}, @samp{power5}, @samp{power5+}, @samp{power6}, -@samp{common}, @samp{powerpc}, @samp{powerpc64}, +@samp{power6x}, @samp{common}, @samp{powerpc}, @samp{powerpc64}, @samp{rios}, @samp{rios1}, @samp{rios2}, @samp{rsc}, and @samp{rs64}. @option{-mcpu=common} selects a completely generic processor. Code @@ -11381,14 +11390,14 @@ following options: @option{-maltivec}, @option{-mfprnd}, @option{-mhard-float}, @option{-mmfcrf}, @option{-mmultiple}, @option{-mnew-mnemonics}, @option{-mpopcntb}, @option{-mpower}, @option{-mpower2}, @option{-mpowerpc64}, @option{-mpowerpc-gpopt}, -@option{-mpowerpc-gfxopt}, @option{-mstring}, @option{-mmulhw}, @option{-mdlmzb}. -The particular options -set for any particular CPU will vary between compiler versions, -depending on what setting seems to produce optimal code for that CPU; -it doesn't necessarily reflect the actual hardware's capabilities. If -you wish to set an individual option to a particular value, you may -specify it after the @option{-mcpu} option, like @samp{-mcpu=970 --mno-altivec}. +@option{-mpowerpc-gfxopt}, @option{-mstring}, @option{-mmulhw}, +@option{-mdlmzb}, @option{-mmfpgpr}. +The particular options set for any particular CPU will vary between +compiler versions, depending on what setting seems to produce optimal +code for that CPU; it doesn't necessarily reflect the actual hardware's +capabilities. If you wish to set an individual option to a particular +value, you may specify it after the @option{-mcpu} option, like +@samp{-mcpu=970 -mno-altivec}. 
On AIX, the @option{-maltivec} and @option{-mpowerpc64} options are not enabled or disabled by the @option{-mcpu} option at present because