s390.c (z196_cost): New.
2010-10-05 Andreas Krebbel <Andreas.Krebbel@de.ibm.com> Christian Borntraeger <Christian.Borntraeger@de.ibm.com> * gcc/config/s390/s390.c (z196_cost): New. (s390_handle_arch_option): Add -march=z196. (s390_option_override): Use the new cost function and use the z10 defaults also for z196. (legitimate_reload_constant_p): Adjust comment. (legitimate_reload_fp_constant_p): New function. (s390_preferred_reload_class): Distingiush between FP and Int constants. (preferred_la_operand_p): Avoid la with index on z196. (s390_adjust_priority): Trigger also for z196. (s390_issue_rate): Issue rate for z196 is 3. (s390_z10_fix_long_loop_prediction): Rename to ... (s390_fix_long_loop_prediction): ... this. (s390_reorg): Apply the z10 adjustments also for z196. (s390_loop_unroll_adjust): Do this also for z196. * gcc/config/s390/s390.h (enum processor_type): Add PROCESSOR_2817_Z196. (enum processor_flags): Add PF_Z196. (TARGET_AVOID_CMP_AND_BRANCH): New macro. (TARGET_CPU_Z196, TARGET_Z196): New macros. * gcc/config.gcc: Enable z196 for --with-arch. * gcc/config/s390/2817.md: New file. * gcc/config/s390/2084.md: New type for multiply and add. * gcc/config/s390/2097.md: Likewise. * gcc/config/s390/s390.md (UNSPEC_POPCNT, UNSPEC_COPYSIGN) (UNSPECV_ATOMIC_OP): New constants. (fmadddf, fmaddsf): New values for type attribute. (z196prop): New insn attribute. (cpu, cpu_facility): Add z196. (ATOMIC_Z196): New code iterator. (noxa): New code attribute. (gk): New mode attribute. (*mov<mode>_64, *mov<mode>_31, *mov<mode>_64dfp, mov<mode>): Support load zero for fp constants. (fixuns_truncdddi2, fixuns_trunctddi2) (fixuns_trunc<BFP:mode><GPR:mode>2): Use the standard rtx pattern for z196. (fixuns_trunc<mode>si2, mov<mode>cc, popcountdi2, popcountsi2) (popcounthi2, popcountqi2): New expander. (*fixuns_trunc<FP:mode><GPR:mode>2_z196, floatsi<mode>2) (floatuns<GPR:mode><FP:mode>2, *mov<mode>cc, sync_<atomic><mode>) (sync_old_<atomic><mode>, *popcount<mode>, copysign<mode>3): New insn definition. 
(add<mode>3, *add<mode>3_carry1_cc, *add<mode>3_carry1_cconly) (*add<mode>3_carry2_cc, *add<mode>3_carry2_cconly, *add<mode>3_cc) (*add<mode>3_cconly, *add<mode>3_cconly2, *add<mode>3_imm_cc, *sub<mode>3) (*sub<mode>3_borrow_cc, *sub<mode>3_borrow_cconly, *sub<mode>3_cc) (*sub<mode>3_cc2, *sub<mode>3_cconly, *sub<mode>3_cconly2) (*anddi3_cc, *anddi3_cconly, *anddi3, *andsi3_cc, *andsi3_cconly) (*andsi3_zarch, *andsi3_esa, *andhi3_zarch, *andqi3_zarch, *iordi3_cc) (*iordi3_cconly, *iordi3, *iorsi3_cc, *iorsi3_cconly, *iorsi3_zarch) (*iorhi3_zarch, *iorqi3_zarch, *xordi3_cc, *xordi3_cconly, *xordi3) (*xorsi3_cc, *xorsi3_cconly, *xorsi3, *xorhi3, *xorqi3, *<shift><mode>3) (*<shift><mode>3_and, *ashr<mode>3_cc, *ashr<mode>3_cconly, *ashr<mode>3) (*ashr<mode>3_cc_and, *ashr<mode>3_cconly_and, *ashr<mode>3_and): Support new z196 instructions. Co-Authored-By: Christian Borntraeger <Christian.Borntraeger@de.ibm.com> From-SVN: r164985
This commit is contained in:
parent
6e57232622
commit
65b1d8ea3e
@ -1,3 +1,61 @@
|
||||
2010-10-05 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
|
||||
Christian Borntraeger <Christian.Borntraeger@de.ibm.com>
|
||||
|
||||
* gcc/config/s390/s390.c (z196_cost): New.
|
||||
(s390_handle_arch_option): Add -march=z196.
|
||||
(s390_option_override): Use the new cost function and use the z10
|
||||
defaults also for z196.
|
||||
(legitimate_reload_constant_p): Adjust comment.
|
||||
(legitimate_reload_fp_constant_p): New function.
|
||||
(s390_preferred_reload_class): Distinguish between FP and Int constants.
|
||||
(preferred_la_operand_p): Avoid la with index on z196.
|
||||
(s390_adjust_priority): Trigger also for z196.
|
||||
(s390_issue_rate): Issue rate for z196 is 3.
|
||||
(s390_z10_fix_long_loop_prediction): Rename to ...
|
||||
(s390_fix_long_loop_prediction): ... this.
|
||||
(s390_reorg): Apply the z10 adjustments also for z196.
|
||||
(s390_loop_unroll_adjust): Do this also for z196.
|
||||
* gcc/config/s390/s390.h (enum processor_type): Add PROCESSOR_2817_Z196.
|
||||
(enum processor_flags): Add PF_Z196.
|
||||
(TARGET_AVOID_CMP_AND_BRANCH): New macro.
|
||||
(TARGET_CPU_Z196, TARGET_Z196): New macros.
|
||||
* gcc/config.gcc: Enable z196 for --with-arch.
|
||||
* gcc/config/s390/2817.md: New file.
|
||||
* gcc/config/s390/2084.md: New type for multiply and add.
|
||||
* gcc/config/s390/2097.md: Likewise.
|
||||
* gcc/config/s390/s390.md (UNSPEC_POPCNT, UNSPEC_COPYSIGN)
|
||||
(UNSPECV_ATOMIC_OP): New constants.
|
||||
(fmadddf, fmaddsf): New values for type attribute.
|
||||
(z196prop): New insn attribute.
|
||||
(cpu, cpu_facility): Add z196.
|
||||
(ATOMIC_Z196): New code iterator.
|
||||
(noxa): New code attribute.
|
||||
(gk): New mode attribute.
|
||||
(*mov<mode>_64, *mov<mode>_31, *mov<mode>_64dfp, mov<mode>):
|
||||
Support load zero for fp constants.
|
||||
(fixuns_truncdddi2, fixuns_trunctddi2)
|
||||
(fixuns_trunc<BFP:mode><GPR:mode>2): Use the standard rtx pattern
|
||||
for z196.
|
||||
(fixuns_trunc<mode>si2, mov<mode>cc, popcountdi2, popcountsi2)
|
||||
(popcounthi2, popcountqi2): New expander.
|
||||
(*fixuns_trunc<FP:mode><GPR:mode>2_z196, floatsi<mode>2)
|
||||
(floatuns<GPR:mode><FP:mode>2, *mov<mode>cc, sync_<atomic><mode>)
|
||||
(sync_old_<atomic><mode>, *popcount<mode>, copysign<mode>3): New
|
||||
insn definition.
|
||||
(add<mode>3, *add<mode>3_carry1_cc, *add<mode>3_carry1_cconly)
|
||||
(*add<mode>3_carry2_cc, *add<mode>3_carry2_cconly, *add<mode>3_cc)
|
||||
(*add<mode>3_cconly, *add<mode>3_cconly2, *add<mode>3_imm_cc, *sub<mode>3)
|
||||
(*sub<mode>3_borrow_cc, *sub<mode>3_borrow_cconly, *sub<mode>3_cc)
|
||||
(*sub<mode>3_cc2, *sub<mode>3_cconly, *sub<mode>3_cconly2)
|
||||
(*anddi3_cc, *anddi3_cconly, *anddi3, *andsi3_cc, *andsi3_cconly)
|
||||
(*andsi3_zarch, *andsi3_esa, *andhi3_zarch, *andqi3_zarch, *iordi3_cc)
|
||||
(*iordi3_cconly, *iordi3, *iorsi3_cc, *iorsi3_cconly, *iorsi3_zarch)
|
||||
(*iorhi3_zarch, *iorqi3_zarch, *xordi3_cc, *xordi3_cconly, *xordi3)
|
||||
(*xorsi3_cc, *xorsi3_cconly, *xorsi3, *xorhi3, *xorqi3, *<shift><mode>3)
|
||||
(*<shift><mode>3_and, *ashr<mode>3_cc, *ashr<mode>3_cconly, *ashr<mode>3)
|
||||
(*ashr<mode>3_cc_and, *ashr<mode>3_cconly_and, *ashr<mode>3_and):
|
||||
Support new z196 instructions.
|
||||
|
||||
2010-10-05 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR middle-end/45877
|
||||
|
@ -3374,7 +3374,7 @@ case "${target}" in
|
||||
for which in arch tune; do
|
||||
eval "val=\$with_$which"
|
||||
case ${val} in
|
||||
"" | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10)
|
||||
"" | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10 | z196)
|
||||
# OK
|
||||
;;
|
||||
*)
|
||||
|
@ -168,12 +168,12 @@
|
||||
|
||||
(define_insn_reservation "x_fsimpdf" 6
|
||||
(and (eq_attr "cpu" "z990,z9_109")
|
||||
(eq_attr "type" "fsimpdf,fmuldf,fhex"))
|
||||
(eq_attr "type" "fsimpdf,fmuldf,fmadddf,fhex"))
|
||||
"x_e1_t,x-wr-fp")
|
||||
|
||||
(define_insn_reservation "x_fsimpsf" 6
|
||||
(and (eq_attr "cpu" "z990,z9_109")
|
||||
(eq_attr "type" "fsimpsf,fmulsf,fhex"))
|
||||
(eq_attr "type" "fsimpsf,fmulsf,fmaddsf,fhex"))
|
||||
"x_e1_t,x-wr-fp")
|
||||
|
||||
|
||||
|
@ -463,12 +463,12 @@
|
||||
|
||||
(define_insn_reservation "z10_fsimpdf" 6
|
||||
(and (eq_attr "cpu" "z10")
|
||||
(eq_attr "type" "fsimpdf,fmuldf"))
|
||||
(eq_attr "type" "fsimpdf,fmuldf,fmadddf"))
|
||||
"z10_e1_BOTH, z10_Gate_FP")
|
||||
|
||||
(define_insn_reservation "z10_fsimpsf" 6
|
||||
(and (eq_attr "cpu" "z10")
|
||||
(eq_attr "type" "fsimpsf,fmulsf"))
|
||||
(eq_attr "type" "fsimpsf,fmulsf,fmaddsf"))
|
||||
"z10_e1_BOTH, z10_Gate_FP")
|
||||
|
||||
(define_insn_reservation "z10_fmultf" 52
|
||||
|
313
gcc/config/s390/2817.md
Normal file
313
gcc/config/s390/2817.md
Normal file
@ -0,0 +1,313 @@
|
||||
;; Scheduling description for z196 (cpu 2817).
|
||||
;; Copyright (C) 2010
|
||||
;; Free Software Foundation, Inc.
|
||||
;; Contributed by Christian Borntraeger (Christian.Borntraeger@de.ibm.com)
|
||||
;; Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
|
||||
|
||||
;; This file is part of GCC.
|
||||
|
||||
;; GCC is free software; you can redistribute it and/or modify it under
|
||||
;; the terms of the GNU General Public License as published by the Free
|
||||
;; Software Foundation; either version 3, or (at your option) any later
|
||||
;; version.
|
||||
|
||||
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
;; for more details.
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GCC; see the file COPYING3. If not see
|
||||
;; <http://www.gnu.org/licenses/>.
|
||||
|
||||
(define_automaton "z196_ipu")
|
||||
|
||||
;; Fetch + Decoder
|
||||
(define_cpu_unit "z196_g1" "z196_ipu")
|
||||
(define_cpu_unit "z196_g2" "z196_ipu")
|
||||
(define_cpu_unit "z196_g3" "z196_ipu")
|
||||
(define_cpu_unit "z196_cr1" "z196_ipu")
|
||||
(define_cpu_unit "z196_cr2" "z196_ipu")
|
||||
(define_cpu_unit "z196_cr3" "z196_ipu")
|
||||
|
||||
(final_presence_set "z196_g2" "z196_g1")
|
||||
(final_presence_set "z196_g3" "z196_g2")
|
||||
(final_presence_set "z196_cr2" "z196_cr1")
|
||||
(final_presence_set "z196_cr3" "z196_cr2")
|
||||
(exclusion_set "z196_g1" "z196_cr1")
|
||||
|
||||
;; Instructions can be groupable, end a group, or be alone in a group.
|
||||
(define_reservation "z196_simple" "( z196_g1 | z196_g2 | z196_g3 )")
|
||||
(define_reservation "z196_ends" "( z196_g3 | ( z196_g2 + z196_g3 ) | ( z196_g1 + z196_g2 + z196_g3 ) )")
|
||||
|
||||
;; Try to keep cracked and alone ops together in a clump.
|
||||
(define_reservation "z196_crack" "( z196_cr1 | z196_cr2 | z196_cr3 )")
|
||||
(define_reservation "z196_alone" "( z196_cr1 | z196_cr2 | z196_cr3 )")
|
||||
|
||||
;; Most simple instructions are fast enough to be handled by OOO even with
|
||||
;; latency == 0. This reduces life ranges and spilling. We want to increase
|
||||
;; life range for longer running ops, though, that's why we do not use
|
||||
;; -fno-schedule-insns.
|
||||
(define_insn_reservation "z196_simple_LSU" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(and (eq_attr "type" "load,store,lr")
|
||||
(eq_attr "z196prop" "none")))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_simple_FXU" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(and (eq_attr "type" "integer,la,larl,other")
|
||||
(and (eq_attr "z196prop" "none")
|
||||
(eq_attr "op_type" "RR"))))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_simple_DUAL" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(and (eq_attr "type" "integer,la,larl,other")
|
||||
(and (eq_attr "z196prop" "none")
|
||||
(eq_attr "op_type" "!RR"))))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_cracked" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(and (eq_attr "type" "integer,la,larl,load,lr,store,other")
|
||||
(eq_attr "z196prop" "z196_cracked")))
|
||||
"z196_crack")
|
||||
|
||||
(define_insn_reservation "z196_alone" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(and (eq_attr "type" "integer,la,larl,load,lr,store,other")
|
||||
(eq_attr "z196prop" "z196_alone")))
|
||||
"z196_alone")
|
||||
|
||||
(define_insn_reservation "z196_ends" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(and (eq_attr "type" "integer,la,larl,load,lr,store,other")
|
||||
(eq_attr "z196prop" "z196_ends")))
|
||||
"z196_ends")
|
||||
|
||||
(define_insn_reservation "z196_branch" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "branch"))
|
||||
"z196_ends")
|
||||
|
||||
(define_insn_reservation "z196_call" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "jsr"))
|
||||
"z196_ends")
|
||||
|
||||
(define_insn_reservation "z196_mul_hi" 10
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "imulhi"))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_mul_si" 12
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "imulsi"))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_mul_di" 14
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "imuldi"))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_div" 73
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "idiv"))
|
||||
"z196_alone")
|
||||
|
||||
(define_insn_reservation "z196_sem" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "sem"))
|
||||
"z196_crack")
|
||||
|
||||
(define_insn_reservation "z196_cs" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "cs"))
|
||||
"z196_crack")
|
||||
|
||||
(define_insn_reservation "z196_vs" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "vs"))
|
||||
"z196_alone")
|
||||
|
||||
(define_insn_reservation "z196_lm_stm" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "stm,lm"))
|
||||
"z196_crack")
|
||||
|
||||
|
||||
;;
|
||||
;; Binary Floating Point
|
||||
;;
|
||||
|
||||
(define_insn_reservation "z196_fsimptf" 18
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fsimptf,fhex"))
|
||||
"z196_alone")
|
||||
|
||||
(define_insn_reservation "z196_fmultf" 47
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fmultf"))
|
||||
"z196_alone")
|
||||
|
||||
(define_insn_reservation "z196_fsimpdf" 7
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fsimpdf,fmuldf,fhex"))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_fmadddf" 7
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fmadddf"))
|
||||
"z196_alone")
|
||||
|
||||
(define_insn_reservation "z196_fsimpsf" 7
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fsimpsf,fmulsf,fhex"))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_fmaddsf" 7
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fmaddsf"))
|
||||
"z196_alone")
|
||||
|
||||
(define_insn_reservation "z196_fdivtf" 108
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fdivtf,fsqrttf"))
|
||||
"z196_alone")
|
||||
|
||||
(define_insn_reservation "z196_fdivdf" 36
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fdivdf,fsqrtdf"))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_fdivsf" 29
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fdivsf,fsqrtsf"))
|
||||
"z196_simple")
|
||||
|
||||
|
||||
;; Loads and stores are cheap as well.
|
||||
(define_insn_reservation "z196_floaddf" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "floaddf"))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_floadsf" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "floadsf"))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_fstoredf" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fstoredf"))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_fstoresf" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fstoresf"))
|
||||
"z196_simple")
|
||||
|
||||
|
||||
(define_insn_reservation "z196_ftrunctf" 9
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "ftrunctf"))
|
||||
"z196_simple")
|
||||
|
||||
(define_insn_reservation "z196_ftruncdf" 7
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "ftruncdf"))
|
||||
"z196_simple")
|
||||
|
||||
|
||||
(define_insn_reservation "z196_ftoi" 7
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "ftoi"))
|
||||
"z196_crack")
|
||||
|
||||
(define_insn_reservation "z196_itof" 7
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "itoftf,itofdf,itofsf"))
|
||||
"z196_crack")
|
||||
|
||||
;;
|
||||
;; Decimal Floating Point
|
||||
;;
|
||||
|
||||
;; DDTR
|
||||
(define_insn_reservation "z196_fdivdd" 33
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fdivdd"))
|
||||
"z196_simple")
|
||||
|
||||
;; DXTR
|
||||
(define_insn_reservation "z196_fdivtd" 35
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fdivtd"))
|
||||
"z196_alone")
|
||||
|
||||
;; LEDTR
|
||||
(define_insn_reservation "z196_ftruncsd" 34
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "ftruncsd"))
|
||||
"z196_simple")
|
||||
|
||||
;; LDXTR
|
||||
(define_insn_reservation "z196_ftruncdd" 36
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "ftruncdd"))
|
||||
"z196_simple")
|
||||
|
||||
;; These are normal fp loads/stores - which are cheap.
|
||||
(define_insn_reservation "z196_floadsddd" 0
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "floadsd,floaddd,fstoredd,fstoresd"))
|
||||
"z196_simple")
|
||||
|
||||
;; MDTR
|
||||
(define_insn_reservation "z196_fmuldd" 23
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fmuldd"))
|
||||
"z196_simple")
|
||||
|
||||
;; MXTR
|
||||
(define_insn_reservation "z196_fmultd" 25
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fmultd"))
|
||||
"z196_alone")
|
||||
|
||||
;; Multiple different insns like add, sub etc.
|
||||
;; Just use the same defaults as z10.
|
||||
(define_insn_reservation "z196_fsimpsd" 17
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fsimpsd"))
|
||||
"z196_simple")
|
||||
(define_insn_reservation "z196_fsimpdd" 17
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fsimpdd"))
|
||||
"z196_simple")
|
||||
(define_insn_reservation "z196_fsimptd" 18
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "fsimptd"))
|
||||
"z196_alone")
|
||||
|
||||
;; CDGTR
|
||||
(define_insn_reservation "z196_itofdd" 45
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "itofdd"))
|
||||
"z196_crack")
|
||||
|
||||
;; CXGTR
|
||||
(define_insn_reservation "z196_itoftd" 33
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "itoftd"))
|
||||
"z196_crack")
|
||||
|
||||
;; CGXTR, CGDTR
|
||||
(define_insn_reservation "z196_ftoidfp" 33
|
||||
(and (eq_attr "cpu" "z196")
|
||||
(eq_attr "type" "ftoidfp"))
|
||||
"z196_crack")
|
||||
|
||||
|
||||
|
@ -223,6 +223,38 @@ struct processor_costs z10_cost =
|
||||
COSTS_N_INSNS (71), /* DSGR */
|
||||
};
|
||||
|
||||
static const
|
||||
struct processor_costs z196_cost =
|
||||
{
|
||||
COSTS_N_INSNS (7), /* M */
|
||||
COSTS_N_INSNS (5), /* MGHI */
|
||||
COSTS_N_INSNS (5), /* MH */
|
||||
COSTS_N_INSNS (5), /* MHI */
|
||||
COSTS_N_INSNS (7), /* ML */
|
||||
COSTS_N_INSNS (7), /* MR */
|
||||
COSTS_N_INSNS (6), /* MS */
|
||||
COSTS_N_INSNS (8), /* MSG */
|
||||
COSTS_N_INSNS (6), /* MSGF */
|
||||
COSTS_N_INSNS (6), /* MSGFR */
|
||||
COSTS_N_INSNS (8), /* MSGR */
|
||||
COSTS_N_INSNS (6), /* MSR */
|
||||
COSTS_N_INSNS (1) , /* multiplication in DFmode */
|
||||
COSTS_N_INSNS (40), /* MXBR B+40 */
|
||||
COSTS_N_INSNS (100), /* SQXBR B+100 */
|
||||
COSTS_N_INSNS (42), /* SQDBR B+42 */
|
||||
COSTS_N_INSNS (28), /* SQEBR B+28 */
|
||||
COSTS_N_INSNS (1), /* MADBR B */
|
||||
COSTS_N_INSNS (1), /* MAEBR B */
|
||||
COSTS_N_INSNS (101), /* DXBR B+101 */
|
||||
COSTS_N_INSNS (29), /* DDBR */
|
||||
COSTS_N_INSNS (22), /* DEBR */
|
||||
COSTS_N_INSNS (160), /* DLGR cracked */
|
||||
COSTS_N_INSNS (160), /* DLR cracked */
|
||||
COSTS_N_INSNS (160), /* DR expanded */
|
||||
COSTS_N_INSNS (160), /* DSGFR cracked */
|
||||
COSTS_N_INSNS (160), /* DSGR cracked */
|
||||
};
|
||||
|
||||
extern int reload_completed;
|
||||
|
||||
/* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
|
||||
@ -350,8 +382,8 @@ struct GTY(()) machine_function
|
||||
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
|
||||
|
||||
/* That's the read ahead of the dynamic branch prediction unit in
|
||||
bytes on a z10 CPU. */
|
||||
#define Z10_PREDICT_DISTANCE 384
|
||||
bytes on a z10 (or higher) CPU. */
|
||||
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
|
||||
|
||||
static enum machine_mode
|
||||
s390_libgcc_cmp_return_mode (void)
|
||||
@ -1506,7 +1538,9 @@ s390_handle_arch_option (const char *arg,
|
||||
{"z9-ec", PROCESSOR_2094_Z9_109, PF_IEEE_FLOAT | PF_ZARCH
|
||||
| PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP },
|
||||
{"z10", PROCESSOR_2097_Z10, PF_IEEE_FLOAT | PF_ZARCH
|
||||
| PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10},
|
||||
| PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10},
|
||||
{"z196", PROCESSOR_2817_Z196, PF_IEEE_FLOAT | PF_ZARCH
|
||||
| PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 },
|
||||
};
|
||||
size_t i;
|
||||
|
||||
@ -1624,6 +1658,8 @@ s390_option_override (void)
|
||||
break;
|
||||
case PROCESSOR_2097_Z10:
|
||||
s390_cost = &z10_cost;
|
||||
case PROCESSOR_2817_Z196:
|
||||
s390_cost = &z196_cost;
|
||||
break;
|
||||
default:
|
||||
s390_cost = &z900_cost;
|
||||
@ -1648,7 +1684,8 @@ s390_option_override (void)
|
||||
target_flags |= MASK_LONG_DOUBLE_128;
|
||||
#endif
|
||||
|
||||
if (s390_tune == PROCESSOR_2097_Z10)
|
||||
if (s390_tune == PROCESSOR_2097_Z10
|
||||
|| s390_tune == PROCESSOR_2817_Z196)
|
||||
{
|
||||
if (!PARAM_SET_P (PARAM_MAX_UNROLLED_INSNS))
|
||||
set_param_value ("max-unrolled-insns", 100);
|
||||
@ -2782,7 +2819,9 @@ s390_cannot_force_const_mem (rtx x)
|
||||
operand during and after reload. The difference to
|
||||
legitimate_constant_p is that this function will not accept
|
||||
a constant that would need to be forced to the literal pool
|
||||
before it can be used as operand. */
|
||||
before it can be used as operand.
|
||||
This function accepts all constants which can be loaded directly
|
||||
into a GPR. */
|
||||
|
||||
bool
|
||||
legitimate_reload_constant_p (rtx op)
|
||||
@ -2836,6 +2875,24 @@ legitimate_reload_constant_p (rtx op)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Returns true if the constant value OP is a legitimate fp operand
|
||||
during and after reload.
|
||||
This function accepts all constants which can be loaded directly
|
||||
into an FPR. */
|
||||
|
||||
static bool
|
||||
legitimate_reload_fp_constant_p (rtx op)
|
||||
{
|
||||
/* Accept floating-point zero operands if the load zero instruction
|
||||
can be used. */
|
||||
if (TARGET_Z196
|
||||
&& GET_CODE (op) == CONST_DOUBLE
|
||||
&& s390_float_const_zero_p (op))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
|
||||
return the class of reg to actually use. */
|
||||
|
||||
@ -2854,8 +2911,10 @@ s390_preferred_reload_class (rtx op, enum reg_class rclass)
|
||||
else if (reg_class_subset_p (ADDR_REGS, rclass)
|
||||
&& legitimate_reload_constant_p (op))
|
||||
return ADDR_REGS;
|
||||
else
|
||||
return NO_REGS;
|
||||
else if (reg_class_subset_p (FP_REGS, rclass)
|
||||
&& legitimate_reload_fp_constant_p (op))
|
||||
return FP_REGS;
|
||||
return NO_REGS;
|
||||
|
||||
/* If a symbolic constant or a PLUS is reloaded,
|
||||
it is most likely being used as an address, so
|
||||
@ -3218,6 +3277,11 @@ preferred_la_operand_p (rtx op1, rtx op2)
|
||||
if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
|
||||
return false;
|
||||
|
||||
/* Avoid LA instructions with index register on z196; it is
|
||||
preferable to use regular add instructions when possible. */
|
||||
if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
|
||||
return false;
|
||||
|
||||
if (!TARGET_64BIT && !addr.pointer)
|
||||
return false;
|
||||
|
||||
@ -5398,8 +5462,6 @@ s390_agen_dep_p (rtx dep_insn, rtx insn)
|
||||
|
||||
A STD instruction should be scheduled earlier,
|
||||
in order to use the bypass. */
|
||||
|
||||
|
||||
static int
|
||||
s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
|
||||
{
|
||||
@ -5408,7 +5470,8 @@ s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
|
||||
|
||||
if (s390_tune != PROCESSOR_2084_Z990
|
||||
&& s390_tune != PROCESSOR_2094_Z9_109
|
||||
&& s390_tune != PROCESSOR_2097_Z10)
|
||||
&& s390_tune != PROCESSOR_2097_Z10
|
||||
&& s390_tune != PROCESSOR_2817_Z196)
|
||||
return priority;
|
||||
|
||||
switch (s390_safe_attr_type (insn))
|
||||
@ -5437,6 +5500,7 @@ s390_issue_rate (void)
|
||||
{
|
||||
case PROCESSOR_2084_Z990:
|
||||
case PROCESSOR_2094_Z9_109:
|
||||
case PROCESSOR_2817_Z196:
|
||||
return 3;
|
||||
case PROCESSOR_2097_Z10:
|
||||
return 2;
|
||||
@ -9859,13 +9923,13 @@ s390_optimize_prologue (void)
|
||||
}
|
||||
}
|
||||
|
||||
/* On z10 the dynamic branch prediction must see the backward jump in
|
||||
a window of 384 bytes. If not it falls back to the static
|
||||
prediction. This function rearranges the loop backward branch in a
|
||||
way which makes the static prediction always correct. The function
|
||||
returns true if it added an instruction. */
|
||||
/* On z10 and later the dynamic branch prediction must see the
|
||||
backward jump within a certain window. If not, it falls back to
|
||||
the static prediction. This function rearranges the loop backward
|
||||
branch in a way which makes the static prediction always correct.
|
||||
The function returns true if it added an instruction. */
|
||||
static bool
|
||||
s390_z10_fix_long_loop_prediction (rtx insn)
|
||||
s390_fix_long_loop_prediction (rtx insn)
|
||||
{
|
||||
rtx set = single_set (insn);
|
||||
rtx code_label, label_ref, new_label;
|
||||
@ -9891,11 +9955,11 @@ s390_z10_fix_long_loop_prediction (rtx insn)
|
||||
if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
|
||||
|| INSN_ADDRESSES (INSN_UID (insn)) == -1
|
||||
|| (INSN_ADDRESSES (INSN_UID (insn))
|
||||
- INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
|
||||
- INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
|
||||
return false;
|
||||
|
||||
for (distance = 0, cur_insn = PREV_INSN (insn);
|
||||
distance < Z10_PREDICT_DISTANCE - 6;
|
||||
distance < PREDICT_DISTANCE - 6;
|
||||
distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
|
||||
if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
|
||||
return false;
|
||||
@ -10195,8 +10259,9 @@ s390_reorg (void)
|
||||
/* Try to optimize prologue and epilogue further. */
|
||||
s390_optimize_prologue ();
|
||||
|
||||
/* Walk over the insns and do some z10 specific changes. */
|
||||
if (s390_tune == PROCESSOR_2097_Z10)
|
||||
/* Walk over the insns and do some >=z10 specific changes. */
|
||||
if (s390_tune == PROCESSOR_2097_Z10
|
||||
|| s390_tune == PROCESSOR_2817_Z196)
|
||||
{
|
||||
rtx insn;
|
||||
bool insn_added_p = false;
|
||||
@ -10211,10 +10276,11 @@ s390_reorg (void)
|
||||
continue;
|
||||
|
||||
if (JUMP_P (insn))
|
||||
insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
|
||||
insn_added_p |= s390_fix_long_loop_prediction (insn);
|
||||
|
||||
if (GET_CODE (PATTERN (insn)) == PARALLEL
|
||||
|| GET_CODE (PATTERN (insn)) == SET)
|
||||
if ((GET_CODE (PATTERN (insn)) == PARALLEL
|
||||
|| GET_CODE (PATTERN (insn)) == SET)
|
||||
&& s390_tune == PROCESSOR_2097_Z10)
|
||||
insn_added_p |= s390_z10_optimize_cmp (insn);
|
||||
}
|
||||
|
||||
@ -10360,8 +10426,9 @@ check_dpu (rtx *x, unsigned *mem_count)
|
||||
}
|
||||
|
||||
/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
|
||||
a new number struct loop *loop should be unrolled if tuned for the z10
|
||||
cpu. The loop is analyzed for memory accesses by calling check_dpu for
|
||||
a new number struct loop *loop should be unrolled if tuned for cpus with
|
||||
a built-in stride prefetcher.
|
||||
The loop is analyzed for memory accesses by calling check_dpu for
|
||||
each rtx of the loop. Depending on the loop_depth and the amount of
|
||||
memory accesses a new number <=nunroll is returned to improve the
|
||||
behaviour of the hardware prefetch unit. */
|
||||
@ -10373,8 +10440,7 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
|
||||
unsigned i;
|
||||
unsigned mem_count = 0;
|
||||
|
||||
/* Only z10 needs special handling. */
|
||||
if (s390_tune != PROCESSOR_2097_Z10)
|
||||
if (s390_tune != PROCESSOR_2097_Z10 && s390_tune != PROCESSOR_2817_Z196)
|
||||
return nunroll;
|
||||
|
||||
/* Count the number of memory references within the loop body. */
|
||||
|
@ -36,6 +36,7 @@ enum processor_type
|
||||
PROCESSOR_2084_Z990,
|
||||
PROCESSOR_2094_Z9_109,
|
||||
PROCESSOR_2097_Z10,
|
||||
PROCESSOR_2817_Z196,
|
||||
PROCESSOR_max
|
||||
};
|
||||
|
||||
@ -48,7 +49,8 @@ enum processor_flags
|
||||
PF_LONG_DISPLACEMENT = 4,
|
||||
PF_EXTIMM = 8,
|
||||
PF_DFP = 16,
|
||||
PF_Z10 = 32
|
||||
PF_Z10 = 32,
|
||||
PF_Z196 = 64
|
||||
};
|
||||
|
||||
extern enum processor_type s390_tune;
|
||||
@ -77,6 +79,8 @@ extern int s390_arch_flags;
|
||||
(s390_arch_flags & PF_DFP)
|
||||
#define TARGET_CPU_Z10 \
|
||||
(s390_arch_flags & PF_Z10)
|
||||
#define TARGET_CPU_Z196 \
|
||||
(s390_arch_flags & PF_Z196)
|
||||
|
||||
/* These flags indicate that the generated code should run on a cpu
|
||||
providing the respective hardware facility when run in
|
||||
@ -90,6 +94,11 @@ extern int s390_arch_flags;
|
||||
(TARGET_ZARCH && TARGET_CPU_DFP && TARGET_HARD_FLOAT)
|
||||
#define TARGET_Z10 \
|
||||
(TARGET_ZARCH && TARGET_CPU_Z10)
|
||||
#define TARGET_Z196 \
|
||||
(TARGET_ZARCH && TARGET_CPU_Z196)
|
||||
|
||||
|
||||
#define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196)
|
||||
|
||||
/* Run-time target specification. */
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user