s390.c (z196_cost): New.

2010-10-05  Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>
            Christian Borntraeger  <Christian.Borntraeger@de.ibm.com>

	* gcc/config/s390/s390.c (z196_cost): New.
	(s390_handle_arch_option): Add -march=z196.
	(s390_option_override): Use the new cost function and use the z10
	defaults also for z196.
	(legitimate_reload_constant_p): Adjust comment.
	(legitimate_reload_fp_constant_p): New function.
	(s390_preferred_reload_class): Distinguish between FP and Int constants.
	(preferred_la_operand_p): Avoid la with index on z196.
	(s390_adjust_priority): Trigger also for z196.
	(s390_issue_rate): Issue rate for z196 is 3.
	(s390_z10_fix_long_loop_prediction): Rename to ...
	(s390_fix_long_loop_prediction): ... this.
	(s390_reorg): Apply the z10 adjustments also for z196.
	(s390_loop_unroll_adjust): Do this also for z196.
	* gcc/config/s390/s390.h (enum processor_type): Add PROCESSOR_2817_Z196.
	(enum processor_flags): Add PF_Z196.
	(TARGET_AVOID_CMP_AND_BRANCH): New macro.
	(TARGET_CPU_Z196, TARGET_Z196): New macros.
	* gcc/config.gcc: Enable z196 for --with-arch.
	* gcc/config/s390/2817.md: New file.
	* gcc/config/s390/2084.md: New type for multiply and add.
	* gcc/config/s390/2097.md: Likewise.
	* gcc/config/s390/s390.md (UNSPEC_POPCNT, UNSPEC_COPYSIGN)
	(UNSPECV_ATOMIC_OP): New constants.
	(fmadddf, fmaddsf): New values for type attribute.
	(z196prop): New insn attribute.
	(cpu, cpu_facility): Add z196.
	(ATOMIC_Z196): New code iterator.
	(noxa): New code attribute.
	(gk): New mode attribute.
	(*mov<mode>_64, *mov<mode>_31, *mov<mode>_64dfp, mov<mode>):
	Support load zero for fp constants.
	(fixuns_truncdddi2, fixuns_trunctddi2)
	(fixuns_trunc<BFP:mode><GPR:mode>2): Use the standard rtx pattern
	for z196.
	(fixuns_trunc<mode>si2, mov<mode>cc, popcountdi2, popcountsi2)
	(popcounthi2, popcountqi2): New expander.
	(*fixuns_trunc<FP:mode><GPR:mode>2_z196, floatsi<mode>2)
	(floatuns<GPR:mode><FP:mode>2, *mov<mode>cc, sync_<atomic><mode>)
	(sync_old_<atomic><mode>, *popcount<mode>, copysign<mode>3): New
	insn definition.
	(add<mode>3, *add<mode>3_carry1_cc, *add<mode>3_carry1_cconly)
	(*add<mode>3_carry2_cc, *add<mode>3_carry2_cconly, *add<mode>3_cc)
	(*add<mode>3_cconly, *add<mode>3_cconly2, *add<mode>3_imm_cc, *sub<mode>3)
	(*sub<mode>3_borrow_cc, *sub<mode>3_borrow_cconly, *sub<mode>3_cc)
	(*sub<mode>3_cc2, *sub<mode>3_cconly, *sub<mode>3_cconly2)
	(*anddi3_cc, *anddi3_cconly, *anddi3, *andsi3_cc, *andsi3_cconly)
	(*andsi3_zarch, *andsi3_esa, *andhi3_zarch, *andqi3_zarch, *iordi3_cc)
	(*iordi3_cconly, *iordi3, *iorsi3_cc, *iorsi3_cconly, *iorsi3_zarch)
	(*iorhi3_zarch, *iorqi3_zarch, *xordi3_cc, *xordi3_cconly, *xordi3)
	(*xorsi3_cc, *xorsi3_cconly, *xorsi3, *xorhi3, *xorqi3, *<shift><mode>3)
	(*<shift><mode>3_and, *ashr<mode>3_cc, *ashr<mode>3_cconly, *ashr<mode>3)
	(*ashr<mode>3_cc_and, *ashr<mode>3_cconly_and, *ashr<mode>3_and):
	Support new z196 instructions.


Co-Authored-By: Christian Borntraeger <Christian.Borntraeger@de.ibm.com>

From-SVN: r164985
This commit is contained in:
Andreas Krebbel 2010-10-05 10:39:48 +00:00 committed by Andreas Krebbel
parent 6e57232622
commit 65b1d8ea3e
8 changed files with 1329 additions and 479 deletions

View File

@ -1,3 +1,61 @@
2010-10-05 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
Christian Borntraeger <Christian.Borntraeger@de.ibm.com>
* gcc/config/s390/s390.c (z196_cost): New.
(s390_handle_arch_option): Add -march=z196.
(s390_option_override): Use the new cost function and use the z10
defaults also for z196.
(legitimate_reload_constant_p): Adjust comment.
(legitimate_reload_fp_constant_p): New function.
(s390_preferred_reload_class): Distinguish between FP and Int constants.
(preferred_la_operand_p): Avoid la with index on z196.
(s390_adjust_priority): Trigger also for z196.
(s390_issue_rate): Issue rate for z196 is 3.
(s390_z10_fix_long_loop_prediction): Rename to ...
(s390_fix_long_loop_prediction): ... this.
(s390_reorg): Apply the z10 adjustments also for z196.
(s390_loop_unroll_adjust): Do this also for z196.
* gcc/config/s390/s390.h (enum processor_type): Add PROCESSOR_2817_Z196.
(enum processor_flags): Add PF_Z196.
(TARGET_AVOID_CMP_AND_BRANCH): New macro.
(TARGET_CPU_Z196, TARGET_Z196): New macros.
* gcc/config.gcc: Enable z196 for --with-arch.
* gcc/config/s390/2817.md: New file.
* gcc/config/s390/2084.md: New type for multiply and add.
* gcc/config/s390/2097.md: Likewise.
* gcc/config/s390/s390.md (UNSPEC_POPCNT, UNSPEC_COPYSIGN)
(UNSPECV_ATOMIC_OP): New constants.
(fmadddf, fmaddsf): New values for type attribute.
(z196prop): New insn attribute.
(cpu, cpu_facility): Add z196.
(ATOMIC_Z196): New code iterator.
(noxa): New code attribute.
(gk): New mode attribute.
(*mov<mode>_64, *mov<mode>_31, *mov<mode>_64dfp, mov<mode>):
Support load zero for fp constants.
(fixuns_truncdddi2, fixuns_trunctddi2)
(fixuns_trunc<BFP:mode><GPR:mode>2): Use the standard rtx pattern
for z196.
(fixuns_trunc<mode>si2, mov<mode>cc, popcountdi2, popcountsi2)
(popcounthi2, popcountqi2): New expander.
(*fixuns_trunc<FP:mode><GPR:mode>2_z196, floatsi<mode>2)
(floatuns<GPR:mode><FP:mode>2, *mov<mode>cc, sync_<atomic><mode>)
(sync_old_<atomic><mode>, *popcount<mode>, copysign<mode>3): New
insn definition.
(add<mode>3, *add<mode>3_carry1_cc, *add<mode>3_carry1_cconly)
(*add<mode>3_carry2_cc, *add<mode>3_carry2_cconly, *add<mode>3_cc)
(*add<mode>3_cconly, *add<mode>3_cconly2, *add<mode>3_imm_cc, *sub<mode>3)
(*sub<mode>3_borrow_cc, *sub<mode>3_borrow_cconly, *sub<mode>3_cc)
(*sub<mode>3_cc2, *sub<mode>3_cconly, *sub<mode>3_cconly2)
(*anddi3_cc, *anddi3_cconly, *anddi3, *andsi3_cc, *andsi3_cconly)
(*andsi3_zarch, *andsi3_esa, *andhi3_zarch, *andqi3_zarch, *iordi3_cc)
(*iordi3_cconly, *iordi3, *iorsi3_cc, *iorsi3_cconly, *iorsi3_zarch)
(*iorhi3_zarch, *iorqi3_zarch, *xordi3_cc, *xordi3_cconly, *xordi3)
(*xorsi3_cc, *xorsi3_cconly, *xorsi3, *xorhi3, *xorqi3, *<shift><mode>3)
(*<shift><mode>3_and, *ashr<mode>3_cc, *ashr<mode>3_cconly, *ashr<mode>3)
(*ashr<mode>3_cc_and, *ashr<mode>3_cconly_and, *ashr<mode>3_and):
Support new z196 instructions.
2010-10-05 Richard Guenther <rguenther@suse.de>
PR middle-end/45877

View File

@ -3374,7 +3374,7 @@ case "${target}" in
for which in arch tune; do
eval "val=\$with_$which"
case ${val} in
"" | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10)
"" | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10 | z196)
# OK
;;
*)

View File

@ -168,12 +168,12 @@
(define_insn_reservation "x_fsimpdf" 6
(and (eq_attr "cpu" "z990,z9_109")
(eq_attr "type" "fsimpdf,fmuldf,fhex"))
(eq_attr "type" "fsimpdf,fmuldf,fmadddf,fhex"))
"x_e1_t,x-wr-fp")
(define_insn_reservation "x_fsimpsf" 6
(and (eq_attr "cpu" "z990,z9_109")
(eq_attr "type" "fsimpsf,fmulsf,fhex"))
(eq_attr "type" "fsimpsf,fmulsf,fmaddsf,fhex"))
"x_e1_t,x-wr-fp")

View File

@ -463,12 +463,12 @@
(define_insn_reservation "z10_fsimpdf" 6
(and (eq_attr "cpu" "z10")
(eq_attr "type" "fsimpdf,fmuldf"))
(eq_attr "type" "fsimpdf,fmuldf,fmadddf"))
"z10_e1_BOTH, z10_Gate_FP")
(define_insn_reservation "z10_fsimpsf" 6
(and (eq_attr "cpu" "z10")
(eq_attr "type" "fsimpsf,fmulsf"))
(eq_attr "type" "fsimpsf,fmulsf,fmaddsf"))
"z10_e1_BOTH, z10_Gate_FP")
(define_insn_reservation "z10_fmultf" 52

313
gcc/config/s390/2817.md Normal file
View File

@ -0,0 +1,313 @@
;; Scheduling description for z196 (cpu 2817).
;; Copyright (C) 2010
;; Free Software Foundation, Inc.
;; Contributed by Christian Borntraeger (Christian.Borntraeger@de.ibm.com)
;; Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
;; This file is part of GCC.
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; All z196 units below belong to this single automaton.
(define_automaton "z196_ipu")
;; Fetch + Decoder
;; z196_g1..z196_g3 are the slots used by the "z196_simple" and
;; "z196_ends" reservations; z196_cr1..z196_cr3 are the slots used by
;; the "z196_crack" and "z196_alone" reservations.
(define_cpu_unit "z196_g1" "z196_ipu")
(define_cpu_unit "z196_g2" "z196_ipu")
(define_cpu_unit "z196_g3" "z196_ipu")
(define_cpu_unit "z196_cr1" "z196_ipu")
(define_cpu_unit "z196_cr2" "z196_ipu")
(define_cpu_unit "z196_cr3" "z196_ipu")
;; Slots have to be filled in order: a later slot may only be reserved
;; in a cycle in which the preceding slot of the same kind is reserved.
(final_presence_set "z196_g2" "z196_g1")
(final_presence_set "z196_g3" "z196_g2")
(final_presence_set "z196_cr2" "z196_cr1")
(final_presence_set "z196_cr3" "z196_cr2")
;; The first slot of each kind cannot be reserved at the same time.
;; NOTE(review): together with the presence sets above this presumably
;; keeps "g" and "cr" slots from being mixed in one cycle - confirm.
(exclusion_set "z196_g1" "z196_cr1")
;; Instructions can be groupable, end a group, or be alone in a group.
;; "z196_simple" takes any one g slot.  "z196_ends" always takes
;; z196_g3, optionally together with the slots preceding it.
(define_reservation "z196_simple" "( z196_g1 | z196_g2 | z196_g3 )")
(define_reservation "z196_ends" "( z196_g3 | ( z196_g2 + z196_g3 ) | ( z196_g1 + z196_g2 + z196_g3 ) )")
;; Try to keep cracked and alone ops together in a clump.
;; Both reservations currently expand to the same unit expression.
(define_reservation "z196_crack" "( z196_cr1 | z196_cr2 | z196_cr3 )")
(define_reservation "z196_alone" "( z196_cr1 | z196_cr2 | z196_cr3 )")
;; Most simple instructions are fast enough to be handled by OOO even with
;; latency == 0. This reduces life ranges and spilling. We want to increase
;; life range for longer running ops, though, that's why we do not use
;; -fno-schedule-insns.

;; Plain loads, stores and register moves without a special z196prop.
(define_insn_reservation "z196_simple_LSU" 0
(and (eq_attr "cpu" "z196")
(and (eq_attr "type" "load,store,lr")
(eq_attr "z196prop" "none")))
"z196_simple")
;; Integer/address insns without a special z196prop, RR format.
(define_insn_reservation "z196_simple_FXU" 0
(and (eq_attr "cpu" "z196")
(and (eq_attr "type" "integer,la,larl,other")
(and (eq_attr "z196prop" "none")
(eq_attr "op_type" "RR"))))
"z196_simple")
;; Same insn types as z196_simple_FXU, but any op_type other than RR.
(define_insn_reservation "z196_simple_DUAL" 0
(and (eq_attr "cpu" "z196")
(and (eq_attr "type" "integer,la,larl,other")
(and (eq_attr "z196prop" "none")
(eq_attr "op_type" "!RR"))))
"z196_simple")
;; The next three reservations pick the grouping behaviour from the
;; z196prop attribute (z196_cracked, z196_alone, z196_ends) for the
;; same set of integer/load/store insn types.
(define_insn_reservation "z196_cracked" 0
(and (eq_attr "cpu" "z196")
(and (eq_attr "type" "integer,la,larl,load,lr,store,other")
(eq_attr "z196prop" "z196_cracked")))
"z196_crack")
(define_insn_reservation "z196_alone" 0
(and (eq_attr "cpu" "z196")
(and (eq_attr "type" "integer,la,larl,load,lr,store,other")
(eq_attr "z196prop" "z196_alone")))
"z196_alone")
(define_insn_reservation "z196_ends" 0
(and (eq_attr "cpu" "z196")
(and (eq_attr "type" "integer,la,larl,load,lr,store,other")
(eq_attr "z196prop" "z196_ends")))
"z196_ends")
;; Branches and calls use the "z196_ends" reservation, regardless of
;; z196prop.
(define_insn_reservation "z196_branch" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "branch"))
"z196_ends")
(define_insn_reservation "z196_call" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "jsr"))
"z196_ends")
;; Integer multiplies: latency 10/12/14 for imulhi/imulsi/imuldi.
(define_insn_reservation "z196_mul_hi" 10
(and (eq_attr "cpu" "z196")
(eq_attr "type" "imulhi"))
"z196_simple")
(define_insn_reservation "z196_mul_si" 12
(and (eq_attr "cpu" "z196")
(eq_attr "type" "imulsi"))
"z196_simple")
(define_insn_reservation "z196_mul_di" 14
(and (eq_attr "cpu" "z196")
(eq_attr "type" "imuldi"))
"z196_simple")
;; Integer divide: latency 73, uses the "z196_alone" reservation.
(define_insn_reservation "z196_div" 73
(and (eq_attr "cpu" "z196")
(eq_attr "type" "idiv"))
"z196_alone")
;; Insn types sem, cs, vs, lm and stm: latency 0, reserved via the
;; cr slots ("z196_crack" or "z196_alone").
(define_insn_reservation "z196_sem" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "sem"))
"z196_crack")
(define_insn_reservation "z196_cs" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "cs"))
"z196_crack")
(define_insn_reservation "z196_vs" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "vs"))
"z196_alone")
(define_insn_reservation "z196_lm_stm" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "stm,lm"))
"z196_crack")
;;
;; Binary Floating Point
;;
;; NOTE(review): the type "fhex" is listed in z196_fsimptf, z196_fsimpdf
;; and z196_fsimpsf, so the three conditions overlap for that type.
;; Presumably only one of them takes effect - confirm which one is
;; intended.
(define_insn_reservation "z196_fsimptf" 18
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fsimptf,fhex"))
"z196_alone")
(define_insn_reservation "z196_fmultf" 47
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fmultf"))
"z196_alone")
;; DF/SF simple ops and multiplies share latency 7 with the matching
;; multiply-and-add types, but multiply-and-add uses "z196_alone"
;; instead of "z196_simple".
(define_insn_reservation "z196_fsimpdf" 7
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fsimpdf,fmuldf,fhex"))
"z196_simple")
(define_insn_reservation "z196_fmadddf" 7
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fmadddf"))
"z196_alone")
(define_insn_reservation "z196_fsimpsf" 7
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fsimpsf,fmulsf,fhex"))
"z196_simple")
(define_insn_reservation "z196_fmaddsf" 7
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fmaddsf"))
"z196_alone")
;; Divide and square root share one reservation per mode.
(define_insn_reservation "z196_fdivtf" 108
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fdivtf,fsqrttf"))
"z196_alone")
(define_insn_reservation "z196_fdivdf" 36
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fdivdf,fsqrtdf"))
"z196_simple")
(define_insn_reservation "z196_fdivsf" 29
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fdivsf,fsqrtsf"))
"z196_simple")
;; Loads and stores are cheap as well.
(define_insn_reservation "z196_floaddf" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "floaddf"))
"z196_simple")
(define_insn_reservation "z196_floadsf" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "floadsf"))
"z196_simple")
(define_insn_reservation "z196_fstoredf" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fstoredf"))
"z196_simple")
(define_insn_reservation "z196_fstoresf" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fstoresf"))
"z196_simple")
;; Truncations and float<->integer conversions.  The conversions use
;; the "z196_crack" reservation.
(define_insn_reservation "z196_ftrunctf" 9
(and (eq_attr "cpu" "z196")
(eq_attr "type" "ftrunctf"))
"z196_simple")
(define_insn_reservation "z196_ftruncdf" 7
(and (eq_attr "cpu" "z196")
(eq_attr "type" "ftruncdf"))
"z196_simple")
(define_insn_reservation "z196_ftoi" 7
(and (eq_attr "cpu" "z196")
(eq_attr "type" "ftoi"))
"z196_crack")
(define_insn_reservation "z196_itof" 7
(and (eq_attr "cpu" "z196")
(eq_attr "type" "itoftf,itofdf,itofsf"))
"z196_crack")
;;
;; Decimal Floating Point
;;
;; In this section the TD (fdivtd, fmultd, fsimptd) reservations use
;; "z196_alone", the DD/SD arithmetic ones use "z196_simple", and the
;; integer conversions use "z196_crack".
;; DDTR
(define_insn_reservation "z196_fdivdd" 33
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fdivdd"))
"z196_simple")
;; DXTR
(define_insn_reservation "z196_fdivtd" 35
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fdivtd"))
"z196_alone")
;; LEDTR
(define_insn_reservation "z196_ftruncsd" 34
(and (eq_attr "cpu" "z196")
(eq_attr "type" "ftruncsd"))
"z196_simple")
;; LDXTR
(define_insn_reservation "z196_ftruncdd" 36
(and (eq_attr "cpu" "z196")
(eq_attr "type" "ftruncdd"))
"z196_simple")
;; These are normal fp loads/stores - which are cheap.
(define_insn_reservation "z196_floadsddd" 0
(and (eq_attr "cpu" "z196")
(eq_attr "type" "floadsd,floaddd,fstoredd,fstoresd"))
"z196_simple")
;; MDTR
(define_insn_reservation "z196_fmuldd" 23
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fmuldd"))
"z196_simple")
;; MXTR
(define_insn_reservation "z196_fmultd" 25
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fmultd"))
"z196_alone")
;; Multiple different insns like add, sub etc.
;; Just use the same defaults as z10.
(define_insn_reservation "z196_fsimpsd" 17
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fsimpsd"))
"z196_simple")
(define_insn_reservation "z196_fsimpdd" 17
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fsimpdd"))
"z196_simple")
(define_insn_reservation "z196_fsimptd" 18
(and (eq_attr "cpu" "z196")
(eq_attr "type" "fsimptd"))
"z196_alone")
;; CDGTR
(define_insn_reservation "z196_itofdd" 45
(and (eq_attr "cpu" "z196")
(eq_attr "type" "itofdd"))
"z196_crack")
;; CXGTR
(define_insn_reservation "z196_itoftd" 33
(and (eq_attr "cpu" "z196")
(eq_attr "type" "itoftd"))
"z196_crack")
;; CGXTR, CGDTR
(define_insn_reservation "z196_ftoidfp" 33
(and (eq_attr "cpu" "z196")
(eq_attr "type" "ftoidfp"))
"z196_crack")

View File

@ -223,6 +223,38 @@ struct processor_costs z10_cost =
COSTS_N_INSNS (71), /* DSGR */
};
/* Instruction cost table for the z196 (cpu 2817); s390_option_override
   points s390_cost at it for PROCESSOR_2817_Z196.  The initializer is
   positional, so the entries must stay in the member order of struct
   processor_costs; the trailing comment on each line names the
   instruction (or operation) the entry prices.  */
static const
struct processor_costs z196_cost =
{
COSTS_N_INSNS (7), /* M */
COSTS_N_INSNS (5), /* MGHI */
COSTS_N_INSNS (5), /* MH */
COSTS_N_INSNS (5), /* MHI */
COSTS_N_INSNS (7), /* ML */
COSTS_N_INSNS (7), /* MR */
COSTS_N_INSNS (6), /* MS */
COSTS_N_INSNS (8), /* MSG */
COSTS_N_INSNS (6), /* MSGF */
COSTS_N_INSNS (6), /* MSGFR */
COSTS_N_INSNS (8), /* MSGR */
COSTS_N_INSNS (6), /* MSR */
COSTS_N_INSNS (1) , /* multiplication in DFmode */
COSTS_N_INSNS (40), /* MXBR B+40 */
COSTS_N_INSNS (100), /* SQXBR B+100 */
COSTS_N_INSNS (42), /* SQDBR B+42 */
COSTS_N_INSNS (28), /* SQEBR B+28 */
COSTS_N_INSNS (1), /* MADBR B */
COSTS_N_INSNS (1), /* MAEBR B */
COSTS_N_INSNS (101), /* DXBR B+101 */
COSTS_N_INSNS (29), /* DDBR */
COSTS_N_INSNS (22), /* DEBR */
/* All integer divide variants are priced identically.  */
COSTS_N_INSNS (160), /* DLGR cracked */
COSTS_N_INSNS (160), /* DLR cracked */
COSTS_N_INSNS (160), /* DR expanded */
COSTS_N_INSNS (160), /* DSGFR cracked */
COSTS_N_INSNS (160), /* DSGR cracked */
};
extern int reload_completed;
/* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
@ -350,8 +382,8 @@ struct GTY(()) machine_function
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
/* That's the read ahead of the dynamic branch prediction unit in
bytes on a z10 CPU. */
#define Z10_PREDICT_DISTANCE 384
bytes on a z10 (or higher) CPU. */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
static enum machine_mode
s390_libgcc_cmp_return_mode (void)
@ -1506,7 +1538,9 @@ s390_handle_arch_option (const char *arg,
{"z9-ec", PROCESSOR_2094_Z9_109, PF_IEEE_FLOAT | PF_ZARCH
| PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP },
{"z10", PROCESSOR_2097_Z10, PF_IEEE_FLOAT | PF_ZARCH
| PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10},
| PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10},
{"z196", PROCESSOR_2817_Z196, PF_IEEE_FLOAT | PF_ZARCH
| PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 },
};
size_t i;
@ -1624,6 +1658,8 @@ s390_option_override (void)
break;
case PROCESSOR_2097_Z10:
s390_cost = &z10_cost;
/* Each processor selects its own cost table.  Without this break the
   z10 case falls through into the z196 case and s390_cost ends up
   pointing at z196_cost when tuning for z10.  */
break;
case PROCESSOR_2817_Z196:
s390_cost = &z196_cost;
break;
default:
s390_cost = &z900_cost;
@ -1648,7 +1684,8 @@ s390_option_override (void)
target_flags |= MASK_LONG_DOUBLE_128;
#endif
if (s390_tune == PROCESSOR_2097_Z10)
if (s390_tune == PROCESSOR_2097_Z10
|| s390_tune == PROCESSOR_2817_Z196)
{
if (!PARAM_SET_P (PARAM_MAX_UNROLLED_INSNS))
set_param_value ("max-unrolled-insns", 100);
@ -2782,7 +2819,9 @@ s390_cannot_force_const_mem (rtx x)
operand during and after reload. The difference to
legitimate_constant_p is that this function will not accept
a constant that would need to be forced to the literal pool
before it can be used as operand. */
before it can be used as operand.
This function accepts all constants which can be loaded directly
into a GPR. */
bool
legitimate_reload_constant_p (rtx op)
@ -2836,6 +2875,24 @@ legitimate_reload_constant_p (rtx op)
return false;
}
/* Return true if the constant OP can be loaded directly into an FPR
   and is therefore a legitimate floating-point operand during and
   after reload.

   The only constant accepted is a CONST_DOUBLE for which
   s390_float_const_zero_p holds, and only when TARGET_Z196 is set,
   so that the load zero instruction can be used.  Everything else is
   rejected.  */
static bool
legitimate_reload_fp_constant_p (rtx op)
{
  if (!TARGET_Z196)
    return false;

  if (GET_CODE (op) != CONST_DOUBLE)
    return false;

  if (s390_float_const_zero_p (op))
    return true;

  return false;
}
/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
return the class of reg to actually use. */
@ -2854,8 +2911,10 @@ s390_preferred_reload_class (rtx op, enum reg_class rclass)
else if (reg_class_subset_p (ADDR_REGS, rclass)
&& legitimate_reload_constant_p (op))
return ADDR_REGS;
else
return NO_REGS;
else if (reg_class_subset_p (FP_REGS, rclass)
&& legitimate_reload_fp_constant_p (op))
return FP_REGS;
return NO_REGS;
/* If a symbolic constant or a PLUS is reloaded,
it is most likely being used as an address, so
@ -3218,6 +3277,11 @@ preferred_la_operand_p (rtx op1, rtx op2)
if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
return false;
/* Avoid LA instructions with index register on z196; it is
preferable to use regular add instructions when possible. */
if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
return false;
if (!TARGET_64BIT && !addr.pointer)
return false;
@ -5398,8 +5462,6 @@ s390_agen_dep_p (rtx dep_insn, rtx insn)
A STD instruction should be scheduled earlier,
in order to use the bypass. */
static int
s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
{
@ -5408,7 +5470,8 @@ s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
if (s390_tune != PROCESSOR_2084_Z990
&& s390_tune != PROCESSOR_2094_Z9_109
&& s390_tune != PROCESSOR_2097_Z10)
&& s390_tune != PROCESSOR_2097_Z10
&& s390_tune != PROCESSOR_2817_Z196)
return priority;
switch (s390_safe_attr_type (insn))
@ -5437,6 +5500,7 @@ s390_issue_rate (void)
{
case PROCESSOR_2084_Z990:
case PROCESSOR_2094_Z9_109:
case PROCESSOR_2817_Z196:
return 3;
case PROCESSOR_2097_Z10:
return 2;
@ -9859,13 +9923,13 @@ s390_optimize_prologue (void)
}
}
/* On z10 the dynamic branch prediction must see the backward jump in
a window of 384 bytes. If not it falls back to the static
prediction. This function rearranges the loop backward branch in a
way which makes the static prediction always correct. The function
returns true if it added an instruction. */
/* On z10 and later the dynamic branch prediction must see the
backward jump within a certain window. If not it falls back to
the static prediction. This function rearranges the loop backward
branch in a way which makes the static prediction always correct.
The function returns true if it added an instruction. */
static bool
s390_z10_fix_long_loop_prediction (rtx insn)
s390_fix_long_loop_prediction (rtx insn)
{
rtx set = single_set (insn);
rtx code_label, label_ref, new_label;
@ -9891,11 +9955,11 @@ s390_z10_fix_long_loop_prediction (rtx insn)
if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
|| INSN_ADDRESSES (INSN_UID (insn)) == -1
|| (INSN_ADDRESSES (INSN_UID (insn))
- INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
- INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
return false;
for (distance = 0, cur_insn = PREV_INSN (insn);
distance < Z10_PREDICT_DISTANCE - 6;
distance < PREDICT_DISTANCE - 6;
distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
return false;
@ -10195,8 +10259,9 @@ s390_reorg (void)
/* Try to optimize prologue and epilogue further. */
s390_optimize_prologue ();
/* Walk over the insns and do some z10 specific changes. */
if (s390_tune == PROCESSOR_2097_Z10)
/* Walk over the insns and do some >=z10 specific changes. */
if (s390_tune == PROCESSOR_2097_Z10
|| s390_tune == PROCESSOR_2817_Z196)
{
rtx insn;
bool insn_added_p = false;
@ -10211,10 +10276,11 @@ s390_reorg (void)
continue;
if (JUMP_P (insn))
insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
insn_added_p |= s390_fix_long_loop_prediction (insn);
if (GET_CODE (PATTERN (insn)) == PARALLEL
|| GET_CODE (PATTERN (insn)) == SET)
if ((GET_CODE (PATTERN (insn)) == PARALLEL
|| GET_CODE (PATTERN (insn)) == SET)
&& s390_tune == PROCESSOR_2097_Z10)
insn_added_p |= s390_z10_optimize_cmp (insn);
}
@ -10360,8 +10426,9 @@ check_dpu (rtx *x, unsigned *mem_count)
}
/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
a new number struct loop *loop should be unrolled if tuned for the z10
cpu. The loop is analyzed for memory accesses by calling check_dpu for
a new number struct loop *loop should be unrolled if tuned for cpus with
a built-in stride prefetcher.
The loop is analyzed for memory accesses by calling check_dpu for
each rtx of the loop. Depending on the loop_depth and the amount of
memory accesses a new number <=nunroll is returned to improve the
behaviour of the hardware prefetch unit. */
@ -10373,8 +10440,7 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
unsigned i;
unsigned mem_count = 0;
/* Only z10 needs special handling. */
if (s390_tune != PROCESSOR_2097_Z10)
if (s390_tune != PROCESSOR_2097_Z10 && s390_tune != PROCESSOR_2817_Z196)
return nunroll;
/* Count the number of memory references within the loop body. */

View File

@ -36,6 +36,7 @@ enum processor_type
PROCESSOR_2084_Z990,
PROCESSOR_2094_Z9_109,
PROCESSOR_2097_Z10,
PROCESSOR_2817_Z196,
PROCESSOR_max
};
@ -48,7 +49,8 @@ enum processor_flags
PF_LONG_DISPLACEMENT = 4,
PF_EXTIMM = 8,
PF_DFP = 16,
PF_Z10 = 32
PF_Z10 = 32,
PF_Z196 = 64
};
extern enum processor_type s390_tune;
@ -77,6 +79,8 @@ extern int s390_arch_flags;
(s390_arch_flags & PF_DFP)
#define TARGET_CPU_Z10 \
(s390_arch_flags & PF_Z10)
#define TARGET_CPU_Z196 \
(s390_arch_flags & PF_Z196)
/* These flags indicate that the generated code should run on a cpu
providing the respective hardware facility when run in
@ -90,6 +94,11 @@ extern int s390_arch_flags;
(TARGET_ZARCH && TARGET_CPU_DFP && TARGET_HARD_FLOAT)
#define TARGET_Z10 \
(TARGET_ZARCH && TARGET_CPU_Z10)
#define TARGET_Z196 \
(TARGET_ZARCH && TARGET_CPU_Z196)
#define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196)
/* Run-time target specification. */

File diff suppressed because it is too large Load Diff