Fix for PR51109

From-SVN: r192296
This commit is contained in:
Ganesh Gopalasubramanian 2012-10-10 09:19:11 +00:00 committed by Venkataramanan Kumar
parent d8cfe70799
commit 85e02264cb
2 changed files with 25 additions and 13 deletions

View File

@ -1,3 +1,9 @@
2012-10-10 Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>
PR target/51109
* gcc/config/i386/bdver1.md (bdver1_int): Split automaton into
bdver1_ieu and bdver1_agu to reduce state transitions.
2012-10-10 Richard Biener <rguenther@suse.de>
PR middle-end/54876

View File

@ -36,7 +36,7 @@
(define_attr "bdver1_decode" "direct,vector,double"
(const_string "direct"))
(define_automaton "bdver1,bdver1_int,bdver1_load,bdver1_mult,bdver1_fp")
(define_automaton "bdver1,bdver1_ieu,bdver1_load,bdver1_fp,bdver1_agu")
(define_cpu_unit "bdver1-decode0" "bdver1")
(define_cpu_unit "bdver1-decode1" "bdver1")
@ -71,12 +71,12 @@
| (nothing,(bdver1-decode1 + bdver1-decode2)))")
(define_cpu_unit "bdver1-ieu0" "bdver1_int")
(define_cpu_unit "bdver1-ieu1" "bdver1_int")
(define_cpu_unit "bdver1-ieu0" "bdver1_ieu")
(define_cpu_unit "bdver1-ieu1" "bdver1_ieu")
(define_reservation "bdver1-ieu" "(bdver1-ieu0 | bdver1-ieu1)")
(define_cpu_unit "bdver1-agu0" "bdver1_int")
(define_cpu_unit "bdver1-agu1" "bdver1_int")
(define_cpu_unit "bdver1-agu0" "bdver1_agu")
(define_cpu_unit "bdver1-agu1" "bdver1_agu")
(define_reservation "bdver1-agu" "(bdver1-agu0 | bdver1-agu1)")
(define_cpu_unit "bdver1-mult" "bdver1_mult")
@ -93,6 +93,12 @@
;; 128-bit SSE instructions issue two stores at once.
(define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)")
;; Vectorpath (microcoded) instructions are single-issue instructions,
;; so each one occupies all the integer-side units at once: both IEUs,
;; both AGUs and both load units.
(define_reservation "bdver1-ivector" "bdver1-ieu0+bdver1-ieu1+
bdver1-agu0+bdver1-agu1+
bdver1-load0+bdver1-load1")
;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for athlon and 11 for K8.
;; Compensate the difference for athlon because it results in significantly
@ -125,7 +131,7 @@
(define_insn_reservation "bdver1_call" 0
(and (eq_attr "cpu" "bdver1,bdver2")
(eq_attr "type" "call,callv"))
"bdver1-double,bdver1-agu,bdver1-ieu")
"bdver1-double,bdver1-agu")
;; PUSH mem is double path.
(define_insn_reservation "bdver1_push" 1
(and (eq_attr "cpu" "bdver1,bdver2")
@ -135,17 +141,17 @@
(define_insn_reservation "bdver1_pop" 1
(and (eq_attr "cpu" "bdver1,bdver2")
(eq_attr "type" "pop"))
"bdver1-direct,(bdver1-ieu+bdver1-load)")
"bdver1-direct,bdver1-ivector")
;; No latency information is available for LEAVE so far; assume the same
;; as amdfam10.
(define_insn_reservation "bdver1_leave" 3
(and (eq_attr "cpu" "bdver1,bdver2")
(eq_attr "type" "leave"))
"bdver1-vector,(bdver1-ieu+bdver1-load)")
"bdver1-vector,bdver1-ivector")
;; LEA executes in the AGU unit with 1-cycle latency on BDVER1.
(define_insn_reservation "bdver1_lea" 1
(and (eq_attr "cpu" "bdver1,bdver2")
(eq_attr "type" "lea"))
"bdver1-direct,bdver1-agu,nothing")
"bdver1-direct,bdver1-agu")
;; MUL executes in the special multiplier unit attached to IEU1.
(define_insn_reservation "bdver1_imul_DI" 6
@ -153,23 +159,23 @@
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "none,unknown"))))
"bdver1-direct1,bdver1-ieu1,bdver1-mult,nothing,bdver1-ieu1")
"bdver1-direct1,bdver1-ieu1")
(define_insn_reservation "bdver1_imul" 4
(and (eq_attr "cpu" "bdver1,bdver2")
(and (eq_attr "type" "imul")
(eq_attr "memory" "none,unknown")))
"bdver1-direct1,bdver1-ieu1,bdver1-mult,bdver1-ieu1")
"bdver1-direct1,bdver1-ieu1")
(define_insn_reservation "bdver1_imul_mem_DI" 10
(and (eq_attr "cpu" "bdver1,bdver2")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "load,both"))))
"bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,nothing,bdver1-ieu")
"bdver1-direct1,bdver1-load,bdver1-ieu1")
(define_insn_reservation "bdver1_imul_mem" 8
(and (eq_attr "cpu" "bdver1,bdver2")
(and (eq_attr "type" "imul")
(eq_attr "memory" "load,both")))
"bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,bdver1-ieu")
"bdver1-direct1,bdver1-load,bdver1-ieu1")
;; IDIV cannot execute in parallel with other instructions. Dealing with it
;; as with short latency vector instruction is good approximation avoiding