i386.c (athlon_cost): Fix lea, divide and XFmode move costs.
* i386.c (athlon_cost): Fix lea, divide and XFmode move costs. (x86_integer_DFmode_moves, x86_partial_reg_dependency, x86_memory_mismatch_stall): New global variables. (ix86_adjust_cost): Handle MEMORY_BOTH on places MEMORY_STORE was only allowed; fix load penalties for Athlon. * i386.h (x86_integer_DFmode_moves, x86_partial_reg_dependency, x86_memory_mismatch_stall): Declare. (TARGET_INTEGER_DFMODE_MOVES, TARGET_PARTIAL_REG_DEPENDENCY, TARGET_MEMORY_MISMATCH_STALL): New. * i386.md (athlon scheduling parameters): Fix latencies according to Athlon Optimization Manual. (sahf, xchg, fldcw, leave instruction patterns): Set athlon_decode to vector. (fsqrt instruction patterns): Set athlon_decode to direct. (movhi_1): Promote for TARGET_PARTIAL_REG_DEPENDENCY and for PARTIAL_REGISTER_STALL with !TARGET_HIMODE_MATH machines. (movqi_1): Handle promoting correctly for TARGET_PARTIAL_REG_DEPENDENCY and TARGET_PARTIAL_REGISTER_STALL machines. (pushdf_nointeger): New pattern. (pushdf_integer): Rename from pushdf. (movdf_nointeger): Enable for !TARGET_INTEGER_DFMODE_MOVES machines. (movdf_integer): Disable for !TARGET_INTEGER_DFMODE_MOVES machines. From-SVN: r33215
This commit is contained in:
parent
10f1359422
commit
0b5107cf3a
@ -1,3 +1,28 @@
|
||||
Mon Apr 17 23:35:29 MET DST 2000 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* i386.c (athlon_cost): Fix lea, divide and XFmode move costs.
|
||||
(x86_integer_DFmode_moves, x86_partial_reg_dependency,
|
||||
x86_memory_mismatch_stall): New global variables.
|
||||
(ix86_adjust_cost): Handle MEMORY_BOTH on places MEMORY_STORE was only
|
||||
allowed; fix load penalties for Athlon.
|
||||
* i386.h (x86_integer_DFmode_moves, x86_partial_reg_dependency,
|
||||
x86_memory_mismatch_stall): Declare.
|
||||
(TARGET_INTEGER_DFMODE_MOVES, TARGET_PARTIAL_REG_DEPENDENCY,
|
||||
TARGET_MEMORY_MISMATCH_STALL): New.
|
||||
* i386.md (athlon scheduling parameters): Fix latencies according to
|
||||
Athlon Optimization Manual.
|
||||
(sahf, xchg, fldcw, leave instruction patterns): Set athlon_decode to
|
||||
vector.
|
||||
(fsqrt instruction patterns): Set athlon_decode to direct.
|
||||
(movhi_1): Promote for TARGET_PARTIAL_REG_DEPENDENCY and for
|
||||
PARTIAL_REGISTER_STALL with !TARGET_HIMODE_MATH machines.
|
||||
(movqi_1): Handle promoting correctly for TARGET_PARTIAL_REG_DEPENDENCY
|
||||
and TARGET_PARTIAL_REGISTER_STALL machines.
|
||||
(pushdf_nointeger): New pattern.
|
||||
(pushdf_integer): Rename from pushdf.
|
||||
(movdf_nointeger): Enable for !TARGET_INTEGER_DFMODE_MOVES machines.
|
||||
(movdf_integer): Disable for !TARGET_INTEGER_DFMODE_MOVES machines.
|
||||
|
||||
2000-04-17 Richard Henderson <rth@cygnus.com>
|
||||
|
||||
* loop.c (canonicalize_condition): Add WANT_REG argument.
|
||||
|
@ -163,12 +163,12 @@ struct processor_costs k6_cost = {
|
||||
|
||||
struct processor_costs athlon_cost = {
|
||||
1, /* cost of an add instruction */
|
||||
1, /* cost of a lea instruction */
|
||||
2, /* cost of a lea instruction */
|
||||
1, /* variable shift costs */
|
||||
1, /* constant shift costs */
|
||||
5, /* cost of starting a multiply */
|
||||
0, /* cost of multiply per each bit set */
|
||||
19, /* cost of a divide/mod */
|
||||
42, /* cost of a divide/mod */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
@ -177,9 +177,9 @@ struct processor_costs athlon_cost = {
|
||||
Relative to reg-reg move (2). */
|
||||
{2, 3, 2}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{6, 6, 6}, /* cost of loading fp registers
|
||||
{6, 6, 20}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 4, 4} /* cost of loading integer registers */
|
||||
{4, 4, 16} /* cost of loading integer registers */
|
||||
};
|
||||
|
||||
struct processor_costs *ix86_cost = &pentium_cost;
|
||||
@ -222,6 +222,9 @@ const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
|
||||
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
|
||||
const int x86_add_esp_4 = m_ATHLON | m_K6;
|
||||
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
|
||||
const int x86_integer_DFmode_moves = ~m_ATHLON;
|
||||
const int x86_partial_reg_dependency = m_ATHLON;
|
||||
const int x86_memory_mismatch_stall = m_ATHLON;
|
||||
|
||||
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
|
||||
|
||||
@ -6287,6 +6290,7 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
|
||||
int cost;
|
||||
{
|
||||
enum attr_type insn_type, dep_insn_type;
|
||||
enum attr_memory memory;
|
||||
rtx set, set2;
|
||||
int dep_insn_code_number;
|
||||
|
||||
@ -6334,7 +6338,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
|
||||
increase the cost here for non-imov insns. */
|
||||
if (dep_insn_type != TYPE_IMOV
|
||||
&& dep_insn_type != TYPE_FMOV
|
||||
&& get_attr_memory (dep_insn) == MEMORY_LOAD)
|
||||
&& ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
|
||||
|| memory == MEMORY_BOTH))
|
||||
cost += 1;
|
||||
|
||||
/* INT->FP conversion is expensive. */
|
||||
@ -6359,7 +6364,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
|
||||
|
||||
/* Since we can't represent delayed latencies of load+operation,
|
||||
increase the cost here for non-imov insns. */
|
||||
if (get_attr_memory (dep_insn) == MEMORY_LOAD)
|
||||
if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
|
||||
|| memory == MEMORY_BOTH)
|
||||
cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
|
||||
|
||||
/* INT->FP conversion is expensive. */
|
||||
@ -6368,19 +6374,15 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
|
||||
break;
|
||||
|
||||
case PROCESSOR_ATHLON:
|
||||
/* Address Generation Interlock cause problems on the Athlon CPU because
|
||||
the loads and stores are done in order so once one load or store has
|
||||
to wait, others must too, so penalize the AGIs slightly by one cycle.
|
||||
We might experiment with this value later. */
|
||||
if (ix86_agi_dependant (insn, dep_insn, insn_type))
|
||||
cost += 1;
|
||||
if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
|
||||
|| memory == MEMORY_BOTH)
|
||||
{
|
||||
if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
|
||||
cost += 2;
|
||||
else
|
||||
cost += 3;
|
||||
}
|
||||
|
||||
/* Since we can't represent delayed latencies of load+operation,
|
||||
increase the cost here for non-imov insns. */
|
||||
if (dep_insn_type != TYPE_IMOV
|
||||
&& dep_insn_type != TYPE_FMOV
|
||||
&& get_attr_memory (dep_insn) == MEMORY_LOAD)
|
||||
cost += 2;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -173,8 +173,9 @@ extern const int x86_use_cltd, x86_read_modify_write;
|
||||
extern const int x86_read_modify, x86_split_long_moves;
|
||||
extern const int x86_promote_QImode, x86_single_stringop;
|
||||
extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
|
||||
extern const int x86_promote_hi_regs;
|
||||
extern const int x86_promote_hi_regs, x86_integer_DFmode_moves;
|
||||
extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
|
||||
extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
|
||||
|
||||
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
|
||||
#define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
|
||||
@ -206,6 +207,9 @@ extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
|
||||
#define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK)
|
||||
#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK)
|
||||
#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
|
||||
#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK)
|
||||
#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK)
|
||||
#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK)
|
||||
|
||||
#define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
|
||||
|
||||
|
@ -738,7 +738,7 @@
|
||||
;; communicates with all the execution units separately instead.
|
||||
|
||||
(define_attr "athlon_decode" "direct,vector"
|
||||
(cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str")
|
||||
(cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov")
|
||||
(const_string "vector")
|
||||
(and (eq_attr "type" "push")
|
||||
(match_operand 1 "memory_operand" ""))
|
||||
@ -766,7 +766,7 @@
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,str,cld"))
|
||||
(eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,ibr,call,callv,icmov,cld,pop,setcc,push,pop"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
@ -777,12 +777,12 @@
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "imul"))
|
||||
4 0)
|
||||
5 0)
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "idiv"))
|
||||
27 0)
|
||||
42 0)
|
||||
|
||||
(define_function_unit "athlon_muldiv" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
@ -792,56 +792,118 @@
|
||||
(define_function_unit "athlon_muldiv" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "idiv"))
|
||||
27 27)
|
||||
42 42)
|
||||
|
||||
(define_attr "athlon_fpunits" "none,store,mul,add,muladd,all"
|
||||
(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
|
||||
(cond [(eq_attr "type" "fop,fop1,fcmp")
|
||||
(const_string "add")
|
||||
(eq_attr "type" "fmul,fdiv,fpspc,fsgn")
|
||||
(eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
|
||||
(const_string "mul")
|
||||
(and (eq_attr "type" "fmov") (eq_attr "memory" "!none"))
|
||||
(and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))
|
||||
(const_string "store")
|
||||
(and (eq_attr "type" "fmov") (eq_attr "memory" "load"))
|
||||
(const_string "any")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(ior (match_operand:SI 1 "register_operand" "")
|
||||
(match_operand 1 "immediate_operand" "")))
|
||||
(const_string "store")
|
||||
(eq_attr "type" "fmov")
|
||||
(const_string "muladd")
|
||||
(eq_attr "type" "fcmov")
|
||||
(const_string "all")]
|
||||
(const_string "muladd")]
|
||||
(const_string "none")))
|
||||
|
||||
;; We use latencies 1 for definitions. This is OK to model collisions
|
||||
;; in execution units. The real latencies are modeled in the "fp" pipeline.
|
||||
|
||||
;; fsin, fcos: 96-192
|
||||
;; fsincos: 107-211
|
||||
;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fpspc"))
|
||||
100 1)
|
||||
|
||||
;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fdiv"))
|
||||
24 1)
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fop,fop1,fmul"))
|
||||
4 1)
|
||||
|
||||
;; XFmode loads are slow.
|
||||
;; XFmode store is slow too (8 cycles), but we don't need to model it, because
|
||||
;; there are no dependent instructions.
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(match_operand:XF 1 "memory_operand" "")))
|
||||
10 1)
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fmov,fsgn"))
|
||||
2 1)
|
||||
|
||||
;; fcmp and ftst instructions
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(eq_attr "athlon_decode" "direct")))
|
||||
3 1)
|
||||
|
||||
;; fcmpi instructions.
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(eq_attr "athlon_decode" "vector")))
|
||||
3 1)
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fcmov"))
|
||||
7 1)
|
||||
|
||||
(define_function_unit "athlon_fp_mul" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "mul,all"))
|
||||
4 1)
|
||||
(eq_attr "athlon_fpunits" "mul"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_fp_add" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "add,all"))
|
||||
4 1)
|
||||
(eq_attr "athlon_fpunits" "add"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_fp_muladd" 2 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "athlon_fpunits" "muladd,mul,add,all")))
|
||||
2 1)
|
||||
|
||||
(define_function_unit "athlon_fp_muladd" 2 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "!fmov")
|
||||
(eq_attr "athlon_fpunits" "muladd,mul,add,all")))
|
||||
4 1)
|
||||
(eq_attr "athlon_fpunits" "muladd,mul,add"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_fp_store" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "store,all"))
|
||||
(eq_attr "athlon_fpunits" "store"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_agu" 3 0
|
||||
;; We don't need to model the Address Generation Unit, since we don't model
|
||||
;; the re-order buffer yet and thus we never schedule more than three operations
|
||||
;; at time. Later we may want to experiment with MD_SCHED macros modeling the
|
||||
;; decoders independently on the functional units.
|
||||
|
||||
;(define_function_unit "athlon_agu" 3 0
|
||||
; (and (eq_attr "cpu" "athlon")
|
||||
; (and (eq_attr "memory" "!none")
|
||||
; (eq_attr "athlon_fpunits" "none")))
|
||||
; 1 1)
|
||||
|
||||
;; Model load unit to avoid too long sequences of loads. We don't need to
|
||||
;; model store queue, since it is hardly going to be bottleneck.
|
||||
|
||||
(define_function_unit "athlon_load" 2 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(eq_attr "athlon_fpunits" "none")))
|
||||
(eq_attr "memory" "load,both"))
|
||||
1 1)
|
||||
|
||||
|
||||
@ -1255,6 +1317,7 @@
|
||||
""
|
||||
"sahf"
|
||||
[(set_attr "length" "1")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "ppro_uops" "one")])
|
||||
|
||||
;; Pentium Pro can do steps 1 through 3 in one go.
|
||||
@ -1390,6 +1453,7 @@
|
||||
"xchg{l}\\t%1, %0"
|
||||
[(set_attr "type" "imov")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "ppro_uops" "few")])
|
||||
|
||||
(define_expand "movhi"
|
||||
@ -1437,8 +1501,10 @@
|
||||
}"
|
||||
[(set (attr "type")
|
||||
(cond [(and (eq_attr "alternative" "0")
|
||||
(eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
|
||||
(const_int 0)))
|
||||
(ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
|
||||
(const_int 0))
|
||||
(eq (symbol_ref "TARGET_HIMODE_MATH")
|
||||
(const_int 0))))
|
||||
(const_string "imov")
|
||||
(and (eq_attr "alternative" "1,2")
|
||||
(match_operand:HI 1 "aligned_operand" ""))
|
||||
@ -1456,8 +1522,10 @@
|
||||
(match_operand:HI 1 "aligned_operand" ""))
|
||||
(const_string "0")
|
||||
(and (eq_attr "alternative" "0")
|
||||
(eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
|
||||
(const_int 0)))
|
||||
(ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
|
||||
(const_int 0))
|
||||
(eq (symbol_ref "TARGET_HIMODE_MATH")
|
||||
(const_int 0))))
|
||||
(const_string "0")
|
||||
]
|
||||
(const_string "1")))
|
||||
@ -1547,9 +1615,19 @@
|
||||
[(set_attr "type" "pop")
|
||||
(set_attr "length_prefix" "1")])
|
||||
|
||||
;; Situation is quite tricky about when to choose full sized (SImode) move
|
||||
;; over QImode moves. For Q_REG -> Q_REG move we use full size only for
|
||||
;; partial register dependency machines (such as AMD Athlon), where QImode
|
||||
;; moves issue extra dependency and for partial register stalls machines
|
||||
;; that don't use QImode patterns (and QImode move cause stall on the next
|
||||
;; instruction).
|
||||
;;
|
||||
;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial
|
||||
;; register stall machines with, where we use QImode instructions, since
|
||||
;; partial register stall can be caused there. Then we use movzx.
|
||||
(define_insn "*movqi_1"
|
||||
[(set (match_operand:QI 0 "nonimmediate_operand" "=q,q,r,?r,m")
|
||||
(match_operand:QI 1 "general_operand" "qn,qm,rn,qm,qn"))]
|
||||
[(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
|
||||
(match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))]
|
||||
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
|
||||
"*
|
||||
{
|
||||
@ -1560,26 +1638,50 @@
|
||||
abort ();
|
||||
return \"movz{bl|x}\\t{%1, %k0|%k0, %1}\";
|
||||
default:
|
||||
if (which_alternative == 2)
|
||||
if (which_alternative == 4 || which_alternative == 3
|
||||
|| (which_alternative == 1 && get_attr_length (insn) == 5)
|
||||
|| (which_alternative == 0
|
||||
&& ((TARGET_PARTIAL_REG_STALL && !TARGET_QIMODE_MATH)
|
||||
|| TARGET_PARTIAL_REG_DEPENDENCY)))
|
||||
return \"mov{l}\\t{%k1, %k0|%k0, %k1}\";
|
||||
else
|
||||
return \"mov{b}\\t{%1, %0|%0, %1}\";
|
||||
}
|
||||
}"
|
||||
[(set (attr "type")
|
||||
(cond [(eq_attr "alternative" "3")
|
||||
(cond [(and (eq_attr "alternative" "3")
|
||||
(ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
|
||||
(const_int 0))
|
||||
(eq (symbol_ref "TARGET_QIMODE_MATH")
|
||||
(const_int 0))))
|
||||
(const_string "imov")
|
||||
(eq_attr "alternative" "3,5")
|
||||
(const_string "imovx")
|
||||
(and (ne (symbol_ref "TARGET_MOVX")
|
||||
(const_int 0))
|
||||
(eq_attr "alternative" "1"))
|
||||
(eq_attr "alternative" "2"))
|
||||
(const_string "imovx")
|
||||
]
|
||||
(const_string "imov")))
|
||||
; There's no place to override just the immediate length
|
||||
(set (attr "length")
|
||||
(cond [(and (eq_attr "type" "imov")
|
||||
(and (eq_attr "alternative" "2")
|
||||
(match_operand:HI 1 "immediate_operand" "")))
|
||||
(and (match_operand:HI 1 "immediate_operand" "")
|
||||
(eq_attr "alternative" "4")))
|
||||
(const_string "5")
|
||||
;; Avoid extra dependency on partial register.
|
||||
(and (eq_attr "type" "imov")
|
||||
(and (eq_attr "alternative" "1")
|
||||
(ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
|
||||
(const_int 0))))
|
||||
(const_string "5")
|
||||
;; Avoid partial register stalls when not using QImode arithmetic
|
||||
(and (eq_attr "type" "imov")
|
||||
(and (eq_attr "alternative" "1")
|
||||
(and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL")
|
||||
(const_int 0))
|
||||
(eq (symbol_ref "TARGET_QIMODE_MATH")
|
||||
(const_int 0)))))
|
||||
(const_string "5")
|
||||
]
|
||||
(const_string "*")))])
|
||||
@ -1904,10 +2006,38 @@
|
||||
;; On the average, pushdf using integers can be still shorter. Allow this
|
||||
;; pattern for optimize_size too.
|
||||
|
||||
(define_insn "*pushdf"
|
||||
(define_insn "*pushdf_nointeger"
|
||||
[(set (match_operand:DF 0 "push_operand" "=<,<,<")
|
||||
(match_operand:DF 1 "general_no_elim_operand" "f,Fo#f,*r#f"))]
|
||||
"!TARGET_INTEGER_DFMODE_MOVES"
|
||||
"*
|
||||
{
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 0:
|
||||
/* %%% We lose REG_DEAD notes for controlling pops if we split late. */
|
||||
operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx);
|
||||
operands[2] = stack_pointer_rtx;
|
||||
operands[3] = GEN_INT (8);
|
||||
if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
|
||||
return \"sub{l}\\t{%3, %2|%2, %3}\;fstp%z0\\t%y0\";
|
||||
else
|
||||
return \"sub{l}\\t{%3, %2|%2, %3}\;fst%z0\\t%y0\";
|
||||
|
||||
case 1:
|
||||
case 2:
|
||||
return \"#\";
|
||||
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
}"
|
||||
[(set_attr "type" "multi")])
|
||||
|
||||
(define_insn "*pushdf_integer"
|
||||
[(set (match_operand:DF 0 "push_operand" "=<,<")
|
||||
(match_operand:DF 1 "general_no_elim_operand" "f#r,rFo#f"))]
|
||||
""
|
||||
"TARGET_INTEGER_DFMODE_MOVES"
|
||||
"*
|
||||
{
|
||||
switch (which_alternative)
|
||||
@ -1955,7 +2085,7 @@
|
||||
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,f,*r,o")
|
||||
(match_operand:DF 1 "general_operand" "fm,f,G,*roF,F*r"))]
|
||||
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
|
||||
&& optimize_size
|
||||
&& (optimize_size || !TARGET_INTEGER_DFMODE_MOVES)
|
||||
&& (reload_in_progress || reload_completed
|
||||
|| GET_CODE (operands[1]) != CONST_DOUBLE
|
||||
|| memory_operand (operands[0], DFmode))"
|
||||
@ -2002,7 +2132,7 @@
|
||||
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o")
|
||||
(match_operand:DF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))]
|
||||
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
|
||||
&& !optimize_size
|
||||
&& !optimize_size && TARGET_INTEGER_DFMODE_MOVES
|
||||
&& (reload_in_progress || reload_completed
|
||||
|| GET_CODE (operands[1]) != CONST_DOUBLE
|
||||
|| memory_operand (operands[0], DFmode))"
|
||||
@ -2304,7 +2434,8 @@
|
||||
else
|
||||
return \"fxch\\t%0\";
|
||||
}"
|
||||
[(set_attr "type" "fxch")])
|
||||
[(set_attr "type" "fxch")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
|
||||
;; Zero extension instructions
|
||||
|
||||
@ -3202,6 +3333,7 @@
|
||||
"TARGET_80387"
|
||||
"fldcw\\t%0"
|
||||
[(set_attr "length_opcode" "2")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "ppro_uops" "few")])
|
||||
|
||||
;; Conversion between fixed point and floating point.
|
||||
@ -7691,6 +7823,7 @@
|
||||
""
|
||||
"leave"
|
||||
[(set_attr "length" "1")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "ppro_uops" "few")])
|
||||
|
||||
(define_expand "ffssi2"
|
||||
@ -8123,7 +8256,8 @@
|
||||
(sqrt:SF (match_operand:SF 1 "register_operand" "0")))]
|
||||
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
|
||||
"fsqrt"
|
||||
[(set_attr "type" "fpspc")])
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "athlon_decode" "direct")])
|
||||
|
||||
(define_insn "sqrtdf2"
|
||||
[(set (match_operand:DF 0 "register_operand" "=f")
|
||||
@ -8131,7 +8265,8 @@
|
||||
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
|
||||
&& (TARGET_IEEE_FP || flag_fast_math) "
|
||||
"fsqrt"
|
||||
[(set_attr "type" "fpspc")])
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "athlon_decode" "direct")])
|
||||
|
||||
(define_insn "*sqrtextendsfdf2"
|
||||
[(set (match_operand:DF 0 "register_operand" "=f")
|
||||
@ -8139,7 +8274,8 @@
|
||||
(match_operand:SF 1 "register_operand" "0"))))]
|
||||
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
|
||||
"fsqrt"
|
||||
[(set_attr "type" "fpspc")])
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "athlon_decode" "direct")])
|
||||
|
||||
(define_insn "sqrtxf2"
|
||||
[(set (match_operand:XF 0 "register_operand" "=f")
|
||||
@ -8147,7 +8283,8 @@
|
||||
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
|
||||
&& (TARGET_IEEE_FP || flag_fast_math) "
|
||||
"fsqrt"
|
||||
[(set_attr "type" "fpspc")])
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "athlon_decode" "direct")])
|
||||
|
||||
(define_insn "*sqrtextenddfxf2"
|
||||
[(set (match_operand:XF 0 "register_operand" "=f")
|
||||
@ -8155,7 +8292,8 @@
|
||||
(match_operand:DF 1 "register_operand" "0"))))]
|
||||
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
|
||||
"fsqrt"
|
||||
[(set_attr "type" "fpspc")])
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "athlon_decode" "direct")])
|
||||
|
||||
(define_insn "*sqrtextendsfxf2"
|
||||
[(set (match_operand:XF 0 "register_operand" "=f")
|
||||
@ -8163,7 +8301,8 @@
|
||||
(match_operand:SF 1 "register_operand" "0"))))]
|
||||
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
|
||||
"fsqrt"
|
||||
[(set_attr "type" "fpspc")])
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "athlon_decode" "direct")])
|
||||
|
||||
(define_insn "sindf2"
|
||||
[(set (match_operand:DF 0 "register_operand" "=f")
|
||||
|
Loading…
Reference in New Issue
Block a user