i386.md (cpu attribute): Add "athlon".
* i386.md (cpu attribute): Add "athlon". (athlon_decode): New attribute. (Athlon scheduling units definitions): New. (fcmp and shld patterns): Set athlon_decode to "vector". * i386.c (athlon_cost): New. (m_ATHLON): New. (x86_use_leave, x86_push_memory, x86_movx, x86_cmove, x86_deep_branch, x86_use_sahf): Set for Athlon. (x86_use_fiop): Unset for Athlon. (override_options): Define Athlon alignments and "athlon" name. (x86_adjust_cost): Penalize AGI and delayed latencies for Athlon. * i386.h (TARGET_ATHLON): New. (enum processor_type): Add PROCESSOR_ATHLON. (TARGET_CPU_DEFAULT_SPEC): Set to "-D__tune_athlon__" for CPU_DEFAULT==5 (TARGET_CPP_CPU_SPECS): Set -D__tune_athlon__ for Athlon. From-SVN: r30852
This commit is contained in:
parent
009fef522a
commit
309ada50de
|
@ -1,3 +1,22 @@
|
|||
Fri Dec 10 00:52:13 MET 1999 Jan Hubicka <hubicka@freesoft.cz>
|
||||
|
||||
* i386.md (cpu attribute): Add "athlon".
|
||||
(athlon_decode): New attribute.
|
||||
(Athlon scheduling units definitions): New.
|
||||
(fcmp and shld patterns): Set athlon_decode to "vector".
|
||||
* i386.c (athlon_cost): New.
|
||||
(m_ATHLON): New.
|
||||
(x86_use_leave, x86_push_memory, x86_movx, x86_cmove, x86_deep_branch,
|
||||
x86_use_sahf): Set for Athlon.
|
||||
(x86_use_fiop): Unset for Athlon.
|
||||
(override_options): Define Athlon alignments and "athlon" name.
|
||||
(x86_adjust_cost): Penalize AGI and delayed latencies for Athlon.
|
||||
* i386.h (TARGET_ATHLON): New.
|
||||
(enum processor_type): Add PROCESSOR_ATHLON.
|
||||
(TARGET_CPU_DEFAULT_SPEC): Set to "-D__tune_athlon__"
|
||||
for CPU_DEFAULT==5
|
||||
(TARGET_CPP_CPU_SPECS): Set -D__tune_athlon__ for Athlon.
|
||||
|
||||
1999-12-09 Andreas Jaeger <aj@suse.de>
|
||||
|
||||
* loop.c (record_biv): Declare parameter as int.
|
||||
|
|
|
@ -155,6 +155,26 @@ struct processor_costs k6_cost = {
|
|||
{4, 4, 4} /* cost of loading integer registers */
|
||||
};
|
||||
|
||||
struct processor_costs athlon_cost = {
|
||||
1, /* cost of an add instruction */
|
||||
1, /* cost of a lea instruction */
|
||||
1, /* variable shift costs */
|
||||
1, /* constant shift costs */
|
||||
5, /* cost of starting a multiply */
|
||||
0, /* cost of multiply per each bit set */
|
||||
19, /* cost of a divide/mod */
|
||||
8, /* "large" insn */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{4, 5, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{2, 3, 2}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{6, 6, 6}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 4, 4} /* cost of loading integer registers */
|
||||
};
|
||||
|
||||
struct processor_costs *ix86_cost = &pentium_cost;
|
||||
|
||||
/* Processor feature/optimization bitmasks. */
|
||||
|
@ -163,22 +183,23 @@ struct processor_costs *ix86_cost = &pentium_cost;
|
|||
#define m_PENT (1<<PROCESSOR_PENTIUM)
|
||||
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
|
||||
#define m_K6 (1<<PROCESSOR_K6)
|
||||
#define m_ATHLON (1<<PROCESSOR_ATHLON)
|
||||
|
||||
const int x86_use_leave = m_386 | m_K6;
|
||||
const int x86_push_memory = m_386 | m_K6;
|
||||
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
|
||||
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
|
||||
const int x86_zero_extend_with_and = m_486 | m_PENT;
|
||||
const int x86_movx = 0 /* m_386 | m_PPRO | m_K6 */;
|
||||
const int x86_movx = m_ATHLON /* m_386 | m_PPRO | m_K6 */;
|
||||
const int x86_double_with_add = ~m_386;
|
||||
const int x86_use_bit_test = m_386;
|
||||
const int x86_unroll_strlen = m_486 | m_PENT;
|
||||
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
|
||||
const int x86_use_any_reg = m_486;
|
||||
const int x86_cmove = m_PPRO;
|
||||
const int x86_deep_branch = m_PPRO | m_K6;
|
||||
const int x86_use_sahf = m_PPRO | m_K6;
|
||||
const int x86_cmove = m_PPRO | m_ATHLON;
|
||||
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
|
||||
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
|
||||
const int x86_partial_reg_stall = m_PPRO;
|
||||
const int x86_use_loop = m_K6;
|
||||
const int x86_use_fiop = ~m_PPRO;
|
||||
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
|
||||
const int x86_use_mov0 = m_K6;
|
||||
const int x86_use_cltd = ~(m_PENT | m_K6);
|
||||
const int x86_read_modify_write = ~m_PENT;
|
||||
|
@ -334,7 +355,8 @@ override_options ()
|
|||
{&i486_cost, 0, 0, 4, 4, 4, 1},
|
||||
{&pentium_cost, 0, 0, -4, -4, -4, 1},
|
||||
{&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
|
||||
{&k6_cost, 0, 0, -5, -5, 4, 1}
|
||||
{&k6_cost, 0, 0, -5, -5, 4, 1},
|
||||
{&athlon_cost, 0, 0, 4, -4, 4, 1}
|
||||
};
|
||||
|
||||
static struct pta
|
||||
|
@ -351,6 +373,7 @@ override_options ()
|
|||
{"i686", PROCESSOR_PENTIUMPRO},
|
||||
{"pentiumpro", PROCESSOR_PENTIUMPRO},
|
||||
{"k6", PROCESSOR_K6},
|
||||
{"athlon", PROCESSOR_ATHLON},
|
||||
};
|
||||
|
||||
int const pta_size = sizeof(processor_alias_table)/sizeof(struct pta);
|
||||
|
@ -5582,9 +5605,9 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
|
|||
rtx set, set2;
|
||||
int dep_insn_code_number;
|
||||
|
||||
/* We describe no anti or output dependencies. */
|
||||
/* Anti and output dependencies have zero cost on all CPUs. */
|
||||
if (REG_NOTE_KIND (link) != 0)
|
||||
return cost;
|
||||
return 0;
|
||||
|
||||
dep_insn_code_number = recog_memoized (dep_insn);
|
||||
|
||||
|
@ -5660,6 +5683,20 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
|
|||
cost += 5;
|
||||
break;
|
||||
|
||||
case PROCESSOR_ATHLON:
|
||||
/* Address Generation Interlocks cause problems on the Athlon CPU because
|
||||
the loads and stores are done in order so once one load or store has
|
||||
to wait, others must too, so penalize the AGIs slightly by one cycle.
|
||||
We might experiment with this value later. */
|
||||
if (ix86_agi_dependant (insn, dep_insn, insn_type))
|
||||
cost += 1;
|
||||
|
||||
/* Since we can't represent delayed latencies of load+operation,
|
||||
increase the cost here for non-imov insns. */
|
||||
if (dep_insn_type != TYPE_IMOV
|
||||
&& dep_insn_type != TYPE_FMOV
|
||||
&& get_attr_memory (dep_insn) == MEMORY_LOAD)
|
||||
cost += 2;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -151,6 +151,7 @@ extern int target_flags;
|
|||
#define TARGET_PENTIUM (ix86_cpu == PROCESSOR_PENTIUM)
|
||||
#define TARGET_PENTIUMPRO (ix86_cpu == PROCESSOR_PENTIUMPRO)
|
||||
#define TARGET_K6 (ix86_cpu == PROCESSOR_K6)
|
||||
#define TARGET_ATHLON (ix86_cpu == PROCESSOR_ATHLON)
|
||||
|
||||
#define CPUMASK (1 << ix86_cpu)
|
||||
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
|
||||
|
@ -245,6 +246,7 @@ enum processor_type
|
|||
PROCESSOR_PENTIUM,
|
||||
PROCESSOR_PENTIUMPRO,
|
||||
PROCESSOR_K6,
|
||||
PROCESSOR_ATHLON,
|
||||
PROCESSOR_max
|
||||
};
|
||||
|
||||
|
@ -326,6 +328,9 @@ extern int ix86_arch;
|
|||
#if TARGET_CPU_DEFAULT == 4
|
||||
#define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__"
|
||||
#endif
|
||||
#if TARGET_CPU_DEFAULT == 5
|
||||
#define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__"
|
||||
#endif
|
||||
#ifndef CPP_CPU_DEFAULT_SPEC
|
||||
#define CPP_CPU_DEFAULT_SPEC "-D__tune_i386__"
|
||||
#endif
|
||||
|
@ -342,11 +347,13 @@ extern int ix86_arch;
|
|||
%{march=pentiumpro|march=i686:-D__pentiumpro -D__pentiumpro__ \
|
||||
%{!mcpu*:-D__tune_pentiumpro__ }}\
|
||||
%{march=k6:-D__k6 -D__k6__ %{!mcpu*:-D__tune_k6__ }}\
|
||||
%{march=athlon:-D__athlon -D__athlon__ %{!mcpu*:-D__tune_athlon__ }}\
|
||||
%{m386|mcpu=i386:-D__tune_i386__ }\
|
||||
%{m486|mcpu=i486:-D__tune_i486__ }\
|
||||
%{mpentium|mcpu=pentium|mcpu=i586:-D__tune_pentium__ }\
|
||||
%{mpentiumpro|mcpu=pentiumpro|mcpu=i686:-D__tune_pentiumpro__ }\
|
||||
%{mcpu=k6:-D__tune_k6__ }\
|
||||
%{mcpu=athlon:-D__tune_athlon__ }\
|
||||
%{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -77,7 +77,7 @@
|
|||
|
||||
;; Processor type. This attribute must exactly match the processor_type
|
||||
;; enumeration in i386.h.
|
||||
(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6"
|
||||
(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon"
|
||||
(const (symbol_ref "ix86_cpu")))
|
||||
|
||||
;; A basic instruction type. Refinements due to arguments to be
|
||||
|
@ -693,6 +693,128 @@
|
|||
(eq_attr "type" "idiv"))
|
||||
17 17)
|
||||
|
||||
;; AMD Athlon Scheduling
|
||||
;;
|
||||
;; The Athlon does contain three pipelined FP units, three integer units and
|
||||
;; three address generation units.
|
||||
;;
|
||||
;; The predecode logic is determining boundaries of instructions in the 64
|
||||
;; byte cache line. So the cache line straddling problem of K6 might be an issue
|
||||
;; here as well, but it is not noted in the documentation.
|
||||
;;
|
||||
;; Three DirectPath instruction decoders and only one VectorPath decoder
|
||||
;; are available. They can decode three DirectPath instructions or one VectorPath
|
||||
;; instruction per cycle.
|
||||
;; Decoded macro instructions are then passed to the 72-entry instruction control
|
||||
;; unit, which passes
|
||||
;; them to the specialized integer (18 entry) and fp (36 entry) schedulers.
|
||||
;;
|
||||
;; The load/store queue unit is not attached to the schedulers but
|
||||
;; communicates with all the execution units separately instead.
|
||||
|
||||
(define_attr "athlon_decode" "direct,vector"
|
||||
(cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc")
|
||||
(const_string "vector")
|
||||
(and (eq_attr "type" "push")
|
||||
(match_operand 1 "memory_operand" ""))
|
||||
(const_string "vector")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(ior (match_operand:XF 0 "memory_operand" "")
|
||||
(match_operand:XF 1 "memory_operand" "")))
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
|
||||
(define_function_unit "athlon_vectordec" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_decode" "vector"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_directdec" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_decode" "direct"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_vectordec" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_decode" "direct"))
|
||||
1 1 [(eq_attr "athlon_decode" "vector")])
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "imul"))
|
||||
4 0)
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "idiv"))
|
||||
27 0)
|
||||
|
||||
(define_function_unit "athlon_muldiv" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "imul"))
|
||||
5 0)
|
||||
|
||||
(define_function_unit "athlon_muldiv" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "idiv"))
|
||||
27 27)
|
||||
|
||||
(define_attr "athlon_fpunits" "none,store,mul,add,muladd,all"
|
||||
(cond [(eq_attr "type" "fop,fop1,fcmp")
|
||||
(const_string "add")
|
||||
(eq_attr "type" "fmul,fdiv,fpspc,fsgn")
|
||||
(const_string "mul")
|
||||
(and (eq_attr "type" "fmov") (eq_attr "memory" "!none"))
|
||||
(const_string "store")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(ior (match_operand:SI 1 "register_operand" "")
|
||||
(match_operand 1 "immediate_operand" "")))
|
||||
(const_string "store")
|
||||
(eq_attr "type" "fmov")
|
||||
(const_string "muladd")
|
||||
(eq_attr "type" "fcmov")
|
||||
(const_string "all")]
|
||||
(const_string "none")))
|
||||
|
||||
(define_function_unit "athlon_fp_mul" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "mul,all"))
|
||||
4 1)
|
||||
|
||||
(define_function_unit "athlon_fp_add" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "add,all"))
|
||||
4 1)
|
||||
|
||||
(define_function_unit "athlon_fp_muladd" 2 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "athlon_fpunits" "muladd,mul,add,all")))
|
||||
2 1)
|
||||
|
||||
(define_function_unit "athlon_fp_muladd" 2 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "!fmov")
|
||||
(eq_attr "athlon_fpunits" "muladd,mul,add,all")))
|
||||
4 1)
|
||||
|
||||
(define_function_unit "athlon_fp_store" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "store,all"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_agu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(eq_attr "athlon_fpunits" "none")))
|
||||
1 1)
|
||||
|
||||
|
||||
;; Compare instructions.
|
||||
|
||||
;; All compare insns have expanders that save the operands away without
|
||||
|
@ -1095,7 +1217,8 @@
|
|||
&& FLOAT_MODE_P (GET_MODE (operands[0]))
|
||||
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
|
||||
"* return output_fp_compare (insn, operands, 1, 0);"
|
||||
[(set_attr "type" "fcmp")])
|
||||
[(set_attr "type" "fcmp")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
|
||||
(define_insn "*cmpfp_iu"
|
||||
[(set (reg:CCFPU 17)
|
||||
|
@ -1105,7 +1228,8 @@
|
|||
&& FLOAT_MODE_P (GET_MODE (operands[0]))
|
||||
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
|
||||
"* return output_fp_compare (insn, operands, 1, 1);"
|
||||
[(set_attr "type" "fcmp")])
|
||||
[(set_attr "type" "fcmp")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
|
||||
;; Move instructions.
|
||||
|
||||
|
@ -5440,6 +5564,7 @@
|
|||
[(set_attr "type" "ishift")
|
||||
(set_attr "length_opcode" "3")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "ppro_uops" "few")])
|
||||
|
||||
(define_expand "x86_shift_adj_1"
|
||||
|
|
Loading…
Reference in New Issue