i386.md (cpu attribute): Add "athlon".

* i386.md (cpu attribute): Add "athlon".
	(athlon_decode): New attribute.
	(Athlon scheduling units definitions): New.
	(fcmp and shld patterns): Set athlon_decode to "vector".
	* i386.c (athlon_cost): New.
	(m_ATHLON): New.
	(x86_use_leave, x86_push_memory, x86_movx, x86_cmove, x86_deep_branch,
	x86_use_sahf): Set for Athlon.
	(x86_use_fiop): Unset for Athlon.
	(override_options): Define Athlon alignments and "athlon" name.
	(x86_adjust_cost): Penalize AGI and delayed latencies for Athlon.
	* i386.h (TARGET_ATHLON): New.
	(enum processor_type): Add PROCESSOR_ATHLON.
	(TARGET_CPU_DEFAULT_SPEC): Set to "-D__tune_athlon__"
	for CPU_DEFAULT==5
	(TARGET_CPP_CPU_SPECS): Set -D__tune_athlon__ for Athlon.

From-SVN: r30852
This commit is contained in:
Jan Hubicka 1999-12-10 00:54:58 +01:00 committed by Jan Hubicka
parent 009fef522a
commit 309ada50de
4 changed files with 201 additions and 13 deletions

View File

@ -1,3 +1,22 @@
Fri Dec 10 00:52:13 MET 1999 Jan Hubicka <hubicka@freesoft.cz>
* i386.md (cpu attribute): Add "athlon".
(athlon_decode): New attribute.
(Athlon scheduling units definitions): New.
(fcmp and shld patterns): Set athlon_decode to "vector".
* i386.c (athlon_cost): New.
(m_ATHLON): New.
(x86_use_leave, x86_push_memory, x86_movx, x86_cmove, x86_deep_branch,
x86_use_sahf): Set for Athlon.
(x86_use_fiop): Unset for Athlon.
(override_options): Define Athlon alignments and "athlon" name.
(x86_adjust_cost): Penalize AGI and delayed latencies for Athlon.
* i386.h (TARGET_ATHLON): New.
(enum processor_type): Add PROCESSOR_ATHLON.
(TARGET_CPU_DEFAULT_SPEC): Set to "-D__tune_athlon__"
for CPU_DEFAULT==5
(TARGET_CPP_CPU_SPECS): Set -D__tune_athlon__ for Athlon.
1999-12-09 Andreas Jaeger <aj@suse.de>
* loop.c (record_biv): Declare parameter as int.

View File

@ -155,6 +155,26 @@ struct processor_costs k6_cost = {
{4, 4, 4} /* cost of loading integer registers */
};
/* Instruction cost table for the AMD Athlon.  */
struct processor_costs athlon_cost = {
1, /* cost of an add instruction */
1, /* cost of a lea instruction */
1, /* variable shift costs */
1, /* constant shift costs */
5, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
19, /* cost of a divide/mod */
8, /* "large" insn */
4, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */
4, /* cost of reg,reg fld/fst */
{6, 6, 6}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{4, 4, 4} /* NOTE(review): presumably the cost of storing fp registers;
the comment here used to repeat "cost of loading integer
registers" -- confirm against struct processor_costs. */
};
struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks. */
@ -163,22 +183,23 @@ struct processor_costs *ix86_cost = &pentium_cost;
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
const int x86_use_leave = m_386 | m_K6;
const int x86_push_memory = m_386 | m_K6;
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = 0 /* m_386 | m_PPRO | m_K6 */;
const int x86_movx = m_ATHLON /* m_386 | m_PPRO | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO;
const int x86_deep_branch = m_PPRO | m_K6;
const int x86_use_sahf = m_PPRO | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~m_PPRO;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
@ -334,7 +355,8 @@ override_options ()
{&i486_cost, 0, 0, 4, 4, 4, 1},
{&pentium_cost, 0, 0, -4, -4, -4, 1},
{&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
{&k6_cost, 0, 0, -5, -5, 4, 1}
{&k6_cost, 0, 0, -5, -5, 4, 1},
{&athlon_cost, 0, 0, 4, -4, 4, 1}
};
static struct pta
@ -351,6 +373,7 @@ override_options ()
{"i686", PROCESSOR_PENTIUMPRO},
{"pentiumpro", PROCESSOR_PENTIUMPRO},
{"k6", PROCESSOR_K6},
{"athlon", PROCESSOR_ATHLON},
};
int const pta_size = sizeof(processor_alias_table)/sizeof(struct pta);
@ -5582,9 +5605,9 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
rtx set, set2;
int dep_insn_code_number;
/* We describe no anti or output depenancies. */
/* Anti and output dependencies have zero cost on all CPUs. */
if (REG_NOTE_KIND (link) != 0)
return cost;
return 0;
dep_insn_code_number = recog_memoized (dep_insn);
@ -5660,6 +5683,20 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
cost += 5;
break;
case PROCESSOR_ATHLON:
/* Address Generation Interlocks cause problems on the Athlon CPU because
the loads and stores are done in order, so once one load or store has
to wait, the others must wait too.  Penalize AGIs slightly, by one cycle.
We might experiment with this value later. */
if (ix86_agi_dependant (insn, dep_insn, insn_type))
cost += 1;
/* Since we can't represent delayed latencies of load+operation,
increase the cost here for non-imov insns. */
if (dep_insn_type != TYPE_IMOV
&& dep_insn_type != TYPE_FMOV
&& get_attr_memory (dep_insn) == MEMORY_LOAD)
cost += 2;
default:
break;
}

View File

@ -151,6 +151,7 @@ extern int target_flags;
#define TARGET_PENTIUM (ix86_cpu == PROCESSOR_PENTIUM)
#define TARGET_PENTIUMPRO (ix86_cpu == PROCESSOR_PENTIUMPRO)
#define TARGET_K6 (ix86_cpu == PROCESSOR_K6)
#define TARGET_ATHLON (ix86_cpu == PROCESSOR_ATHLON)
#define CPUMASK (1 << ix86_cpu)
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
@ -245,6 +246,7 @@ enum processor_type
PROCESSOR_PENTIUM,
PROCESSOR_PENTIUMPRO,
PROCESSOR_K6,
PROCESSOR_ATHLON,
PROCESSOR_max
};
@ -326,6 +328,9 @@ extern int ix86_arch;
#if TARGET_CPU_DEFAULT == 4
#define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__"
#endif
#if TARGET_CPU_DEFAULT == 5
#define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__"
#endif
#ifndef CPP_CPU_DEFAULT_SPEC
#define CPP_CPU_DEFAULT_SPEC "-D__tune_i386__"
#endif
@ -342,11 +347,13 @@ extern int ix86_arch;
%{march=pentiumpro|march=i686:-D__pentiumpro -D__pentiumpro__ \
%{!mcpu*:-D__tune_pentiumpro__ }}\
%{march=k6:-D__k6 -D__k6__ %{!mcpu*:-D__tune_k6__ }}\
%{march=athlon:-D__athlon -D__athlon__ %{!mcpu*:-D__tune_athlon__ }}\
%{m386|mcpu=i386:-D__tune_i386__ }\
%{m486|mcpu=i486:-D__tune_i486__ }\
%{mpentium|mcpu=pentium|mcpu=i586:-D__tune_pentium__ }\
%{mpentiumpro|mcpu=pentiumpro|mcpu=i686:-D__tune_pentiumpro__ }\
%{mcpu=k6:-D__tune_k6__ }\
%{mcpu=athlon:-D__tune_athlon__ }\
%{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}"
#endif

View File

@ -77,7 +77,7 @@
;; Processor type. This attribute must exactly match the processor_type
;; enumeration in i386.h.
(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6"
(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon"
(const (symbol_ref "ix86_cpu")))
;; A basic instruction type. Refinements due to arguments to be
@ -693,6 +693,128 @@
(eq_attr "type" "idiv"))
17 17)
;; AMD Athlon Scheduling
;;
;; The Athlon contains three pipelined FP units, three integer units and
;; three address generation units.
;;
;; The predecode logic determines the boundaries of instructions in the 64
;; byte cache line. So the cache line straddling problem of the K6 might be an
;; issue here as well, but it is not noted in the documentation.
;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available. They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;; Decoded macro instructions are then passed to the 72 entry instruction
;; control unit, which passes them to the specialized integer (18 entry)
;; and fp (36 entry) schedulers.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.
;; Athlon decoder class of an insn.  The default is "vector" for insns of
;; type call, imul, idiv, other, multi, fcmov and fpspc, for push insns whose
;; operand 1 is a memory operand, and for fmov insns with an XFmode memory
;; operand; every other insn defaults to "direct".  Individual patterns can
;; override the default with set_attr.
(define_attr "athlon_decode" "direct,vector"
(cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc")
(const_string "vector")
(and (eq_attr "type" "push")
(match_operand 1 "memory_operand" ""))
(const_string "vector")
(and (eq_attr "type" "fmov")
(ior (match_operand:XF 0 "memory_operand" "")
(match_operand:XF 1 "memory_operand" "")))
(const_string "vector")]
(const_string "direct")))
;; Decoder units.  Operands after the unit name are: multiplicity,
;; simultaneity, insn test, ready-delay, issue-delay and an optional
;; conflict list (see define_function_unit in the GCC internals manual).
;;
;; One vector decoder, used by "vector" insns.
(define_function_unit "athlon_vectordec" 1 0
(and (eq_attr "cpu" "athlon")
(eq_attr "athlon_decode" "vector"))
1 1)
;; Three direct decoders, used by "direct" insns.
(define_function_unit "athlon_directdec" 3 0
(and (eq_attr "cpu" "athlon")
(eq_attr "athlon_decode" "direct"))
1 1)
;; "direct" insns are also listed on the vector decoder, with a conflict
;; list naming only "vector" insns.
;; NOTE(review): presumably this models that a vector insn cannot be decoded
;; in the same cycle as direct insns -- confirm.
(define_function_unit "athlon_vectordec" 1 0
(and (eq_attr "cpu" "athlon")
(eq_attr "athlon_decode" "direct"))
1 1 [(eq_attr "athlon_decode" "vector")])
;; Integer execution units (multiplicity 3).  The first entry covers the
;; ordinary integer insn types with ready-delay 1 and issue-delay 1.
;; NOTE(review): imul and idiv are named in this type list and also have
;; their own athlon_ieu entries below -- confirm the overlap is intended.
(define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov"))
1 1)
;; imul on the integer units: ready-delay 4, issue-delay 0.
(define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "imul"))
4 0)
;; idiv on the integer units: ready-delay 27, issue-delay 0.
(define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "idiv"))
27 0)
;; Single multiply/divide unit.  imul: ready-delay 5, issue-delay 0.
(define_function_unit "athlon_muldiv" 1 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "imul"))
5 0)
;; idiv: ready-delay 27 and issue-delay 27 on the multiply/divide unit.
(define_function_unit "athlon_muldiv" 1 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "idiv"))
27 27)
;; Which Athlon FP unit(s) an insn is assigned to for scheduling:
;;   add    - types fop, fop1, fcmp
;;   mul    - types fmul, fdiv, fpspc, fsgn
;;   store  - fmov whose "memory" attribute is not "none", or whose
;;            operand 1 is an SImode register or an immediate
;;   muladd - any other fmov
;;   all    - fcmov
;;   none   - everything else
(define_attr "athlon_fpunits" "none,store,mul,add,muladd,all"
(cond [(eq_attr "type" "fop,fop1,fcmp")
(const_string "add")
(eq_attr "type" "fmul,fdiv,fpspc,fsgn")
(const_string "mul")
(and (eq_attr "type" "fmov") (eq_attr "memory" "!none"))
(const_string "store")
(and (eq_attr "type" "fmov")
(ior (match_operand:SI 1 "register_operand" "")
(match_operand 1 "immediate_operand" "")))
(const_string "store")
(eq_attr "type" "fmov")
(const_string "muladd")
(eq_attr "type" "fcmov")
(const_string "all")]
(const_string "none")))
;; FP multiply unit: insns whose athlon_fpunits is "mul" or "all";
;; ready-delay 4, issue-delay 1.
(define_function_unit "athlon_fp_mul" 1 0
(and (eq_attr "cpu" "athlon")
(eq_attr "athlon_fpunits" "mul,all"))
4 1)
;; FP add unit: insns whose athlon_fpunits is "add" or "all";
;; ready-delay 4, issue-delay 1.
(define_function_unit "athlon_fp_add" 1 0
(and (eq_attr "cpu" "athlon")
(eq_attr "athlon_fpunits" "add,all"))
4 1)
;; Combined mul/add resource (multiplicity 2).  fmov insns get ready-delay 2;
;; all other insns assigned to muladd, mul, add or all get ready-delay 4.
(define_function_unit "athlon_fp_muladd" 2 0
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
(eq_attr "athlon_fpunits" "muladd,mul,add,all")))
2 1)
(define_function_unit "athlon_fp_muladd" 2 0
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "!fmov")
(eq_attr "athlon_fpunits" "muladd,mul,add,all")))
4 1)
;; FP store unit: insns whose athlon_fpunits is "store" or "all".
(define_function_unit "athlon_fp_store" 1 0
(and (eq_attr "cpu" "athlon")
(eq_attr "athlon_fpunits" "store,all"))
1 1)
;; Address generation units (multiplicity 3): insns that access memory
;; ("memory" attribute not "none") and use no FP unit.
(define_function_unit "athlon_agu" 3 0
(and (eq_attr "cpu" "athlon")
(and (eq_attr "memory" "!none")
(eq_attr "athlon_fpunits" "none")))
1 1)
;; Compare instructions.
;; All compare insns have expanders that save the operands away without
@ -1095,7 +1217,8 @@
&& FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
[(set_attr "type" "fcmp")])
[(set_attr "type" "fcmp")
(set_attr "athlon_decode" "vector")])
(define_insn "*cmpfp_iu"
[(set (reg:CCFPU 17)
@ -1105,7 +1228,8 @@
&& FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
[(set_attr "type" "fcmp")])
[(set_attr "type" "fcmp")
(set_attr "athlon_decode" "vector")])
;; Move instructions.
@ -5440,6 +5564,7 @@
[(set_attr "type" "ishift")
(set_attr "length_opcode" "3")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")])
(define_expand "x86_shift_adj_1"