i386.h (TARGET_NOCONA): New macro.
2004-04-02 Vladimir Makarov <vmakarov@redhat.com> * config/i386/i386.h (TARGET_NOCONA): New macro. (TARGET_CPU_CPP_BUILTINS): Add code for Nocona. (processor_type): Add PROCESSOR_NOCONA. * config/i386/i386.md (cpu): Add nocona to the attribute values. * config/i386/i386.c (nocona_cost): New variable. (m_NOCONA): New macro. (x86_push_memory, x86_movx, x86_cmove, x86_deep_branch, x86_branch_hints, x86_use_sahf, x86_single_stringop, x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves, x86_partial_reg_dependency, x86_memory_mismatch_stall, x86_accumulate_outgoing_args, x86_decompose_lea, x86_arch_always_fancy_math_387, x86_sse_partial_reg_dependency, x86_sse_load0_by_pxor, x86_ext_80387_constants, x86_four_jump_limit): Add m_NOCONA. (override_options): Add nocona_cost to processor_target_table. Set up PROCESSOR_NOCONA for Nocona entry in processor_alias_table. (incdec_operand): Prevent inc/dec generation for Nocona too. (ix86_issue_rate): Add PROCESSOR_NOCONA. From-SVN: r80353
This commit is contained in:
parent
2e9ceb77be
commit
89c43c0a0b
@ -1,3 +1,26 @@
|
||||
2004-04-02 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* config/i386/i386.h (TARGET_NOCONA): New macro.
|
||||
(TARGET_CPU_CPP_BUILTINS): Add code for Nocona.
|
||||
(processor_type): Add PROCESSOR_NOCONA.
|
||||
|
||||
* config/i386/i386.md (cpu): Add nocona to the attribute values.
|
||||
|
||||
* config/i386/i386.c (nocona_cost): New variable.
|
||||
(m_NOCONA): New macro.
|
||||
(x86_push_memory, x86_movx, x86_cmove, x86_deep_branch,
|
||||
x86_branch_hints, x86_use_sahf, x86_single_stringop,
|
||||
x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8,
|
||||
x86_integer_DFmode_moves, x86_partial_reg_dependency,
|
||||
x86_memory_mismatch_stall, x86_accumulate_outgoing_args,
|
||||
x86_decompose_lea, x86_arch_always_fancy_math_387,
|
||||
x86_sse_partial_reg_dependency, x86_sse_load0_by_pxor,
|
||||
x86_ext_80387_constants, x86_four_jump_limit): Add m_NOCONA.
|
||||
(override_options): Add nocona_cost to processor_target_table.
|
||||
Set up PROCESSOR_NOCONA for Nocona entry in processor_alias_table.
|
||||
(incdec_operand): Prevent inc/dec generation for Nocona too.
|
||||
(ix86_issue_rate): Add PROCESSOR_NOCONA.
|
||||
|
||||
2004-04-01 Andrew Pinski <pinskia@physics.uc.edu>
|
||||
|
||||
* rtlanal.c (find_reg_note): Manually
|
||||
|
@ -457,6 +457,50 @@ struct processor_costs pentium4_cost = {
|
||||
43, /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
static const
|
||||
struct processor_costs nocona_cost = {
|
||||
1, /* cost of an add instruction */
|
||||
1, /* cost of a lea instruction */
|
||||
1, /* variable shift costs */
|
||||
1, /* constant shift costs */
|
||||
{10, 10, 10, 10, 10}, /* cost of starting a multiply */
|
||||
0, /* cost of multiply per each bit set */
|
||||
{66, 66, 66, 66, 66}, /* cost of a divide/mod */
|
||||
1, /* cost of movsx */
|
||||
1, /* cost of movzx */
|
||||
16, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
3, /* cost of reg,reg fld/fst */
|
||||
{12, 12, 12}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 4, 4}, /* cost of storing fp registers */
|
||||
6, /* cost of moving MMX register */
|
||||
{12, 12}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{12, 12}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
6, /* cost of moving SSE register */
|
||||
{12, 12, 12}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{12, 12, 12}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
8, /* MMX or SSE register to integer */
|
||||
128, /* size of prefetch block */
|
||||
8, /* number of parallel prefetches */
|
||||
1, /* Branch cost */
|
||||
6, /* cost of FADD and FSUB insns. */
|
||||
8, /* cost of FMUL instruction. */
|
||||
40, /* cost of FDIV instruction. */
|
||||
3, /* cost of FABS instruction. */
|
||||
3, /* cost of FCHS instruction. */
|
||||
44, /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
const struct processor_costs *ix86_cost = &pentium_cost;
|
||||
|
||||
/* Processor feature/optimization bitmasks. */
|
||||
@ -469,19 +513,20 @@ const struct processor_costs *ix86_cost = &pentium_cost;
|
||||
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
|
||||
#define m_K8 (1<<PROCESSOR_K8)
|
||||
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
|
||||
#define m_NOCONA (1<<PROCESSOR_NOCONA)
|
||||
|
||||
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
|
||||
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
|
||||
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_zero_extend_with_and = m_486 | m_PENT;
|
||||
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
|
||||
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
|
||||
const int x86_double_with_add = ~m_386;
|
||||
const int x86_use_bit_test = m_386;
|
||||
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
|
||||
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
|
||||
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_3dnow_a = m_ATHLON_K8;
|
||||
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
|
||||
const int x86_branch_hints = m_PENT4;
|
||||
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
|
||||
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_branch_hints = m_PENT4 | m_NOCONA;
|
||||
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
|
||||
const int x86_partial_reg_stall = m_PPRO;
|
||||
const int x86_use_loop = m_K6;
|
||||
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
|
||||
@ -492,25 +537,25 @@ const int x86_read_modify = ~(m_PENT | m_PPRO);
|
||||
const int x86_split_long_moves = m_PPRO;
|
||||
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
|
||||
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
|
||||
const int x86_single_stringop = m_386 | m_PENT4;
|
||||
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
|
||||
const int x86_qimode_math = ~(0);
|
||||
const int x86_promote_qi_regs = 0;
|
||||
const int x86_himode_math = ~(m_PPRO);
|
||||
const int x86_promote_hi_regs = m_PPRO;
|
||||
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
|
||||
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
|
||||
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
|
||||
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
|
||||
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
|
||||
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
|
||||
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
|
||||
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
|
||||
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
|
||||
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
|
||||
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
|
||||
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
|
||||
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
|
||||
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
|
||||
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
|
||||
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
|
||||
const int x86_decompose_lea = m_PENT4;
|
||||
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
|
||||
const int x86_shift1 = ~m_486;
|
||||
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
|
||||
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
|
||||
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
|
||||
/* Set for machines where the type and dependencies are resolved on SSE register
|
||||
parts instead of whole registers, so we may maintain just lower part of
|
||||
scalar values in proper format leaving the upper part undefined. */
|
||||
@ -519,14 +564,14 @@ const int x86_sse_partial_regs = m_ATHLON_K8;
|
||||
need for extra instructions beforehand */
|
||||
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
|
||||
const int x86_sse_typeless_stores = m_ATHLON_K8;
|
||||
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
|
||||
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
|
||||
const int x86_use_ffreep = m_ATHLON_K8;
|
||||
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
|
||||
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
|
||||
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
|
||||
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
|
||||
/* Some CPU cores are not able to predict more than 4 branch instructions in
|
||||
the 16 byte window. */
|
||||
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4;
|
||||
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
|
||||
/* In case the average insn count for single function invocation is
|
||||
lower than this constant, emit fast (but longer) prologue and
|
||||
@ -1025,6 +1070,7 @@ static void init_ext_80387_constants (void);
|
||||
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
|
||||
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
|
||||
/* The svr4 ABI for the i386 says that records and unions are returned
|
||||
in memory. */
|
||||
@ -1068,7 +1114,8 @@ override_options (void)
|
||||
{&k6_cost, 0, 0, 32, 7, 32, 7, 32},
|
||||
{&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
|
||||
{&k8_cost, 0, 0, 16, 7, 16, 7, 16}
|
||||
{&k8_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
|
||||
};
|
||||
|
||||
static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
|
||||
@ -1109,10 +1156,10 @@ override_options (void)
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
{"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"k6", PROCESSOR_K6, PTA_MMX},
|
||||
{"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
|
||||
{"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
|
||||
@ -3771,7 +3818,7 @@ incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
|
||||
{
|
||||
/* On Pentium4, the inc and dec operations cause an extra dependency on flag
|
||||
registers, since carry flag is not set. */
|
||||
if (TARGET_PENTIUM4 && !optimize_size)
|
||||
if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
|
||||
return 0;
|
||||
return op == const1_rtx || op == constm1_rtx;
|
||||
}
|
||||
@ -12074,6 +12121,7 @@ ix86_issue_rate (void)
|
||||
case PROCESSOR_PENTIUM4:
|
||||
case PROCESSOR_ATHLON:
|
||||
case PROCESSOR_K8:
|
||||
case PROCESSOR_NOCONA:
|
||||
return 3;
|
||||
|
||||
default:
|
||||
|
@ -219,6 +219,7 @@ extern int target_flags;
|
||||
#define TARGET_PENTIUM4 (ix86_tune == PROCESSOR_PENTIUM4)
|
||||
#define TARGET_K8 (ix86_tune == PROCESSOR_K8)
|
||||
#define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
|
||||
#define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA)
|
||||
|
||||
#define TUNEMASK (1 << ix86_tune)
|
||||
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
|
||||
@ -606,6 +607,8 @@ extern int x86_prefetch_sse;
|
||||
builtin_define ("__tune_k8__"); \
|
||||
else if (TARGET_PENTIUM4) \
|
||||
builtin_define ("__tune_pentium4__"); \
|
||||
else if (TARGET_NOCONA) \
|
||||
builtin_define ("__tune_nocona__"); \
|
||||
\
|
||||
if (TARGET_MMX) \
|
||||
builtin_define ("__MMX__"); \
|
||||
@ -674,6 +677,11 @@ extern int x86_prefetch_sse;
|
||||
builtin_define ("__pentium4"); \
|
||||
builtin_define ("__pentium4__"); \
|
||||
} \
|
||||
else if (ix86_arch == PROCESSOR_NOCONA) \
|
||||
{ \
|
||||
builtin_define ("__nocona"); \
|
||||
builtin_define ("__nocona__"); \
|
||||
} \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
@ -2944,6 +2952,7 @@ enum processor_type
|
||||
PROCESSOR_ATHLON,
|
||||
PROCESSOR_PENTIUM4,
|
||||
PROCESSOR_K8,
|
||||
PROCESSOR_NOCONA,
|
||||
PROCESSOR_max
|
||||
};
|
||||
|
||||
|
@ -149,7 +149,7 @@
|
||||
|
||||
;; Processor type. This attribute must exactly match the processor_type
|
||||
;; enumeration in i386.h.
|
||||
(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8"
|
||||
(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona"
|
||||
(const (symbol_ref "ix86_tune")))
|
||||
|
||||
;; A basic instruction type. Refinements due to arguments to be
|
||||
@ -13330,7 +13330,7 @@
|
||||
[(const_int 0)]
|
||||
{
|
||||
ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
|
||||
operands[3], operands[4], NULL_RTX);
|
||||
operands[3], operands[4], NULL_RTX);
|
||||
DONE;
|
||||
})
|
||||
|
||||
@ -13351,7 +13351,7 @@
|
||||
(match_dup 4)))]
|
||||
{
|
||||
ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
|
||||
operands[3], operands[4], operands[5]);
|
||||
operands[3], operands[4], operands[5]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user