i386.c (x86_use_leave, [...]): Merge into ...

* config/i386/i386.c (x86_use_leave, x86_push_memory,
	x86_zero_extend_with_and, x86_movx, x86_double_with_add,
	x86_use_bit_test, x86_unroll_strlen, x86_deep_branch,
	x86_branch_hints, x86_use_sahf, x86_partial_reg_stall,
	x86_partial_flag_reg_stall, x86_use_himode_fiop, x86_use_simode_fiop,
	x86_use_mov0, x86_use_cltd, x86_use_xchgb, x86_read_modify_write,
	x86_read_modify, x86_split_long_moves, x86_promote_QImode, x86_fast_prefix,
	x86_single_stringop, x86_qimode_math, x86_promote_qi_regs,
	x86_himode_math, x86_promote_hi_regs, x86_sub_esp_4, x86_sub_esp_8,
	x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves, 
	x86_partial_reg_dependency, x86_memory_mismatch_stall, 
	x86_prologue_using_move, x86_epilogue_using_move, x86_shift1,
	x86_sse_partial_reg_dependency, x86_sse_split_regs, 
	x86_sse_unaligned_move_optimal, x86_sse_typeless_stores,
	x86_sse_load0_by_pxor, x86_use_ffreep, x86_use_incdec,
	x86_inter_unit_moves, x86_ext_80387_constants, x86_four_jump_limit,
	x86_schedule, x86_use_bt, x86_pad_returns): Merge into ...
	(ix86_tune_features): ... here.  New array.
	(x86_cmove, x86_cmpxchg, x86_cmpxchg8b, x86_xadd, x86_bswap): Merge into ...
	(ix86_arch_features): ... here.  New array.
	(x86_3dnow_a): Remove.
	(x86_accumulate_outgoing_args): Make static.
	(x86_arch_always_fancy_math_387): Make static.
	(ix86_tune_mask, ix86_arch_mask): Move ...
	(override_options): ... to local variables here.  Apply the
	appropriate mask to each element of ix86_arch_features and
	ix86_tune_features.  Adjust TARGET_CMOVE and TARGET_USE_SAHF
	as was done in the old macros.
	(standard_80387_constant_p): Use TARGET_EXT_80387_CONSTANTS.
	* config/i386/i386.h (x86_use_leave, x86_push_memory,
	x86_zero_extend_with_and, x86_use_bit_test, x86_cmove, x86_deep_branch,
	x86_branch_hints, x86_unroll_strlen, x86_double_with_add,
	x86_partial_reg_stall, x86_movx, x86_use_himode_fiop,
	x86_use_simode_fiop, x86_use_mov0, x86_use_cltd, x86_use_xchgb,
	x86_read_modify_write, x86_read_modify, x86_split_long_moves,
	x86_promote_QImode, x86_single_stringop, x86_fast_prefix,
	x86_himode_math, x86_qimode_math, x86_promote_qi_regs,
	x86_promote_hi_regs, x86_integer_DFmode_moves, x86_add_esp_4,
	x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8,
	x86_partial_reg_dependency, x86_memory_mismatch_stall,
	x86_accumulate_outgoing_args, x86_prologue_using_move,
	x86_epilogue_using_move, x86_decompose_lea,
	x86_arch_always_fancy_math_387, x86_shift1,
	x86_sse_partial_reg_dependency, x86_sse_split_regs,
	x86_sse_unaligned_move_optimal, x86_sse_typeless_stores,	
	x86_sse_load0_by_pxor, x86_use_ffreep, x86_inter_unit_moves,
	x86_schedule, x86_use_bt, x86_cmpxchg, x86_cmpxchg8b, x86_xadd,
	x86_use_incdec, x86_pad_returns, x86_bswap,
	x86_partial_flag_reg_stall): Remove.
	(enum ix86_tune_indices): New.
	(ix86_tune_features): New.
	(TARGET_USE_LEAVE, TARGET_PUSH_MEMORY, TARGET_ZERO_EXTEND_WITH_AND,
	TARGET_USE_BIT_TEST, TARGET_UNROLL_STRLEN,
	TARGET_DEEP_BRANCH_PREDICTION, TARGET_BRANCH_PREDICTION_HINTS,
	TARGET_DOUBLE_WITH_ADD, TARGET_USE_SAHF, TARGET_MOVX,
	TARGET_PARTIAL_REG_STALL, TARGET_PARTIAL_FLAG_REG_STALL,
	TARGET_USE_HIMODE_FIOP, TARGET_USE_SIMODE_FIOP, TARGET_USE_MOV0,
	TARGET_USE_CLTD, TARGET_USE_XCHGB, TARGET_SPLIT_LONG_MOVES,
	TARGET_READ_MODIFY_WRITE, TARGET_READ_MODIFY, TARGET_PROMOTE_QImode,
	TARGET_FAST_PREFIX, TARGET_SINGLE_STRINGOP, TARGET_QIMODE_MATH,
	TARGET_HIMODE_MATH, TARGET_PROMOTE_QI_REGS, TARGET_PROMOTE_HI_REGS,
	TARGET_ADD_ESP_4, TARGET_ADD_ESP_8, TARGET_SUB_ESP_4,
	TARGET_SUB_ESP_8, TARGET_INTEGER_DFMODE_MOVES,
	TARGET_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REG_DEPENDENCY,
	TARGET_SSE_UNALIGNED_MOVE_OPTIMAL, TARGET_SSE_SPLIT_REGS,
	TARGET_SSE_TYPELESS_STORES, TARGET_SSE_LOAD0_BY_PXOR,
	TARGET_MEMORY_MISMATCH_STALL, TARGET_PROLOGUE_USING_MOVE,
	TARGET_EPILOGUE_USING_MOVE, TARGET_SHIFT1, TARGET_USE_FFREEP,
	TARGET_INTER_UNIT_MOVES, TARGET_FOUR_JUMP_LIMIT, TARGET_SCHEDULE,
	TARGET_USE_BT, TARGET_USE_INCDEC, TARGET_PAD_RETURNS,
	TARGET_EXT_80387_CONSTANTS): Use it.
	(enum ix86_arch_indices): New.
	(ix86_arch_features): New.
	(TARGET_CMOVE, TARGET_CMPXCHG, TARGET_CMPXCHG8B, TARGET_XADD,
	TARGET_BSWAP): Use it.
	(ix86_tune_mask, ix86_arch_mask): Remove.

From-SVN: r122621
Richard Henderson 2007-03-06 07:59:38 -08:00 committed by Richard Henderson
parent 14da607343
commit 80fd744fda
3 changed files with 510 additions and 283 deletions
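
In outline, the change replaces dozens of per-flag bitmask globals (tested as x86_use_leave & ix86_tune_mask at every use site) with two arrays, ix86_tune_features[] and ix86_arch_features[], that are masked once in override_options and then read directly by the TARGET_* macros. The following stand-alone sketch mirrors that scheme with a made-up two-entry feature list and a toy processor enum; it illustrates the pattern and is not the literal GCC code.

#include <stdio.h>

/* Toy processor list; the real PROCESSOR_* values live in i386.h.  */
enum processor_type { PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM, PROCESSOR_max };

#define m_386  (1u << PROCESSOR_I386)
#define m_486  (1u << PROCESSOR_I486)
#define m_PENT (1u << PROCESSOR_PENTIUM)

/* Feature tests against the various tunings (two made-up entries).  */
enum ix86_tune_indices { X86_TUNE_USE_LEAVE, X86_TUNE_ZERO_EXTEND_WITH_AND, X86_TUNE_LAST };

unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE */            m_386,
  /* X86_TUNE_ZERO_EXTEND_WITH_AND */ m_486 | m_PENT,
};

/* The TARGET_* macros become plain array reads.  */
#define TARGET_USE_LEAVE            ix86_tune_features[X86_TUNE_USE_LEAVE]
#define TARGET_ZERO_EXTEND_WITH_AND ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]

/* override_options applies the -mtune mask once, up front.  */
static void
apply_tune_mask (enum processor_type ix86_tune)
{
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  int i;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] &= ix86_tune_mask;
}

int
main (void)
{
  apply_tune_mask (PROCESSOR_PENTIUM);
  /* Prints 0 for use-leave, nonzero for zero-extend-with-and.  */
  printf ("leave=%u zero_extend_with_and=%u\n",
          TARGET_USE_LEAVE, TARGET_ZERO_EXTEND_WITH_AND);
  return 0;
}

The ix86_arch_features side works the same way, keyed off -march via ix86_arch_mask instead of -mtune.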

gcc/ChangeLog (the new entry is quoted in full above)
gcc/config/i386/i386.c
@@ -1004,187 +1004,221 @@ const struct processor_costs *ix86_cost = &pentium_cost;
(PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
Generic64 seems like good code size tradeoff. We can't enable it for 32bit
generic because it is not working well with PPro base chips. */
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
| m_GENERIC64;
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
/* Enable to zero extend integer registers to avoid partial dependencies */
const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
| m_K6 | m_CORE2 | m_GENERIC;
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
| m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Branch hints were put in P4 based on simulation result. But
after P4 was made, no performance benefit was observed with
branch hints. It also increases the code size. As the result,
icc never generates branch hints. */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
/*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
compilation setting as well. However in current implementation the
partial register stalls are not eliminated very well - they can
be introduced via subregs synthesized by combine and can happen
in caller/callee saving sequences.
Because this option pays back little on PPro based chips and is in conflict
with partial reg. dependencies used by Athlon/P4 based chips, it is better
to leave it off for generic32 for now. */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
| m_CORE2 | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
/* Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
const int x86_use_xchgb = m_PENT4;
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
| m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
/* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls. Just like
the x86_partial_reg_stall this option might be considered for Generic32
if our scheme for avoiding partial stalls was more effective. */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
| m_CORE2 | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
| m_CORE2 | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
| m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Enable if integer moves are preferred for DFmode copies */
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
| m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
| m_CORE2 | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
| m_CORE2 | m_GENERIC;
/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
for outgoing arguments will be computed and placed into the variable
`current_function_outgoing_args_size'. No space will be pushed onto the stack
for each call; instead, the function prologue should increase the stack frame
size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
not proper. */
const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
| m_NOCONA | m_PPRO | m_CORE2
| m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
| m_ATHLON_K8_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC;
/* In Generic model we have an conflict here in between PPro/Pentium4 based chips
that thread 128bit SSE registers as single units versus K8 based chips that
divide SSE registers to two 64bit halves.
x86_sse_partial_reg_dependency promote all store destinations to be 128bit
to allow register renaming on 128bit SSE units, but usually results in one
extra microop on 64bit SSE units. Experimental results shows that disabling
this option on P4 brings over 20% SPECfp regression, while enabling it on
K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
of moves. */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
| m_GENERIC | m_AMDFAM10;
/* Set for machines where the type and dependencies are resolved on SSE
register parts instead of whole registers, so we may maintain just
lower part of scalar values in proper format leaving the upper part
undefined. */
const int x86_sse_split_regs = m_ATHLON_K8;
/* Code generation for scalar reg-reg moves of single and double precision data:
if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
movaps reg, reg
else
movss reg, reg
if (x86_sse_partial_reg_dependency == true)
movapd reg, reg
else
movsd reg, reg
Code generation for scalar loads of double precision data:
if (x86_sse_split_regs == true)
movlpd mem, reg (gas syntax)
else
movsd mem, reg
Code generation for unaligned packed loads of single precision data
(x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
if (x86_sse_unaligned_move_optimal)
movups mem, reg
if (x86_sse_partial_reg_dependency == true)
{
xorps reg, reg
movlps mem, reg
movhps mem+8, reg
}
else
{
movlps mem, reg
movhps mem+8, reg
}
Code generation for unaligned packed loads of double precision data
(x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
if (x86_sse_unaligned_move_optimal)
movupd mem, reg
if (x86_sse_split_regs == true)
{
movlpd mem, reg
movhpd mem+8, reg
}
else
{
movsd mem, reg
movhpd mem+8, reg
}
*/
const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
| m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Some CPU cores are not able to predict more than 4 branch instructions in
the 16 byte window. */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
| m_CORE2 | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
/* Compare and exchange was added for 80486. */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium. */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Exchange and add was added for 80486. */
const int x86_xadd = ~m_386;
/* Byteswap was added for 80486. */
const int x86_bswap = ~m_386;
const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
/* Feature tests against the various tunings. */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
negatively, so enabling for Generic64 seems like good code size
tradeoff. We can't enable it for 32bit generic because it does not
work well with PPro base chips. */
m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
/* X86_TUNE_PUSH_MEMORY */
m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_ZERO_EXTEND_WITH_AND */
m_486 | m_PENT,
/* X86_TUNE_USE_BIT_TEST */
m_386,
/* X86_TUNE_UNROLL_STRLEN */
m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
/* X86_TUNE_DEEP_BRANCH_PREDICTION */
m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
was observed with branch hints. It also increases the code size.
As a result, icc never generates branch hints. */
0,
/* X86_TUNE_DOUBLE_WITH_ADD */
~m_386,
/* X86_TUNE_USE_SAHF */
m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32,
/* | m_GENERIC | m_ATHLON_K8 ? */
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
partial dependencies */
m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
register stalls on Generic32 compilation setting as well. However
in current implementation the partial register stalls are not eliminated
very well - they can be introduced via subregs synthesized by combine
and can happen in caller/callee saving sequences. Because this option
pays back little on PPro based chips and is in conflict with partial reg
dependencies used by Athlon/P4 based chips, it is better to leave it off
for generic32 for now. */
m_PPRO,
/* X86_TUNE_PARTIAL_FLAG_REG_STALL */
m_CORE2 | m_GENERIC,
/* X86_TUNE_USE_HIMODE_FIOP */
m_386 | m_486 | m_K6_GEODE,
/* X86_TUNE_USE_SIMODE_FIOP */
~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
/* X86_TUNE_USE_MOV0 */
m_K6,
/* X86_TUNE_USE_CLTD */
~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
m_PENT4,
/* X86_TUNE_SPLIT_LONG_MOVES */
m_PPRO,
/* X86_TUNE_READ_MODIFY_WRITE */
~m_PENT,
/* X86_TUNE_READ_MODIFY */
~(m_PENT | m_PPRO),
/* X86_TUNE_PROMOTE_QIMODE */
m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
| m_GENERIC /* | m_PENT4 ? */,
/* X86_TUNE_FAST_PREFIX */
~(m_PENT | m_486 | m_386),
/* X86_TUNE_SINGLE_STRINGOP */
m_386 | m_PENT4 | m_NOCONA,
/* X86_TUNE_QIMODE_MATH */
~0,
/* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
might be considered for Generic32 if our scheme for avoiding partial
stalls was more effective. */
~m_PPRO,
/* X86_TUNE_PROMOTE_QI_REGS */
0,
/* X86_TUNE_PROMOTE_HI_REGS */
m_PPRO,
/* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_ADD_ESP_8 */
m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
| m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SUB_ESP_4 */
m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SUB_ESP_8 */
m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
| m_GENERIC | m_GEODE),
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
conflict here in between PPro/Pentium4 based chips that thread 128bit
SSE registers as single units versus K8 based chips that divide SSE
registers to two 64bit halves. This knob promotes all store destinations
to be 128bit to allow register renaming on 128bit SSE units, but usually
results in one extra microop on 64bit SSE units. Experimental results
shows that disabling this option on P4 brings over 20% SPECfp regression,
while enabling it on K8 brings roughly 2.4% regression that can be partly
masked by careful scheduling of moves. */
m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
/* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
m_AMDFAM10,
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
are resolved on SSE register parts instead of whole registers, so we may
maintain just lower part of scalar values in proper format leaving the
upper part undefined. */
m_ATHLON_K8,
/* X86_TUNE_SSE_TYPELESS_STORES */
m_ATHLON_K8_AMDFAM10,
/* X86_TUNE_SSE_LOAD0_BY_PXOR */
m_PPRO | m_PENT4 | m_NOCONA,
/* X86_TUNE_MEMORY_MISMATCH_STALL */
m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_PROLOGUE_USING_MOVE */
m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
/* X86_TUNE_EPILOGUE_USING_MOVE */
m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
/* X86_TUNE_SHIFT1 */
~m_486,
/* X86_TUNE_USE_FFREEP */
m_ATHLON_K8_AMDFAM10,
/* X86_TUNE_INTER_UNIT_MOVES */
~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SCHEDULE */
m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
/* X86_TUNE_USE_BT */
m_ATHLON_K8_AMDFAM10,
/* X86_TUNE_USE_INCDEC */
~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
/* X86_TUNE_PAD_RETURNS */
m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
/* X86_TUNE_EXT_80387_CONSTANTS */
m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
};
/* Feature tests against the various architecture variations. */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
/* X86_ARCH_CMOVE */
m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
/* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
~m_386,
/* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
~(m_386 | m_486),
/* X86_ARCH_XADD: Exchange and add was added for 80486. */
~m_386,
/* X86_ARCH_BSWAP: Byteswap was added for 80486. */
~m_386,
};
static const unsigned int x86_accumulate_outgoing_args
= m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
static const unsigned int x86_arch_always_fancy_math_387
= m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC;
static enum stringop_alg stringop_alg = no_stringop;
@@ -1397,11 +1431,9 @@ enum fpmath_unit ix86_fpmath;
/* Which cpu are we scheduling for. */
enum processor_type ix86_tune;
int ix86_tune_mask;
/* Which instruction set architecture to use. */
enum processor_type ix86_arch;
int ix86_arch_mask;
/* true if sse prefetch instruction is not NOOP. */
int x86_prefetch_sse;
@@ -1811,6 +1843,7 @@ override_options (void)
{
int i;
int ix86_tune_defaulted = 0;
unsigned int ix86_arch_mask, ix86_tune_mask;
/* Comes from final.c -- no real reason to change it. */
#define MAX_CODE_ALIGN 16
@@ -2124,6 +2157,10 @@ override_options (void)
if (i == pta_size)
error ("bad value (%s) for -march= switch", ix86_arch_string);
ix86_arch_mask = 1u << ix86_arch;
for (i = 0; i < X86_ARCH_LAST; ++i)
ix86_arch_features[i] &= ix86_arch_mask;
for (i = 0; i < pta_size; i++)
if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
{
@@ -2155,8 +2192,9 @@ override_options (void)
if (i == pta_size)
error ("bad value (%s) for -mtune= switch", ix86_tune_string);
ix86_arch_mask = 1 << ix86_arch;
ix86_tune_mask = 1 << ix86_tune;
ix86_tune_mask = 1u << ix86_tune;
for (i = 0; i < X86_TUNE_LAST; ++i)
ix86_tune_features[i] &= ix86_tune_mask;
if (optimize_size)
ix86_cost = &size_cost;
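
The two masking loops above are where the old per-use masking moves: because every m_* constant is a single processor bit (the initializers simply OR them together, e.g. m_PPRO | m_PENT4), ANDing each element once with 1u << ix86_tune leaves it either zero or the selected CPU's bit, so the old test and the new pre-masked element agree. A minimal stand-alone check of that equivalence, using a hypothetical x86_foo flag:

#include <assert.h>

enum { PROCESSOR_PENTIUM4 = 3 };              /* placeholder value for illustration */
#define m_PENT4 (1u << PROCESSOR_PENTIUM4)

int
main (void)
{
  unsigned int x86_foo = m_PENT4 | 1u;        /* hypothetical old-style flag */
  unsigned int feature = m_PENT4 | 1u;        /* same initializer in the new array */
  unsigned int ix86_tune_mask = 1u << PROCESSOR_PENTIUM4;

  feature &= ix86_tune_mask;                  /* done once in override_options */

  /* Old per-use test vs. new pre-masked element: same truth value.  */
  assert (((x86_foo & ix86_tune_mask) != 0) == (feature != 0));
  return 0;
}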
@@ -2366,7 +2404,6 @@ override_options (void)
error ("-msseregparm used without SSE enabled");
ix86_fpmath = TARGET_FPMATH_DEFAULT;
if (ix86_fpmath_string != 0)
{
if (! strcmp (ix86_fpmath_string, "387"))
@@ -2425,6 +2462,15 @@ override_options (void)
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
}
/* For sane SSE instruction set generation we need fcomi instruction.
It is safe to enable all CMOVE instructions. */
if (TARGET_SSE)
TARGET_CMOVE = 1;
/* ??? Any idea why this is unconditionally disabled for 64-bit? */
if (TARGET_64BIT)
TARGET_USE_SAHF = 0;
/* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
{
char *p;
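
The two assignments above replace conditions that used to live inside the removed macros: the old i386.h defined TARGET_CMOVE as ((x86_cmove & ix86_arch_mask) || TARGET_SSE) and TARGET_USE_SAHF as ((x86_use_sahf & ix86_tune_mask) && !TARGET_64BIT). An array element cannot carry those extra terms, but since the new macros expand to lvalues, override_options can fold them in once. A small stand-alone sketch of that idea (TARGET_SSE here is just a local stand-in for the real target flag):

#include <stdio.h>

enum ix86_arch_indices { X86_ARCH_CMOVE, X86_ARCH_LAST };
unsigned int ix86_arch_features[X86_ARCH_LAST];   /* already masked by -march */
#define TARGET_CMOVE ix86_arch_features[X86_ARCH_CMOVE]

int
main (void)
{
  int TARGET_SSE = 1;          /* stand-in: pretend -msse is enabled */

  /* The macro names an array element, i.e. an assignable lvalue, so the
     old "|| TARGET_SSE" term can be applied once instead of at every use.  */
  if (TARGET_SSE)
    TARGET_CMOVE = 1;

  printf ("cmove=%u\n", TARGET_CMOVE);
  return 0;
}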
@@ -4999,7 +5045,7 @@ standard_80387_constant_p (rtx x)
/* For XFmode constants, try to find a special 80387 instruction when
optimizing for size or on those CPUs that benefit from them. */
if (GET_MODE (x) == XFmode
&& (optimize_size || x86_ext_80387_constants & ix86_tune_mask))
&& (optimize_size || TARGET_EXT_80387_CONSTANTS))
{
int i;
@@ -9499,6 +9545,55 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
/* Implement the movmisalign patterns for SSE. Non-SSE modes go
straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
movaps reg, reg
else
movss reg, reg
if (x86_sse_partial_reg_dependency == true)
movapd reg, reg
else
movsd reg, reg
Code generation for scalar loads of double precision data:
if (x86_sse_split_regs == true)
movlpd mem, reg (gas syntax)
else
movsd mem, reg
Code generation for unaligned packed loads of single precision data
(x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
if (x86_sse_unaligned_move_optimal)
movups mem, reg
if (x86_sse_partial_reg_dependency == true)
{
xorps reg, reg
movlps mem, reg
movhps mem+8, reg
}
else
{
movlps mem, reg
movhps mem+8, reg
}
Code generation for unaligned packed loads of double precision data
(x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
if (x86_sse_unaligned_move_optimal)
movupd mem, reg
if (x86_sse_split_regs == true)
{
movlpd mem, reg
movhpd mem+8, reg
}
else
{
movsd mem, reg
movhpd mem+8, reg
}
*/
void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])

gcc/config/i386/i386.h
@@ -179,110 +179,165 @@ extern const struct processor_costs *ix86_cost;
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
extern const int x86_use_bit_test, x86_cmove, x86_deep_branch;
extern const int x86_branch_hints, x86_unroll_strlen;
extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx;
extern const int x86_use_himode_fiop, x86_use_simode_fiop;
extern const int x86_use_mov0, x86_use_cltd, x86_use_xchgb;
extern const int x86_read_modify_write, x86_read_modify, x86_split_long_moves;
extern const int x86_promote_QImode, x86_single_stringop, x86_fast_prefix;
extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
extern const int x86_promote_hi_regs, x86_integer_DFmode_moves;
extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
extern const int x86_epilogue_using_move, x86_decompose_lea;
extern const int x86_arch_always_fancy_math_387, x86_shift1;
extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs;
extern const int x86_sse_unaligned_move_optimal;
extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
extern const int x86_use_ffreep;
extern const int x86_inter_unit_moves, x86_schedule;
extern const int x86_use_bt;
extern const int x86_cmpxchg, x86_cmpxchg8b, x86_xadd;
extern const int x86_use_incdec;
extern const int x86_pad_returns;
extern const int x86_bswap;
extern const int x86_partial_flag_reg_stall;
extern int x86_prefetch_sse, x86_cmpxchg16b;
#define TARGET_USE_LEAVE (x86_use_leave & ix86_tune_mask)
#define TARGET_PUSH_MEMORY (x86_push_memory & ix86_tune_mask)
#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & ix86_tune_mask)
#define TARGET_USE_BIT_TEST (x86_use_bit_test & ix86_tune_mask)
#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & ix86_tune_mask)
/* For sane SSE instruction set generation we need fcomi instruction. It is
safe to enable all CMOVE instructions. */
#define TARGET_CMOVE ((x86_cmove & ix86_arch_mask) || TARGET_SSE)
#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & ix86_tune_mask)
#define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & ix86_tune_mask)
#define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & ix86_tune_mask)
#define TARGET_USE_SAHF ((x86_use_sahf & ix86_tune_mask) && !TARGET_64BIT)
#define TARGET_MOVX (x86_movx & ix86_tune_mask)
#define TARGET_PARTIAL_REG_STALL (x86_partial_reg_stall & ix86_tune_mask)
#define TARGET_PARTIAL_FLAG_REG_STALL \
(x86_partial_flag_reg_stall & ix86_tune_mask)
#define TARGET_USE_HIMODE_FIOP (x86_use_himode_fiop & ix86_tune_mask)
#define TARGET_USE_SIMODE_FIOP (x86_use_simode_fiop & ix86_tune_mask)
#define TARGET_USE_MOV0 (x86_use_mov0 & ix86_tune_mask)
#define TARGET_USE_CLTD (x86_use_cltd & ix86_tune_mask)
#define TARGET_USE_XCHGB (x86_use_xchgb & ix86_tune_mask)
#define TARGET_SPLIT_LONG_MOVES (x86_split_long_moves & ix86_tune_mask)
#define TARGET_READ_MODIFY_WRITE (x86_read_modify_write & ix86_tune_mask)
#define TARGET_READ_MODIFY (x86_read_modify & ix86_tune_mask)
#define TARGET_PROMOTE_QImode (x86_promote_QImode & ix86_tune_mask)
#define TARGET_FAST_PREFIX (x86_fast_prefix & ix86_tune_mask)
#define TARGET_SINGLE_STRINGOP (x86_single_stringop & ix86_tune_mask)
#define TARGET_QIMODE_MATH (x86_qimode_math & ix86_tune_mask)
#define TARGET_HIMODE_MATH (x86_himode_math & ix86_tune_mask)
#define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & ix86_tune_mask)
#define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & ix86_tune_mask)
#define TARGET_ADD_ESP_4 (x86_add_esp_4 & ix86_tune_mask)
#define TARGET_ADD_ESP_8 (x86_add_esp_8 & ix86_tune_mask)
#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & ix86_tune_mask)
#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & ix86_tune_mask)
#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & ix86_tune_mask)
#define TARGET_PARTIAL_REG_DEPENDENCY \
(x86_partial_reg_dependency & ix86_tune_mask)
#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
(x86_sse_partial_reg_dependency & ix86_tune_mask)
#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
(x86_sse_unaligned_move_optimal & ix86_tune_mask)
#define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & ix86_tune_mask)
#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & ix86_tune_mask)
#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & ix86_tune_mask)
#define TARGET_MEMORY_MISMATCH_STALL \
(x86_memory_mismatch_stall & ix86_tune_mask)
#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & ix86_tune_mask)
#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & ix86_tune_mask)
#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
#define TARGET_SHIFT1 (x86_shift1 & ix86_tune_mask)
#define TARGET_USE_FFREEP (x86_use_ffreep & ix86_tune_mask)
#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & ix86_tune_mask)
#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & ix86_tune_mask)
#define TARGET_SCHEDULE (x86_schedule & ix86_tune_mask)
#define TARGET_USE_BT (x86_use_bt & ix86_tune_mask)
#define TARGET_USE_INCDEC (x86_use_incdec & ix86_tune_mask)
#define TARGET_PAD_RETURNS (x86_pad_returns & ix86_tune_mask)
#define ASSEMBLER_DIALECT (ix86_asm_dialect)
#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
#define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \
&& (ix86_fpmath & FPMATH_387))
#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
#define TARGET_CMPXCHG (x86_cmpxchg & ix86_arch_mask)
#define TARGET_CMPXCHG8B (x86_cmpxchg8b & ix86_arch_mask)
#define TARGET_CMPXCHG16B (x86_cmpxchg16b)
#define TARGET_XADD (x86_xadd & ix86_arch_mask)
#define TARGET_BSWAP (x86_bswap & ix86_arch_mask)
/* Feature tests against the various tunings. */
enum ix86_tune_indices {
X86_TUNE_USE_LEAVE,
X86_TUNE_PUSH_MEMORY,
X86_TUNE_ZERO_EXTEND_WITH_AND,
X86_TUNE_USE_BIT_TEST,
X86_TUNE_UNROLL_STRLEN,
X86_TUNE_DEEP_BRANCH_PREDICTION,
X86_TUNE_BRANCH_PREDICTION_HINTS,
X86_TUNE_DOUBLE_WITH_ADD,
X86_TUNE_USE_SAHF, /* && !TARGET_64BIT */
X86_TUNE_MOVX,
X86_TUNE_PARTIAL_REG_STALL,
X86_TUNE_PARTIAL_FLAG_REG_STALL,
X86_TUNE_USE_HIMODE_FIOP,
X86_TUNE_USE_SIMODE_FIOP,
X86_TUNE_USE_MOV0,
X86_TUNE_USE_CLTD,
X86_TUNE_USE_XCHGB,
X86_TUNE_SPLIT_LONG_MOVES,
X86_TUNE_READ_MODIFY_WRITE,
X86_TUNE_READ_MODIFY,
X86_TUNE_PROMOTE_QIMODE,
X86_TUNE_FAST_PREFIX,
X86_TUNE_SINGLE_STRINGOP,
X86_TUNE_QIMODE_MATH,
X86_TUNE_HIMODE_MATH,
X86_TUNE_PROMOTE_QI_REGS,
X86_TUNE_PROMOTE_HI_REGS,
X86_TUNE_ADD_ESP_4,
X86_TUNE_ADD_ESP_8,
X86_TUNE_SUB_ESP_4,
X86_TUNE_SUB_ESP_8,
X86_TUNE_INTEGER_DFMODE_MOVES,
X86_TUNE_PARTIAL_REG_DEPENDENCY,
X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY,
X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL,
X86_TUNE_SSE_SPLIT_REGS,
X86_TUNE_SSE_TYPELESS_STORES,
X86_TUNE_SSE_LOAD0_BY_PXOR,
X86_TUNE_MEMORY_MISMATCH_STALL,
X86_TUNE_PROLOGUE_USING_MOVE,
X86_TUNE_EPILOGUE_USING_MOVE,
X86_TUNE_SHIFT1,
X86_TUNE_USE_FFREEP,
X86_TUNE_INTER_UNIT_MOVES,
X86_TUNE_FOUR_JUMP_LIMIT,
X86_TUNE_SCHEDULE,
X86_TUNE_USE_BT,
X86_TUNE_USE_INCDEC,
X86_TUNE_PAD_RETURNS,
X86_TUNE_EXT_80387_CONSTANTS,
X86_TUNE_LAST
};
extern unsigned int ix86_tune_features[X86_TUNE_LAST];
#define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE]
#define TARGET_PUSH_MEMORY ix86_tune_features[X86_TUNE_PUSH_MEMORY]
#define TARGET_ZERO_EXTEND_WITH_AND \
ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
#define TARGET_USE_BIT_TEST ix86_tune_features[X86_TUNE_USE_BIT_TEST]
#define TARGET_UNROLL_STRLEN ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
#define TARGET_DEEP_BRANCH_PREDICTION \
ix86_tune_features[X86_TUNE_DEEP_BRANCH_PREDICTION]
#define TARGET_BRANCH_PREDICTION_HINTS \
ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
#define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
#define TARGET_USE_SAHF ix86_tune_features[X86_TUNE_USE_SAHF]
#define TARGET_MOVX ix86_tune_features[X86_TUNE_MOVX]
#define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
#define TARGET_PARTIAL_FLAG_REG_STALL \
ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
#define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
#define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
#define TARGET_USE_MOV0 ix86_tune_features[X86_TUNE_USE_MOV0]
#define TARGET_USE_CLTD ix86_tune_features[X86_TUNE_USE_CLTD]
#define TARGET_USE_XCHGB ix86_tune_features[X86_TUNE_USE_XCHGB]
#define TARGET_SPLIT_LONG_MOVES ix86_tune_features[X86_TUNE_SPLIT_LONG_MOVES]
#define TARGET_READ_MODIFY_WRITE ix86_tune_features[X86_TUNE_READ_MODIFY_WRITE]
#define TARGET_READ_MODIFY ix86_tune_features[X86_TUNE_READ_MODIFY]
#define TARGET_PROMOTE_QImode ix86_tune_features[X86_TUNE_PROMOTE_QIMODE]
#define TARGET_FAST_PREFIX ix86_tune_features[X86_TUNE_FAST_PREFIX]
#define TARGET_SINGLE_STRINGOP ix86_tune_features[X86_TUNE_SINGLE_STRINGOP]
#define TARGET_QIMODE_MATH ix86_tune_features[X86_TUNE_QIMODE_MATH]
#define TARGET_HIMODE_MATH ix86_tune_features[X86_TUNE_HIMODE_MATH]
#define TARGET_PROMOTE_QI_REGS ix86_tune_features[X86_TUNE_PROMOTE_QI_REGS]
#define TARGET_PROMOTE_HI_REGS ix86_tune_features[X86_TUNE_PROMOTE_HI_REGS]
#define TARGET_ADD_ESP_4 ix86_tune_features[X86_TUNE_ADD_ESP_4]
#define TARGET_ADD_ESP_8 ix86_tune_features[X86_TUNE_ADD_ESP_8]
#define TARGET_SUB_ESP_4 ix86_tune_features[X86_TUNE_SUB_ESP_4]
#define TARGET_SUB_ESP_8 ix86_tune_features[X86_TUNE_SUB_ESP_8]
#define TARGET_INTEGER_DFMODE_MOVES \
ix86_tune_features[X86_TUNE_INTEGER_DFMODE_MOVES]
#define TARGET_PARTIAL_REG_DEPENDENCY \
ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
ix86_tune_features[X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL]
#define TARGET_SSE_SPLIT_REGS ix86_tune_features[X86_TUNE_SSE_SPLIT_REGS]
#define TARGET_SSE_TYPELESS_STORES \
ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES]
#define TARGET_SSE_LOAD0_BY_PXOR ix86_tune_features[X86_TUNE_SSE_LOAD0_BY_PXOR]
#define TARGET_MEMORY_MISMATCH_STALL \
ix86_tune_features[X86_TUNE_MEMORY_MISMATCH_STALL]
#define TARGET_PROLOGUE_USING_MOVE \
ix86_tune_features[X86_TUNE_PROLOGUE_USING_MOVE]
#define TARGET_EPILOGUE_USING_MOVE \
ix86_tune_features[X86_TUNE_EPILOGUE_USING_MOVE]
#define TARGET_SHIFT1 ix86_tune_features[X86_TUNE_SHIFT1]
#define TARGET_USE_FFREEP ix86_tune_features[X86_TUNE_USE_FFREEP]
#define TARGET_INTER_UNIT_MOVES ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES]
#define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
#define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE]
#define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT]
#define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC]
#define TARGET_PAD_RETURNS ix86_tune_features[X86_TUNE_PAD_RETURNS]
#define TARGET_EXT_80387_CONSTANTS \
ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
X86_ARCH_CMOVE, /* || TARGET_SSE */
X86_ARCH_CMPXCHG,
X86_ARCH_CMPXCHG8B,
X86_ARCH_XADD,
X86_ARCH_BSWAP,
X86_ARCH_LAST
};
extern unsigned int ix86_arch_features[X86_ARCH_LAST];
#define TARGET_CMOVE ix86_arch_features[X86_ARCH_CMOVE]
#define TARGET_CMPXCHG ix86_arch_features[X86_ARCH_CMPXCHG]
#define TARGET_CMPXCHG8B ix86_arch_features[X86_ARCH_CMPXCHG8B]
#define TARGET_XADD ix86_arch_features[X86_ARCH_XADD]
#define TARGET_BSWAP ix86_arch_features[X86_ARCH_BSWAP]
#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
extern int x86_prefetch_sse;
#define TARGET_PREFETCH_SSE x86_prefetch_sse
extern int x86_cmpxchg16b;
#define TARGET_CMPXCHG16B x86_cmpxchg16b
#define ASSEMBLER_DIALECT (ix86_asm_dialect)
#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
#define TARGET_MIX_SSE_I387 \
((ix86_fpmath & (FPMATH_SSE | FPMATH_387)) == (FPMATH_SSE | FPMATH_387))
#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
#ifndef TARGET_64BIT_DEFAULT
#define TARGET_64BIT_DEFAULT 0
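
With the macros above in place, nothing changes syntactically for the rest of the backend: callers keep using the same TARGET_* names, which now expand to array elements fixed up once at option-processing time instead of flag & ix86_tune_mask expressions re-evaluated at every site (the standard_80387_constant_p hunk earlier is the one caller the patch touches). A stand-alone sketch of a hypothetical consumer; the feature value and the ffreep/fstp templates are illustrative only:

#include <stdio.h>

enum ix86_tune_indices { X86_TUNE_USE_FFREEP, X86_TUNE_LAST };
/* Pretend override_options already masked this for an Athlon/K8-class -mtune.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = { 1 };
#define TARGET_USE_FFREEP ix86_tune_features[X86_TUNE_USE_FFREEP]

/* Hypothetical consumer: the caller keeps writing TARGET_USE_FFREEP; only the
   macro's expansion changed from "x86_use_ffreep & ix86_tune_mask" to an
   array read.  */
static const char *
pop_insn_template (void)
{
  return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
}

int
main (void)
{
  puts (pop_insn_template ());
  return 0;
}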
@@ -2132,10 +2187,7 @@ enum processor_type
};
extern enum processor_type ix86_tune;
extern int ix86_tune_mask;
extern enum processor_type ix86_arch;
extern int ix86_arch_mask;
enum fpmath_unit
{