i386.c (x86_use_leave, [...]): Merge into ...

* config/i386/i386.c (x86_use_leave, x86_push_memory, x86_zero_extend_with_and, x86_movx, x86_double_with_add, x86_use_bit_test, x86_unroll_strlen, x86_deep_branch, x86_branch_hints, x86_use_sahf, x86_partial_reg_stall, x86_partial_flag_reg_stall, x86_use_himode_fiop, x86_use_simode_fiop, x86_use_mov0, x86_use_cltd, x86_read_modify_write, x86_read_modify, x86_split_long_moves, x86_promote_QImode, x86_fast_prefix, x86_single_stringop, x86_qimode_math, x86_promote_qi_regs, x86_himode_math, x86_promote_hi_regs, x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves, x86_partial_reg_dependency, x86_memory_mismatch_stall, x86_prologue_using_move, x86_epilogue_using_move, x86_shift1, x86_sse_partial_reg_dependency, x86_sse_split_regs, x86_sse_unaligned_move_optimal, x86_sse_typeless_stores, x86_sse_load0_by_pxor, x86_use_ffreep, x86_use_incdec, x86_inter_unit_moves, x86_ext_80387_constants, x86_four_jump_limit, x86_schedule, x86_use_bt, x86_pad_returns): Merge into ... (ix86_tune_features): ... here. New array. (x86_cmove, x86_use_xchgb, x86_cmpxchg, x86_cmpxchg8b, x86_xadd, x86_bswap): Merge into ... (ix86_arch_features): ... here. New array. (x86_3dnow_a): Remove. (x86_accumulate_outgoing_args): Make static. (x86_arch_always_fancy_math_387): Make static. (ix86_tune_mask, ix86_arch_mask): Move ... (override_options): ... to local variables here. Apply the appropriate mask to each element of ix86_arch_features and ix86_tune_features. Adjust TARGET_CMOVE and TARGET_USE_SAHF as were done in the old macros. (standard_80387_constant_p): Use TARGET_EXT_80387_CONSTANTS. 
* config/i386/i386.h (x86_use_leave, x86_push_memory, x86_zero_extend_with_and, x86_use_bit_test, x86_cmove, x86_deep_branch, x86_branch_hints, x86_unroll_strlen, x86_double_with_add, x86_partial_reg_stall, x86_movx, x86_use_himode_fiop, x86_use_simode_fiop, x86_use_mov0, x86_use_cltd, x86_use_xchgb, x86_read_modify_write, x86_read_modify, x86_split_long_moves, x86_promote_QImode, x86_single_stringop, x86_fast_prefix, x86_himode_math, x86_qimode_math, x86_promote_qi_regs, x86_promote_hi_regs, x86_integer_DFmode_moves, x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8, x86_partial_reg_dependency, x86_memory_mismatch_stall, x86_accumulate_outgoing_args, x86_prologue_using_move, x86_epilogue_using_move, x86_decompose_lea, x86_arch_always_fancy_math_387, x86_shift1, x86_sse_partial_reg_dependency, x86_sse_split_regs, x86_sse_unaligned_move_optimal, x86_sse_typeless_stores, x86_sse_load0_by_pxor, x86_use_ffreep, x86_inter_unit_moves, x86_schedule, x86_use_bt, x86_cmpxchg, x86_cmpxchg8b, x86_xadd, x86_use_incdec, x86_pad_returns, x86_bswap, x86_partial_flag_reg_stall): Remove. (enum ix86_tune_indices): New. (ix86_tune_features): New. 
(TARGET_USE_LEAVE, TARGET_PUSH_MEMORY, TARGET_ZERO_EXTEND_WITH_AND, TARGET_USE_BIT_TEST, TARGET_UNROLL_STRLEN, TARGET_DEEP_BRANCH_PREDICTION, TARGET_BRANCH_PREDICTION_HINTS, TARGET_DOUBLE_WITH_ADD, TARGET_USE_SAHF, TARGET_MOVX, TARGET_PARTIAL_REG_STALL, TARGET_PARTIAL_FLAG_REG_STALL, TARGET_USE_HIMODE_FIOP, TARGET_USE_SIMODE_FIOP, TARGET_USE_MOV0, TARGET_USE_CLTD, TARGET_USE_XCHGB, TARGET_SPLIT_LONG_MOVES, TARGET_READ_MODIFY_WRITE, TARGET_READ_MODIFY, TARGET_PROMOTE_QImode, TARGET_FAST_PREFIX, TARGET_SINGLE_STRINGOP, TARGET_QIMODE_MATH, TARGET_HIMODE_MATH, TARGET_PROMOTE_QI_REGS, TARGET_PROMOTE_HI_REGS, TARGET_ADD_ESP_4, TARGET_ADD_ESP_8, TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_INTEGER_DFMODE_MOVES, TARGET_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REG_DEPENDENCY, TARGET_SSE_UNALIGNED_MOVE_OPTIMAL, TARGET_SSE_SPLIT_REGS, TARGET_SSE_TYPELESS_STORES, TARGET_SSE_LOAD0_BY_PXOR, TARGET_MEMORY_MISMATCH_STALL, TARGET_PROLOGUE_USING_MOVE, TARGET_EPILOGUE_USING_MOVE, TARGET_SHIFT1, TARGET_USE_FFREEP, TARGET_INTER_UNIT_MOVES, TARGET_FOUR_JUMP_LIMIT, TARGET_SCHEDULE, TARGET_USE_BT, TARGET_USE_INCDEC, TARGET_PAD_RETURNS, TARGET_EXT_80387_CONSTANTS): Use it. (enum ix86_arch_indices): New. (ix86_arch_features): New. (TARGET_CMOVE, TARGET_CMPXCHG, TARGET_CMPXCHG8B, TARGET_XADD, TARGET_BSWAP): Use it. (ix86_tune_mask, ix86_arch_mask): Remove. From-SVN: r122621
2007-03-06 07:59:38 -08:00 · 2007-03-06 07:59:38 -08:00 · 80fd744fda
parent 14da607343
commit 80fd744fda
3 changed files with 510 additions and 283 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,83 @@
 2007-03-06  Richard Henderson  <rth@redhat.com>
 	* config/i386/i386.c (x86_use_leave, x86_push_memory,
 	x86_zero_extend_with_and, x86_movx, x86_double_with_add,
 	x86_use_bit_test, x86_unroll_strlen, x86_deep_branch,
 	x86_branch_hints, x86_use_sahf, x86_partial_reg_stall,
 	x86_partial_flag_reg_stall, x86_use_himode_fiop, x86_use_simode_fiop,
 	x86_use_mov0, x86_use_cltd, x86_read_modify_write, x86_read_modify,
 	x86_split_long_moves, x86_promote_QImode, x86_fast_prefix, 
 	x86_single_stringop, x86_qimode_math, x86_promote_qi_regs,
 	x86_himode_math, x86_promote_hi_regs, x86_sub_esp_4, x86_sub_esp_8,
 	x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves, 
 	x86_partial_reg_dependency, x86_memory_mismatch_stall, 
 	x86_prologue_using_move, x86_epilogue_using_move, x86_shift1,
 	x86_sse_partial_reg_dependency, x86_sse_split_regs, 
 	x86_sse_unaligned_move_optimal, x86_sse_typeless_stores,
 	x86_sse_load0_by_pxor, x86_use_ffreep, x86_use_incdec,
 	x86_inter_unit_moves, x86_ext_80387_constants, x86_four_jump_limit,
 	x86_schedule, x86_use_bt, x86_pad_returns): Merge into ...
 	(ix86_tune_features): ... here.  New array.
 	(x86_cmove, x86_use_xchgb, x86_cmpxchg, x86_cmpxchg8b,	
 	x86_xadd, x86_bswap): Merge into ...
 	(ix86_arch_features): ... here.  New array.
 	(x86_3dnow_a): Remove.
 	(x86_accumulate_outgoing_args): Make static.
 	(x86_arch_always_fancy_math_387): Make static.
 	(ix86_tune_mask, ix86_arch_mask): Move ...
 	(override_options): ... to local variables here.  Apply the
 	appropriate mask to each element of ix86_arch_features and
 	ix86_tune_features.  Adjust TARGET_CMOVE and TARGET_USE_SAHF
 	as were done in the old macros.
 	(standard_80387_constant_p): Use TARGET_EXT_80387_CONSTANTS.
 	* config/i386/i386.h (x86_use_leave, x86_push_memory,
 	x86_zero_extend_with_and, x86_use_bit_test, x86_cmove, x86_deep_branch,
 	x86_branch_hints, x86_unroll_strlen, x86_double_with_add,
 	x86_partial_reg_stall, x86_movx, x86_use_himode_fiop,
 	x86_use_simode_fiop, x86_use_mov0, x86_use_cltd, x86_use_xchgb,
 	x86_read_modify_write, x86_read_modify, x86_split_long_moves,
 	x86_promote_QImode, x86_single_stringop, x86_fast_prefix,
 	x86_himode_math, x86_qimode_math, x86_promote_qi_regs,
 	x86_promote_hi_regs, x86_integer_DFmode_moves, x86_add_esp_4,
 	x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8,
 	x86_partial_reg_dependency, x86_memory_mismatch_stall,
 	x86_accumulate_outgoing_args, x86_prologue_using_move,
 	x86_epilogue_using_move, x86_decompose_lea,
 	x86_arch_always_fancy_math_387, x86_shift1,
 	x86_sse_partial_reg_dependency, x86_sse_split_regs,
 	x86_sse_unaligned_move_optimal, x86_sse_typeless_stores,	
 	x86_sse_load0_by_pxor, x86_use_ffreep, x86_inter_unit_moves,
 	x86_schedule, x86_use_bt, x86_cmpxchg, x86_cmpxchg8b, x86_xadd,
 	x86_use_incdec, x86_pad_returns, x86_bswap,
 	x86_partial_flag_reg_stall): Remove.
 	(enum ix86_tune_indices): New.
 	(ix86_tune_features): New.
 	(TARGET_USE_LEAVE, TARGET_PUSH_MEMORY, TARGET_ZERO_EXTEND_WITH_AND,
 	TARGET_USE_BIT_TEST, TARGET_UNROLL_STRLEN,
 	TARGET_DEEP_BRANCH_PREDICTION, TARGET_BRANCH_PREDICTION_HINTS,
 	TARGET_DOUBLE_WITH_ADD, TARGET_USE_SAHF, TARGET_MOVX,
 	TARGET_PARTIAL_REG_STALL, TARGET_PARTIAL_FLAG_REG_STALL,
 	TARGET_USE_HIMODE_FIOP, TARGET_USE_SIMODE_FIOP, TARGET_USE_MOV0,
 	TARGET_USE_CLTD, TARGET_USE_XCHGB, TARGET_SPLIT_LONG_MOVES,
 	TARGET_READ_MODIFY_WRITE, TARGET_READ_MODIFY, TARGET_PROMOTE_QImode,
 	TARGET_FAST_PREFIX, TARGET_SINGLE_STRINGOP, TARGET_QIMODE_MATH,
 	TARGET_HIMODE_MATH, TARGET_PROMOTE_QI_REGS, TARGET_PROMOTE_HI_REGS,
 	TARGET_ADD_ESP_4, TARGET_ADD_ESP_8, TARGET_SUB_ESP_4,
 	TARGET_SUB_ESP_8, TARGET_INTEGER_DFMODE_MOVES,
 	TARGET_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REG_DEPENDENCY,
 	TARGET_SSE_UNALIGNED_MOVE_OPTIMAL, TARGET_SSE_SPLIT_REGS,
 	TARGET_SSE_TYPELESS_STORES, TARGET_SSE_LOAD0_BY_PXOR,
 	TARGET_MEMORY_MISMATCH_STALL, TARGET_PROLOGUE_USING_MOVE,
 	TARGET_EPILOGUE_USING_MOVE, TARGET_SHIFT1, TARGET_USE_FFREEP,
 	TARGET_INTER_UNIT_MOVES, TARGET_FOUR_JUMP_LIMIT, TARGET_SCHEDULE,
 	TARGET_USE_BT, TARGET_USE_INCDEC, TARGET_PAD_RETURNS,
 	TARGET_EXT_80387_CONSTANTS): Use it.
 	(enum ix86_arch_indices): New.
 	(ix86_arch_features): New.
 	(TARGET_CMOVE, TARGET_CMPXCHG, TARGET_CMPXCHG8B, TARGET_XADD,
 	TARGET_BSWAP): Use it.
 	(ix86_tune_mask, ix86_arch_mask): Remove.
 2007-03-06  Joseph Myers  <joseph@codesourcery.com>
 	PR bootstrap/31020
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@ -1004,187 +1004,221 @@ const struct processor_costs *ix86_cost = &pentium_cost;
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
-/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
+/* Feature tests against the various tunings.  */
-   Generic64 seems like good code size tradeoff.  We can't enable it for 32bit
+unsigned int ix86_tune_features[X86_TUNE_LAST] = {
-   generic because it is not working well with PPro base chips.  */
+  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
-const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
+     negatively, so enabling for Generic64 seems like good code size
-                          | m_GENERIC64;
+     tradeoff.  We can't enable it for 32bit generic because it does not
-const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
+     work well with PPro base chips.  */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,
  /* X86_TUNE_USE_BIT_TEST */
  m_386,
  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints, and they also increase the code size.
     As a result, icc never generates branch hints.  */
  0,
  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,
  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32,
  /* | m_GENERIC | m_ATHLON_K8 ? */
  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */
  m_PPRO,
  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,
  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,
  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
  /* X86_TUNE_USE_MOV0 */
  m_K6,
  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,
  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,
  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,
  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),
  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,
  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),
  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,
  /* X86_TUNE_QIMODE_MATH */
  ~0,
  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  ~m_PPRO,
  /* X86_TUNE_PROMOTE_QI_REGS */
  0,
  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,
  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),
  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units and K8 based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to be 128bit to allow register renaming on 128bit SSE
     units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
  m_AMDFAM10,
  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */
  m_ATHLON_K8,
  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,
  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,
  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
  /* X86_TUNE_SHIFT1 */
  ~m_486,
  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,
  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,
  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
 };
 /* Feature tests against the various architecture variations.

    There is one element per X86_ARCH_* index, as labelled below.  Each
    initializer is a mask of m_* processor bits for which the feature is
    available.  override_options ANDs every element with the single bit
    (1u << ix86_arch) of the processor selected by -march=, so after option
    processing an element is nonzero only if the feature is enabled for
    that architecture.  */
 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE: Conditional moves.  NOTE(review): override_options
     also sets TARGET_CMOVE to 1 whenever TARGET_SSE is enabled, so this
     mask is presumably not the only way the feature gets turned on.  */
  m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,
  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for
     Pentium.  */
  ~(m_386 | m_486),
  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,
  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
 };
 /* Mask of m_* processor bits for which accumulating outgoing arguments
    is preferred.  NOTE(review): presumably tested against the tuning mask
    in override_options to decide whether to set
    MASK_ACCUMULATE_OUTGOING_ARGS by default -- the use is not visible
    here, confirm against override_options.  */
 static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
 /* Mask of m_* processor bits.  NOTE(review): judging by the name, these
    are the architectures on which the "fancy math" 387 instructions can
    always be assumed present; presumably tested against the -march= mask
    in override_options -- the use is not visible here, confirm.  */
 static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;
 const int x86_zero_extend_with_and = m_486 | m_PENT;
 /* Enable to zero extend integer registers to avoid partial dependencies */
 const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
                     | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
 const int x86_double_with_add = ~m_386;
 const int x86_use_bit_test = m_386;
 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
                              | m_K6 | m_CORE2 | m_GENERIC;
 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
                      | m_NOCONA;
 const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
                            | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
 /* Branch hints were put in P4 based on simulation result. But
   after P4 was made, no performance benefit was observed with
   branch hints. It also increases the code size. As the result,
   icc never generates branch hints.  */
 const int x86_branch_hints = 0;
 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
                         /*m_GENERIC | m_ATHLON_K8 ? */
 /* We probably ought to watch for partial register stalls on Generic32
   compilation setting as well.  However in current implementation the
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro based chips and is in conflict
   with partial reg. dependencies used by Athlon/P4 based chips, it is better
   to leave it off for generic32 for now.  */
 const int x86_partial_reg_stall = m_PPRO;
 const int x86_partial_flag_reg_stall =  m_CORE2 | m_GENERIC;
 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
                                  | m_CORE2 | m_GENERIC);
 const int x86_use_mov0 = m_K6;
 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
 /* Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
 const int x86_use_xchgb = m_PENT4;
 const int x86_read_modify_write = ~m_PENT;
 const int x86_read_modify = ~(m_PENT | m_PPRO);
 const int x86_split_long_moves = m_PPRO;
 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
                               | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
                               /* m_PENT4 ? */
 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
 const int x86_qimode_math = ~(0);
 const int x86_promote_qi_regs = 0;
 /* On PPro this flag is meant to avoid partial register stalls.  Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective.  */
 const int x86_himode_math = ~(m_PPRO);
 const int x86_promote_hi_regs = m_PPRO;
 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
 const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
                          | m_CORE2 | m_GENERIC;
 const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
                          | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
 const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
                          | m_CORE2 | m_GENERIC;
 const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
                          | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
 /* Enable if integer moves are preferred for DFmode copies */
 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
                                       | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
 const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
                                       | m_CORE2 | m_GENERIC;
 const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA 
                                      | m_CORE2 | m_GENERIC;
 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
   for outgoing arguments will be computed and placed into the variable
   `current_function_outgoing_args_size'. No space will be pushed onto the stack
   for each call; instead, the function prologue should increase the stack frame
   size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
   not proper. */
 const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
                                         | m_NOCONA | m_PPRO | m_CORE2
                                         | m_GENERIC;
 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
 const int x86_shift1 = ~m_486;
 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
                                           | m_ATHLON_K8_AMDFAM10 | m_PENT4 
                                           | m_NOCONA | m_CORE2 | m_GENERIC;
 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
   that thread 128bit SSE registers as single units versus K8 based chips that
   divide SSE registers to two 64bit halves.
   x86_sse_partial_reg_dependency promote all store destinations to be 128bit
   to allow register renaming on 128bit SSE units, but usually results in one
   extra microop on 64bit SSE units.  Experimental results shows that disabling
   this option on P4 brings over 20% SPECfp regression, while enabling it on
   K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
   of moves.  */
 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
                                           | m_GENERIC | m_AMDFAM10;
 /* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
 const int x86_sse_split_regs = m_ATHLON_K8;
 /* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg
   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg
   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg
     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }
   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg
     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */
 const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
 const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
 const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
 const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
                                    | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
 /* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
                                | m_NOCONA | m_CORE2 | m_GENERIC;
 const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
                         | m_CORE2 | m_GENERIC;
 const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
 /* Compare and exchange was added for 80486.  */
 const int x86_cmpxchg = ~m_386;
 /* Compare and exchange 8 bytes was added for pentium.  */
 const int x86_cmpxchg8b = ~(m_386 | m_486);
 /* Exchange and add was added for 80486.  */
 const int x86_xadd = ~m_386;
 /* Byteswap was added for 80486.  */
 const int x86_bswap = ~m_386;
 const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
 static enum stringop_alg stringop_alg = no_stringop;
@ -1397,11 +1431,9 @@ enum fpmath_unit ix86_fpmath;
 /* Which cpu are we scheduling for.  */
 enum processor_type ix86_tune;
 int ix86_tune_mask;
 /* Which instruction set architecture to use.  */
 enum processor_type ix86_arch;
 int ix86_arch_mask;
 /* true if sse prefetch instruction is not NOOP.  */
 int x86_prefetch_sse;
@ -1811,6 +1843,7 @@ override_options (void)
 {
  int i;
  int ix86_tune_defaulted = 0;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  /* Comes from final.c -- no real reason to change it.  */
 #define MAX_CODE_ALIGN 16
@ -2124,6 +2157,10 @@ override_options (void)
  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] &= ix86_arch_mask;
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
@ -2155,8 +2192,9 @@ override_options (void)
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
-  ix86_arch_mask = 1 << ix86_arch;
+  ix86_tune_mask = 1u << ix86_tune;
-  ix86_tune_mask = 1 << ix86_tune;
+  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] &= ix86_tune_mask;
  if (optimize_size)
    ix86_cost = &size_cost;
@ -2366,7 +2404,6 @@ override_options (void)
    error ("-msseregparm used without SSE enabled");
  ix86_fpmath = TARGET_FPMATH_DEFAULT;
  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
@ -2425,6 +2462,15 @@ override_options (void)
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* For sane SSE instruction set generation we need fcomi instruction.
     It is safe to enable all CMOVE instructions.  */
  if (TARGET_SSE)
    TARGET_CMOVE = 1;
  /* ??? Any idea why this is unconditionally disabled for 64-bit?  */
  if (TARGET_64BIT)
    TARGET_USE_SAHF = 0;
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
@ -4999,7 +5045,7 @@ standard_80387_constant_p (rtx x)
  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
-      && (optimize_size || x86_ext_80387_constants & ix86_tune_mask))
+      && (optimize_size || TARGET_EXT_80387_CONSTANTS))
    {
      int i;
@ -9499,6 +9545,55 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
 /* Code generation for scalar reg-reg moves of single and double precision data:
     if (TARGET_SSE_PARTIAL_REG_DEPENDENCY || TARGET_SSE_SPLIT_REGS)
       movaps reg, reg
     else
       movss reg, reg
     if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
       movapd reg, reg
     else
       movsd reg, reg
   Code generation for scalar loads of double precision data:
     if (TARGET_SSE_SPLIT_REGS)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg
   Code generation for unaligned packed loads of single precision data
   (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL overrides
   TARGET_SSE_PARTIAL_REG_DEPENDENCY):
     if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
       movups mem, reg
     else if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }
   Code generation for unaligned packed loads of double precision data
   (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL overrides TARGET_SSE_SPLIT_REGS):
     if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
       movupd mem, reg
     else if (TARGET_SSE_SPLIT_REGS)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */
 void
 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@ -179,111 +179,166 @@ extern const struct processor_costs *ix86_cost;
 #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
 #define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
-extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
+/* Feature tests against the various tunings.  */
-extern const int x86_use_bit_test, x86_cmove, x86_deep_branch;
+enum ix86_tune_indices {
-extern const int x86_branch_hints, x86_unroll_strlen;
+  X86_TUNE_USE_LEAVE,
-extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx;
+  X86_TUNE_PUSH_MEMORY,
-extern const int x86_use_himode_fiop, x86_use_simode_fiop;
+  X86_TUNE_ZERO_EXTEND_WITH_AND,
-extern const int x86_use_mov0, x86_use_cltd, x86_use_xchgb;
+  X86_TUNE_USE_BIT_TEST,
-extern const int x86_read_modify_write, x86_read_modify, x86_split_long_moves;
+  X86_TUNE_UNROLL_STRLEN,
-extern const int x86_promote_QImode, x86_single_stringop, x86_fast_prefix;
+  X86_TUNE_DEEP_BRANCH_PREDICTION,
-extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
+  X86_TUNE_BRANCH_PREDICTION_HINTS,
-extern const int x86_promote_hi_regs, x86_integer_DFmode_moves;
+  X86_TUNE_DOUBLE_WITH_ADD,
-extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
+  X86_TUNE_USE_SAHF,			/* && !TARGET_64BIT */
-extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
+  X86_TUNE_MOVX,
-extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
+  X86_TUNE_PARTIAL_REG_STALL,
-extern const int x86_epilogue_using_move, x86_decompose_lea;
+  X86_TUNE_PARTIAL_FLAG_REG_STALL,
-extern const int x86_arch_always_fancy_math_387, x86_shift1;
+  X86_TUNE_USE_HIMODE_FIOP,
-extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs;
+  X86_TUNE_USE_SIMODE_FIOP,
-extern const int x86_sse_unaligned_move_optimal;
+  X86_TUNE_USE_MOV0,
-extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
+  X86_TUNE_USE_CLTD,
-extern const int x86_use_ffreep;
+  X86_TUNE_USE_XCHGB,
-extern const int x86_inter_unit_moves, x86_schedule;
+  X86_TUNE_SPLIT_LONG_MOVES,
-extern const int x86_use_bt;
+  X86_TUNE_READ_MODIFY_WRITE,
-extern const int x86_cmpxchg, x86_cmpxchg8b, x86_xadd;
+  X86_TUNE_READ_MODIFY,
-extern const int x86_use_incdec;
+  X86_TUNE_PROMOTE_QIMODE,
-extern const int x86_pad_returns;
+  X86_TUNE_FAST_PREFIX,
-extern const int x86_bswap;
+  X86_TUNE_SINGLE_STRINGOP,
-extern const int x86_partial_flag_reg_stall;
+  X86_TUNE_QIMODE_MATH,
-extern int x86_prefetch_sse, x86_cmpxchg16b;
+  X86_TUNE_HIMODE_MATH,
+  X86_TUNE_PROMOTE_QI_REGS,
+  X86_TUNE_PROMOTE_HI_REGS,
+  X86_TUNE_ADD_ESP_4,
+  X86_TUNE_ADD_ESP_8,
+  X86_TUNE_SUB_ESP_4,
+  X86_TUNE_SUB_ESP_8,
+  X86_TUNE_INTEGER_DFMODE_MOVES,
+  X86_TUNE_PARTIAL_REG_DEPENDENCY,
+  X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY,
+  X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL,
+  X86_TUNE_SSE_SPLIT_REGS,
+  X86_TUNE_SSE_TYPELESS_STORES,
+  X86_TUNE_SSE_LOAD0_BY_PXOR,
+  X86_TUNE_MEMORY_MISMATCH_STALL,
+  X86_TUNE_PROLOGUE_USING_MOVE,
+  X86_TUNE_EPILOGUE_USING_MOVE,
+  X86_TUNE_SHIFT1,
+  X86_TUNE_USE_FFREEP,
+  X86_TUNE_INTER_UNIT_MOVES,
+  X86_TUNE_FOUR_JUMP_LIMIT,
+  X86_TUNE_SCHEDULE,
+  X86_TUNE_USE_BT,
+  X86_TUNE_USE_INCDEC,
+  X86_TUNE_PAD_RETURNS,
+  X86_TUNE_EXT_80387_CONSTANTS,
-#define TARGET_USE_LEAVE (x86_use_leave & ix86_tune_mask)
+  X86_TUNE_LAST
-#define TARGET_PUSH_MEMORY (x86_push_memory & ix86_tune_mask)
+};
-#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & ix86_tune_mask)
+
-#define TARGET_USE_BIT_TEST (x86_use_bit_test & ix86_tune_mask)
+extern unsigned int ix86_tune_features[X86_TUNE_LAST];
-#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & ix86_tune_mask)
+
-/* For sane SSE instruction set generation we need fcomi instruction.  It is
+#define TARGET_USE_LEAVE	ix86_tune_features[X86_TUNE_USE_LEAVE]
-   safe to enable all CMOVE instructions.  */
+#define TARGET_PUSH_MEMORY	ix86_tune_features[X86_TUNE_PUSH_MEMORY]
-#define TARGET_CMOVE ((x86_cmove & ix86_arch_mask) || TARGET_SSE)
+#define TARGET_ZERO_EXTEND_WITH_AND \
-#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
+	ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
-#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & ix86_tune_mask)
+#define TARGET_USE_BIT_TEST	ix86_tune_features[X86_TUNE_USE_BIT_TEST]
-#define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & ix86_tune_mask)
+#define TARGET_UNROLL_STRLEN	ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
-#define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & ix86_tune_mask)
+#define TARGET_DEEP_BRANCH_PREDICTION \
-#define TARGET_USE_SAHF ((x86_use_sahf & ix86_tune_mask) && !TARGET_64BIT)
+	ix86_tune_features[X86_TUNE_DEEP_BRANCH_PREDICTION]
-#define TARGET_MOVX (x86_movx & ix86_tune_mask)
+#define TARGET_BRANCH_PREDICTION_HINTS \
-#define TARGET_PARTIAL_REG_STALL (x86_partial_reg_stall & ix86_tune_mask)
+	ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
+#define TARGET_DOUBLE_WITH_ADD	ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
+#define TARGET_USE_SAHF		ix86_tune_features[X86_TUNE_USE_SAHF]
+#define TARGET_MOVX		ix86_tune_features[X86_TUNE_MOVX]
+#define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
 #define TARGET_PARTIAL_FLAG_REG_STALL \
-  (x86_partial_flag_reg_stall & ix86_tune_mask)
+	ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
-#define TARGET_USE_HIMODE_FIOP (x86_use_himode_fiop & ix86_tune_mask)
+#define TARGET_USE_HIMODE_FIOP	ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
-#define TARGET_USE_SIMODE_FIOP (x86_use_simode_fiop & ix86_tune_mask)
+#define TARGET_USE_SIMODE_FIOP	ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
-#define TARGET_USE_MOV0 (x86_use_mov0 & ix86_tune_mask)
+#define TARGET_USE_MOV0		ix86_tune_features[X86_TUNE_USE_MOV0]
-#define TARGET_USE_CLTD (x86_use_cltd & ix86_tune_mask)
+#define TARGET_USE_CLTD		ix86_tune_features[X86_TUNE_USE_CLTD]
-#define TARGET_USE_XCHGB (x86_use_xchgb & ix86_tune_mask)
+#define TARGET_USE_XCHGB	ix86_tune_features[X86_TUNE_USE_XCHGB]
-#define TARGET_SPLIT_LONG_MOVES (x86_split_long_moves & ix86_tune_mask)
+#define TARGET_SPLIT_LONG_MOVES	ix86_tune_features[X86_TUNE_SPLIT_LONG_MOVES]
-#define TARGET_READ_MODIFY_WRITE (x86_read_modify_write & ix86_tune_mask)
+#define TARGET_READ_MODIFY_WRITE ix86_tune_features[X86_TUNE_READ_MODIFY_WRITE]
-#define TARGET_READ_MODIFY (x86_read_modify & ix86_tune_mask)
+#define TARGET_READ_MODIFY	ix86_tune_features[X86_TUNE_READ_MODIFY]
-#define TARGET_PROMOTE_QImode (x86_promote_QImode & ix86_tune_mask)
+#define TARGET_PROMOTE_QImode	ix86_tune_features[X86_TUNE_PROMOTE_QIMODE]
-#define TARGET_FAST_PREFIX (x86_fast_prefix & ix86_tune_mask)
+#define TARGET_FAST_PREFIX	ix86_tune_features[X86_TUNE_FAST_PREFIX]
-#define TARGET_SINGLE_STRINGOP (x86_single_stringop & ix86_tune_mask)
+#define TARGET_SINGLE_STRINGOP	ix86_tune_features[X86_TUNE_SINGLE_STRINGOP]
-#define TARGET_QIMODE_MATH (x86_qimode_math & ix86_tune_mask)
+#define TARGET_QIMODE_MATH	ix86_tune_features[X86_TUNE_QIMODE_MATH]
-#define TARGET_HIMODE_MATH (x86_himode_math & ix86_tune_mask)
+#define TARGET_HIMODE_MATH	ix86_tune_features[X86_TUNE_HIMODE_MATH]
-#define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & ix86_tune_mask)
+#define TARGET_PROMOTE_QI_REGS	ix86_tune_features[X86_TUNE_PROMOTE_QI_REGS]
-#define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & ix86_tune_mask)
+#define TARGET_PROMOTE_HI_REGS	ix86_tune_features[X86_TUNE_PROMOTE_HI_REGS]
-#define TARGET_ADD_ESP_4 (x86_add_esp_4 & ix86_tune_mask)
+#define TARGET_ADD_ESP_4	ix86_tune_features[X86_TUNE_ADD_ESP_4]
-#define TARGET_ADD_ESP_8 (x86_add_esp_8 & ix86_tune_mask)
+#define TARGET_ADD_ESP_8	ix86_tune_features[X86_TUNE_ADD_ESP_8]
-#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & ix86_tune_mask)
+#define TARGET_SUB_ESP_4	ix86_tune_features[X86_TUNE_SUB_ESP_4]
-#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & ix86_tune_mask)
+#define TARGET_SUB_ESP_8	ix86_tune_features[X86_TUNE_SUB_ESP_8]
-#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & ix86_tune_mask)
+#define TARGET_INTEGER_DFMODE_MOVES \
+	ix86_tune_features[X86_TUNE_INTEGER_DFMODE_MOVES]
 #define TARGET_PARTIAL_REG_DEPENDENCY \
-  (x86_partial_reg_dependency & ix86_tune_mask)
+	ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
 #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
-  (x86_sse_partial_reg_dependency & ix86_tune_mask)
+	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
 #define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
-  (x86_sse_unaligned_move_optimal & ix86_tune_mask)
+	ix86_tune_features[X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL]
-#define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & ix86_tune_mask)
+#define TARGET_SSE_SPLIT_REGS	ix86_tune_features[X86_TUNE_SSE_SPLIT_REGS]
-#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & ix86_tune_mask)
+#define TARGET_SSE_TYPELESS_STORES \
-#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & ix86_tune_mask)
+	ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES]
+#define TARGET_SSE_LOAD0_BY_PXOR ix86_tune_features[X86_TUNE_SSE_LOAD0_BY_PXOR]
 #define TARGET_MEMORY_MISMATCH_STALL \
-  (x86_memory_mismatch_stall & ix86_tune_mask)
+	ix86_tune_features[X86_TUNE_MEMORY_MISMATCH_STALL]
-#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & ix86_tune_mask)
+#define TARGET_PROLOGUE_USING_MOVE \
-#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & ix86_tune_mask)
+	ix86_tune_features[X86_TUNE_PROLOGUE_USING_MOVE]
-#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
+#define TARGET_EPILOGUE_USING_MOVE \
-#define TARGET_SHIFT1 (x86_shift1 & ix86_tune_mask)
+	ix86_tune_features[X86_TUNE_EPILOGUE_USING_MOVE]
-#define TARGET_USE_FFREEP (x86_use_ffreep & ix86_tune_mask)
+#define TARGET_SHIFT1		ix86_tune_features[X86_TUNE_SHIFT1]
-#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & ix86_tune_mask)
+#define TARGET_USE_FFREEP	ix86_tune_features[X86_TUNE_USE_FFREEP]
-#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & ix86_tune_mask)
+#define TARGET_INTER_UNIT_MOVES	ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES]
-#define TARGET_SCHEDULE (x86_schedule & ix86_tune_mask)
+#define TARGET_FOUR_JUMP_LIMIT	ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
-#define TARGET_USE_BT (x86_use_bt & ix86_tune_mask)
+#define TARGET_SCHEDULE		ix86_tune_features[X86_TUNE_SCHEDULE]
-#define TARGET_USE_INCDEC (x86_use_incdec & ix86_tune_mask)
+#define TARGET_USE_BT		ix86_tune_features[X86_TUNE_USE_BT]
-#define TARGET_PAD_RETURNS (x86_pad_returns & ix86_tune_mask)
+#define TARGET_USE_INCDEC	ix86_tune_features[X86_TUNE_USE_INCDEC]
+#define TARGET_PAD_RETURNS	ix86_tune_features[X86_TUNE_PAD_RETURNS]
+#define TARGET_EXT_80387_CONSTANTS \
+	ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
+/* Feature tests against the various architecture variations.  */
+enum ix86_arch_indices {
+  X86_ARCH_CMOVE,		/* || TARGET_SSE */
+  X86_ARCH_CMPXCHG,
+  X86_ARCH_CMPXCHG8B,
+  X86_ARCH_XADD,
+  X86_ARCH_BSWAP,
+  X86_ARCH_LAST
+};
+extern unsigned int ix86_arch_features[X86_ARCH_LAST];
+#define TARGET_CMOVE		ix86_arch_features[X86_ARCH_CMOVE]
+#define TARGET_CMPXCHG		ix86_arch_features[X86_ARCH_CMPXCHG]
+#define TARGET_CMPXCHG8B	ix86_arch_features[X86_ARCH_CMPXCHG8B]
+#define TARGET_XADD		ix86_arch_features[X86_ARCH_XADD]
+#define TARGET_BSWAP		ix86_arch_features[X86_ARCH_BSWAP]
+#define TARGET_FISTTP		(TARGET_SSE3 && TARGET_80387)
+extern int x86_prefetch_sse;
+#define TARGET_PREFETCH_SSE	x86_prefetch_sse
+extern int x86_cmpxchg16b;
+#define TARGET_CMPXCHG16B	x86_cmpxchg16b
 #define ASSEMBLER_DIALECT	(ix86_asm_dialect)
 #define TARGET_SSE_MATH		((ix86_fpmath & FPMATH_SSE) != 0)
-#define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \
+#define TARGET_MIX_SSE_I387 \
-			     && (ix86_fpmath & FPMATH_387))
+ ((ix86_fpmath & (FPMATH_SSE | FPMATH_387)) == (FPMATH_SSE | FPMATH_387))
 #define TARGET_GNU_TLS		(ix86_tls_dialect == TLS_DIALECT_GNU)
 #define TARGET_GNU2_TLS		(ix86_tls_dialect == TLS_DIALECT_GNU2)
 #define TARGET_ANY_GNU_TLS	(TARGET_GNU_TLS || TARGET_GNU2_TLS)
 #define TARGET_SUN_TLS		(ix86_tls_dialect == TLS_DIALECT_SUN)
-#define TARGET_CMPXCHG (x86_cmpxchg & ix86_arch_mask)
-#define TARGET_CMPXCHG8B (x86_cmpxchg8b & ix86_arch_mask)
-#define TARGET_CMPXCHG16B (x86_cmpxchg16b)
-#define TARGET_XADD (x86_xadd & ix86_arch_mask)
-#define TARGET_BSWAP (x86_bswap & ix86_arch_mask)
 #ifndef TARGET_64BIT_DEFAULT
 #define TARGET_64BIT_DEFAULT 0
 #endif
@ -2132,10 +2187,7 @@ enum processor_type
 };
 extern enum processor_type ix86_tune;
-extern int ix86_tune_mask;
 extern enum processor_type ix86_arch;
-extern int ix86_arch_mask;
 enum fpmath_unit
 {