x86-tune.def (partial_flag_reg_stall): Disable for CoreI7 and newer.
* x86-tune.def (partial_flag_reg_stall): Disable for CoreI7 and newer. (sse_typeless_stores): Enable for core. (sse_load0_by_pxor): Likewise. (four_jump_limit): Disable for core. (pad_returns): Likewise. (avoid_vector_decode): Likewise. (fuse_cmp_and_branch): Enable for cores. * i386.c (x86_accumulate_outgoing_args): Disable for cores. From-SVN: r202813
This commit is contained in:
parent
765c1354c7
commit
0ca6c49ff1
|
@ -1,3 +1,14 @@
|
|||
2013-09-20 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* x86-tune.def (partial_flag_reg_stall): Disable for CoreI7 and newer.
|
||||
(sse_typeless_stores): Enable for core.
|
||||
(sse_load0_by_pxor): Likewise.
|
||||
(four_jump_limit): Disable for core.
|
||||
(pad_returns): Likewise.
|
||||
(avoid_vector_decode): Likewise.
|
||||
(fuse_cmp_and_branch): Enable for cores.
|
||||
* i386.c (x86_accumulate_outgoing_args): Disable for cores.
|
||||
|
||||
2013-09-20 John David Anglin <danglin@gcc.gnu.org>
|
||||
|
||||
PR middle-end/56791
|
||||
|
|
|
@ -1899,7 +1899,7 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
|
|||
};
|
||||
|
||||
static const unsigned int x86_accumulate_outgoing_args
|
||||
= m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC;
|
||||
= m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC;
|
||||
|
||||
static const unsigned int x86_arch_always_fancy_math_387
|
||||
= m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC;
|
||||
|
|
|
@ -52,7 +52,7 @@ DEF_TUNE (X86_TUNE_MOVX, "movx",
|
|||
and can happen in caller/callee saving sequences. */
|
||||
DEF_TUNE (X86_TUNE_PARTIAL_REG_STALL, "partial_reg_stall", m_PPRO)
|
||||
DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall",
|
||||
m_CORE_ALL | m_GENERIC)
|
||||
m_CORE2 | m_GENERIC)
|
||||
/* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
|
||||
* on 16-bit immediate moves into memory on Core2 and Corei7. */
|
||||
DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall", m_CORE_ALL | m_GENERIC)
|
||||
|
@ -125,8 +125,10 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optim
|
|||
maintain just lower part of scalar values in proper format leaving the
|
||||
upper part undefined. */
|
||||
DEF_TUNE (X86_TUNE_SSE_SPLIT_REGS, "sse_split_regs", m_ATHLON_K8)
|
||||
DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", m_AMD_MULTIPLE)
|
||||
DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", m_PPRO | m_P4_NOCONA)
|
||||
DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores",
|
||||
m_AMD_MULTIPLE | m_CORE_ALL)
|
||||
DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL)
|
||||
DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
|
||||
m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC)
|
||||
DEF_TUNE (X86_TUNE_PROLOGUE_USING_MOVE, "prologue_using_move",
|
||||
|
@ -144,7 +146,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
|
|||
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
|
||||
than 4 branch instructions in the 16 byte window. */
|
||||
DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM| m_AMD_MULTIPLE
|
||||
m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_AMD_MULTIPLE
|
||||
| m_GENERIC)
|
||||
DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
|
||||
m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE
|
||||
|
@ -154,13 +156,13 @@ DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
|
|||
DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
|
||||
~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC))
|
||||
DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns",
|
||||
m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC)
|
||||
m_AMD_MULTIPLE | m_GENERIC)
|
||||
DEF_TUNE (X86_TUNE_PAD_SHORT_FUNCTION, "pad_short_function", m_ATOM)
|
||||
DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE
|
||||
| m_ATHLON_K8 | m_GENERIC)
|
||||
DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
|
||||
m_CORE_ALL | m_K8 | m_GENERIC)
|
||||
m_K8 | m_GENERIC)
|
||||
/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
|
||||
and SImode multiply, but 386 and 486 do HImode multiply faster. */
|
||||
DEF_TUNE (X86_TUNE_PROMOTE_HIMODE_IMUL, "promote_himode_imul",
|
||||
|
@ -193,7 +195,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
|
|||
/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
|
||||
with a subsequent conditional jump instruction into a single
|
||||
compare-and-branch uop. */
|
||||
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER)
|
||||
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER | m_CORE_ALL)
|
||||
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
|
||||
will impact LEA instruction selection. */
|
||||
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_ATOM | m_SLM)
|
||||
|
|
Loading…
Reference in New Issue