AMD znver1 enablement.
2015-10-06 Venkataramanan Kumar <Venkataramanan.kumar@amd.com> AMD znver1 enablement. * config.gcc (i[34567]86-*-linux* | ...): Add znver1. (case ${target}): Add znver1. * config/i386/cpuid.h(bit_CLZERO): Define. * config/i386/driver-i386.c: (host_detect_local_cpu): Let -march=native recognize znver1 processors. * config/i386/i386-c.c (ix86_target_macros_internal): Add znver1, clzero def_or_undef. * config/i386/i386.c (struct processor_costs znver1_cost): New. (m_ZNVER1): New definition. (m_AMD_MULTIPLE): Includes m_ZNVER1. (processor_target_table): Add znver1 entry. (ix86_target_string) : Add clzero entry. (static const char *const cpu_names): Add znver1 entry. (ix86_option_override_internal): Add znver1 instruction sets. (PTA_CLZERO) : New definition. (ix86_option_override_internal): Handle new clzero option. (ix86_issue_rate): Add znver1. (ix86_adjust_cost): Add znver1. (ia32_multipass_dfa_lookahead): Add znver1. (has_dispatch): Add znver1. * config/i386/i386.h (TARGET_ZNVER1): New definition. (TARGET_CLZERO): Define. (TARGET_CLZERO_P): Define. (struct ix86_size_cost): Add TARGET_ZNVER1. (enum processor_type): Add PROCESSOR_ZNVER1. * config/i386/i386.md (define_attr "cpu"): Add znver1. (set_attr znver1_decode): New definitions for znver1. * config/i386/i386.opt (flag_dispatch_scheduler): Add znver1. (mclzero): New. * config/i386/mmx.md (set_attr znver1_decode): New definitions for znver1. * config/i386/sse.md (set_attr znver1_decode): Likewise. * config/i386/x86-tune.def: Add znver1 tunings. * config/i386/znver1.md: Introduce znver1 cpu and include new md file. * gcc/doc/invoke.texi: Add details about znver1. From-SVN: r228520
This commit is contained in:
parent
0580f6a1a8
commit
9ce29eb05d
|
@ -1,3 +1,41 @@
|
|||
2015-10-06 Venkataramanan Kumar <Venkataramanan.kumar@amd.com>
|
||||
|
||||
* config.gcc (i[34567]86-*-linux* | ...): Add znver1.
|
||||
(case ${target}): Add znver1.
|
||||
* config/i386/cpuid.h(bit_CLZERO): Define.
|
||||
* config/i386/driver-i386.c: (host_detect_local_cpu): Let
|
||||
-march=native recognize znver1 processors.
|
||||
* config/i386/i386-c.c (ix86_target_macros_internal): Add
|
||||
znver1, clzero def_or_undef.
|
||||
* config/i386/i386.c (struct processor_costs znver1_cost): New.
|
||||
(m_ZNVER1): New definition.
|
||||
(m_AMD_MULTIPLE): Includes m_ZNVER1.
|
||||
(processor_target_table): Add znver1 entry.
|
||||
(ix86_target_string) : Add clzero entry.
|
||||
(static const char *const cpu_names): Add znver1 entry.
|
||||
(ix86_option_override_internal): Add znver1 instruction sets.
|
||||
(PTA_CLZERO) : New definition.
|
||||
(ix86_option_override_internal): Handle new clzero option.
|
||||
(ix86_issue_rate): Add znver1.
|
||||
(ix86_adjust_cost): Add znver1.
|
||||
(ia32_multipass_dfa_lookahead): Add znver1.
|
||||
(has_dispatch): Add znver1.
|
||||
* config/i386/i386.h (TARGET_ZNVER1): New definition.
|
||||
(TARGET_CLZERO): Define.
|
||||
(TARGET_CLZERO_P): Define.
|
||||
(struct ix86_size_cost): Add TARGET_ZNVER1.
|
||||
(enum processor_type): Add PROCESSOR_ZNVER1.
|
||||
* config/i386/i386.md (define_attr "cpu"): Add znver1.
|
||||
(set_attr znver1_decode): New definitions for znver1.
|
||||
* config/i386/i386.opt (flag_dispatch_scheduler): Add znver1.
|
||||
(mclzero): New.
|
||||
* config/i386/mmx.md (set_attr znver1_decode): New definitions
|
||||
for znver1.
|
||||
* config/i386/sse.md (set_attr znver1_decode): Likewise.
|
||||
* config/i386/x86-tune.def: Add znver1 tunings.
|
||||
* config/i386/znver1.md: Introduce znver1 cpu and include new md file.
|
||||
* gcc/doc/invoke.texi: Add details about znver1.
|
||||
|
||||
2015-10-06 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/67859
|
||||
|
|
|
@ -592,7 +592,7 @@ pentium4 pentium4m pentiumpro prescott lakemont"
|
|||
# 64-bit x86 processors supported by --with-arch=. Each processor
|
||||
# MUST be separated by exactly one space.
|
||||
x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
|
||||
bdver3 bdver4 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \
|
||||
bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \
|
||||
core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \
|
||||
sandybridge ivybridge haswell broadwell bonnell silvermont knl \
|
||||
skylake-avx512 x86-64 native"
|
||||
|
@ -3119,6 +3119,10 @@ case ${target} in
|
|||
;;
|
||||
i686-*-* | i786-*-*)
|
||||
case ${target_noncanonical} in
|
||||
znver1-*)
|
||||
arch=znver1
|
||||
cpu=znver1
|
||||
;;
|
||||
bdver4-*)
|
||||
arch=bdver4
|
||||
cpu=bdver4
|
||||
|
@ -3232,6 +3236,10 @@ case ${target} in
|
|||
;;
|
||||
x86_64-*-*)
|
||||
case ${target_noncanonical} in
|
||||
znver1-*)
|
||||
arch=znver1
|
||||
cpu=znver1
|
||||
;;
|
||||
bdver4-*)
|
||||
arch=bdver4
|
||||
cpu=bdver4
|
||||
|
|
|
@ -65,6 +65,9 @@
|
|||
#define bit_3DNOWP (1 << 30)
|
||||
#define bit_3DNOW (1 << 31)
|
||||
|
||||
/* Extended Features (%eax == 0x80000008), %ebx.  */
|
||||
#define bit_CLZERO (1 << 0)
|
||||
|
||||
/* Extended Features (%eax == 7) */
|
||||
/* %ebx */
|
||||
#define bit_FSGSBASE (1 << 0)
|
||||
|
|
|
@ -414,6 +414,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
|
||||
unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
|
||||
unsigned int has_pcommit = 0, has_mwaitx = 0;
|
||||
unsigned int has_clzero = 0;
|
||||
|
||||
bool arch;
|
||||
|
||||
|
@ -533,6 +534,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
has_3dnowp = edx & bit_3DNOWP;
|
||||
has_3dnow = edx & bit_3DNOW;
|
||||
has_mwaitx = ecx & bit_MWAITX;
|
||||
|
||||
__cpuid (0x80000008, eax, ebx, ecx, edx);
|
||||
has_clzero = ebx & bit_CLZERO;
|
||||
}
|
||||
|
||||
/* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
|
||||
|
@ -607,6 +611,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
processor = PROCESSOR_GEODE;
|
||||
else if (has_movbe && family == 22)
|
||||
processor = PROCESSOR_BTVER2;
|
||||
else if (has_clzero)
|
||||
processor = PROCESSOR_ZNVER1;
|
||||
else if (has_avx2)
|
||||
processor = PROCESSOR_BDVER4;
|
||||
else if (has_xsaveopt)
|
||||
|
@ -872,6 +878,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
case PROCESSOR_BDVER4:
|
||||
cpu = "bdver4";
|
||||
break;
|
||||
case PROCESSOR_ZNVER1:
|
||||
cpu = "znver1";
|
||||
break;
|
||||
case PROCESSOR_BTVER1:
|
||||
cpu = "btver1";
|
||||
break;
|
||||
|
@ -961,7 +970,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
|
||||
const char *pcommit = has_pcommit ? " -mpcommit" : " -mno-pcommit";
|
||||
const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
|
||||
|
||||
const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
|
||||
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
|
||||
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
|
||||
popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
|
||||
|
@ -970,7 +979,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
fxsr, xsave, xsaveopt, avx512f, avx512er,
|
||||
avx512cd, avx512pf, prefetchwt1, clflushopt,
|
||||
xsavec, xsaves, avx512dq, avx512bw, avx512vl,
|
||||
avx512ifma, avx512vbmi, clwb, pcommit, mwaitx, NULL);
|
||||
avx512ifma, avx512vbmi, clwb, pcommit, mwaitx,
|
||||
clzero, NULL);
|
||||
}
|
||||
|
||||
done:
|
||||
|
|
|
@ -123,6 +123,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
def_or_undef (parse_in, "__bdver4");
|
||||
def_or_undef (parse_in, "__bdver4__");
|
||||
break;
|
||||
case PROCESSOR_ZNVER1:
|
||||
def_or_undef (parse_in, "__znver1");
|
||||
def_or_undef (parse_in, "__znver1__");
|
||||
break;
|
||||
case PROCESSOR_BTVER1:
|
||||
def_or_undef (parse_in, "__btver1");
|
||||
def_or_undef (parse_in, "__btver1__");
|
||||
|
@ -252,6 +256,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
case PROCESSOR_BDVER4:
|
||||
def_or_undef (parse_in, "__tune_bdver4__");
|
||||
break;
|
||||
case PROCESSOR_ZNVER1:
|
||||
def_or_undef (parse_in, "__tune_znver1__");
|
||||
break;
|
||||
case PROCESSOR_BTVER1:
|
||||
def_or_undef (parse_in, "__tune_btver1__");
|
||||
break;
|
||||
|
@ -424,6 +431,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
def_or_undef (parse_in, "__SSE2_MATH__");
|
||||
if (isa_flag & OPTION_MASK_ISA_CLFLUSHOPT)
|
||||
def_or_undef (parse_in, "__CLFLUSHOPT__");
|
||||
if (isa_flag & OPTION_MASK_ISA_CLZERO)
|
||||
def_or_undef (parse_in, "__CLZERO__");
|
||||
if (isa_flag & OPTION_MASK_ISA_XSAVEC)
|
||||
def_or_undef (parse_in, "__XSAVEC__");
|
||||
if (isa_flag & OPTION_MASK_ISA_XSAVES)
|
||||
|
|
|
@ -1342,6 +1342,96 @@ struct processor_costs bdver4_cost = {
|
|||
2, /* cond_not_taken_branch_cost. */
|
||||
};
|
||||
|
||||
|
||||
/* ZNVER1 has optimized REP instruction for medium sized blocks, but for
|
||||
very small blocks it is better to use loop. For large blocks, libcall
|
||||
can do nontemporary accesses and beat inline considerably. */
|
||||
static stringop_algs znver1_memcpy[2] = {
|
||||
{libcall, {{6, loop, false}, {14, unrolled_loop, false},
|
||||
{-1, rep_prefix_4_byte, false}}},
|
||||
{libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
|
||||
{-1, libcall, false}}}};
|
||||
static stringop_algs znver1_memset[2] = {
|
||||
{libcall, {{8, loop, false}, {24, unrolled_loop, false},
|
||||
{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
|
||||
{libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
|
||||
{-1, libcall, false}}}};
|
||||
struct processor_costs znver1_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of an add instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of a lea instruction. */
|
||||
COSTS_N_INSNS (1), /* variable shift costs. */
|
||||
COSTS_N_INSNS (1), /* constant shift costs. */
|
||||
{COSTS_N_INSNS (4), /* cost of starting multiply for QI. */
|
||||
COSTS_N_INSNS (4), /* HI. */
|
||||
COSTS_N_INSNS (4), /* SI. */
|
||||
COSTS_N_INSNS (6), /* DI. */
|
||||
COSTS_N_INSNS (6)}, /* other. */
|
||||
0, /* cost of multiply per each bit
|
||||
set. */
|
||||
{COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
|
||||
COSTS_N_INSNS (35), /* HI. */
|
||||
COSTS_N_INSNS (51), /* SI. */
|
||||
COSTS_N_INSNS (83), /* DI. */
|
||||
COSTS_N_INSNS (83)}, /* other. */
|
||||
COSTS_N_INSNS (1), /* cost of movsx. */
|
||||
COSTS_N_INSNS (1), /* cost of movzx. */
|
||||
8, /* "large" insn. */
|
||||
9, /* MOVE_RATIO. */
|
||||
4, /* cost for loading QImode using
|
||||
movzbl. */
|
||||
{5, 5, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer
|
||||
registers. */
|
||||
2, /* cost of reg,reg fld/fst. */
|
||||
{5, 5, 12}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode. */
|
||||
{4, 4, 8}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode. */
|
||||
2, /* cost of moving MMX register. */
|
||||
{4, 4}, /* cost of loading MMX registers
|
||||
in SImode and DImode. */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
in SImode and DImode. */
|
||||
2, /* cost of moving SSE register. */
|
||||
{4, 4, 4}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode. */
|
||||
{4, 4, 4}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode. */
|
||||
2, /* MMX or SSE register to integer. */
|
||||
32, /* size of l1 cache. */
|
||||
512, /* size of l2 cache. */
|
||||
64, /* size of prefetch block. */
|
||||
/* New AMD processors never drop prefetches; if they cannot be performed
|
||||
immediately, they are queued. We set number of simultaneous prefetches
|
||||
to a large constant to reflect this (it probably is not a good idea not
|
||||
to limit number of prefetches at all, as their execution also takes some
|
||||
time). */
|
||||
100, /* number of parallel prefetches. */
|
||||
2, /* Branch cost. */
|
||||
COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
|
||||
COSTS_N_INSNS (6), /* cost of FMUL instruction. */
|
||||
COSTS_N_INSNS (42), /* cost of FDIV instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
|
||||
znver1_memcpy,
|
||||
znver1_memset,
|
||||
6, /* scalar_stmt_cost. */
|
||||
4, /* scalar load_cost. */
|
||||
4, /* scalar_store_cost. */
|
||||
6, /* vec_stmt_cost. */
|
||||
0, /* vec_to_scalar_cost. */
|
||||
2, /* scalar_to_vec_cost. */
|
||||
4, /* vec_align_load_cost. */
|
||||
4, /* vec_unalign_load_cost. */
|
||||
4, /* vec_store_cost. */
|
||||
4, /* cond_taken_branch_cost. */
|
||||
2, /* cond_not_taken_branch_cost. */
|
||||
};
|
||||
|
||||
/* BTVER1 has optimized REP instruction for medium sized blocks, but for
|
||||
very small blocks it is better to use loop. For large blocks, libcall can
|
||||
do nontemporary accesses and beat inline considerably. */
|
||||
|
@ -2113,11 +2203,13 @@ const struct processor_costs *ix86_cost = &pentium_cost;
|
|||
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
|
||||
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
|
||||
#define m_BDVER4 (1<<PROCESSOR_BDVER4)
|
||||
#define m_ZNVER1 (1<<PROCESSOR_ZNVER1)
|
||||
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
|
||||
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
|
||||
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
|
||||
#define m_BTVER (m_BTVER1 | m_BTVER2)
|
||||
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
|
||||
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
|
||||
| m_ZNVER1)
|
||||
|
||||
#define m_GENERIC (1<<PROCESSOR_GENERIC)
|
||||
|
||||
|
@ -2580,6 +2672,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
|
|||
{"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
|
||||
{"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
|
||||
{"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
|
||||
{"znver1", &znver1_cost, 16, 10, 16, 7, 11},
|
||||
{"btver1", &btver1_cost, 16, 10, 16, 7, 11},
|
||||
{"btver2", &btver2_cost, 16, 10, 16, 7, 11}
|
||||
};
|
||||
|
@ -3672,6 +3765,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
|
|||
{ "-mclwb", OPTION_MASK_ISA_CLWB },
|
||||
{ "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
|
||||
{ "-mmwaitx", OPTION_MASK_ISA_MWAITX },
|
||||
{ "-mclzero", OPTION_MASK_ISA_CLZERO },
|
||||
};
|
||||
|
||||
/* Flag options. */
|
||||
|
@ -4216,6 +4310,7 @@ ix86_option_override_internal (bool main_args_p,
|
|||
#define PTA_CLWB (HOST_WIDE_INT_1 << 55)
|
||||
#define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
|
||||
#define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
|
||||
#define PTA_CLZERO (HOST_WIDE_INT_1 << 58)
|
||||
|
||||
#define PTA_CORE2 \
|
||||
(PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
|
||||
|
@ -4378,7 +4473,16 @@ ix86_option_override_internal (bool main_args_p,
|
|||
| PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
|
||||
| PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
|
||||
| PTA_MOVBE | PTA_MWAITX},
|
||||
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
|
||||
{"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
|
||||
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
|
||||
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
|
||||
| PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
|
||||
| PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
|
||||
| PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
|
||||
| PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
|
||||
| PTA_SHA | PTA_LZCNT | PTA_POPCNT},
|
||||
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
|
||||
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
| PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
|
||||
| PTA_FXSR | PTA_XSAVE},
|
||||
|
@ -4799,6 +4903,9 @@ ix86_option_override_internal (bool main_args_p,
|
|||
if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
|
||||
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
|
||||
if (processor_alias_table[i].flags & PTA_CLZERO
|
||||
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
|
||||
if (processor_alias_table[i].flags & PTA_XSAVEC
|
||||
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
|
||||
|
@ -27168,6 +27275,7 @@ ix86_issue_rate (void)
|
|||
case PROCESSOR_BDVER2:
|
||||
case PROCESSOR_BDVER3:
|
||||
case PROCESSOR_BDVER4:
|
||||
case PROCESSOR_ZNVER1:
|
||||
case PROCESSOR_CORE2:
|
||||
case PROCESSOR_NEHALEM:
|
||||
case PROCESSOR_SANDYBRIDGE:
|
||||
|
@ -27428,6 +27536,7 @@ ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
|
|||
case PROCESSOR_BDVER2:
|
||||
case PROCESSOR_BDVER3:
|
||||
case PROCESSOR_BDVER4:
|
||||
case PROCESSOR_ZNVER1:
|
||||
case PROCESSOR_BTVER1:
|
||||
case PROCESSOR_BTVER2:
|
||||
case PROCESSOR_GENERIC:
|
||||
|
@ -35708,9 +35817,9 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
|
|||
arg_str = "bdver4";
|
||||
priority = P_PROC_AVX2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
cl_target_option_restore (&global_options, &cur_target);
|
||||
|
||||
if (predicate_list && arg_str == NULL)
|
||||
|
@ -36659,7 +36768,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
|
|||
{"bdver2", M_AMDFAM15H_BDVER2},
|
||||
{"bdver3", M_AMDFAM15H_BDVER3},
|
||||
{"bdver4", M_AMDFAM15H_BDVER4},
|
||||
{"btver2", M_AMD_BTVER2},
|
||||
{"btver2", M_AMD_BTVER2},
|
||||
};
|
||||
|
||||
static struct _isa_names_table
|
||||
|
@ -52714,8 +52823,8 @@ do_dispatch (rtx_insn *insn, int mode)
|
|||
static bool
|
||||
has_dispatch (rtx_insn *insn, int action)
|
||||
{
|
||||
if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
|
||||
&& flag_dispatch_scheduler)
|
||||
if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
|
||||
|| TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler)
|
||||
switch (action)
|
||||
{
|
||||
default:
|
||||
|
|
|
@ -116,6 +116,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|||
#define TARGET_SHA_P(x) TARGET_ISA_SHA_P(x)
|
||||
#define TARGET_CLFLUSHOPT TARGET_ISA_CLFLUSHOPT
|
||||
#define TARGET_CLFLUSHOPT_P(x) TARGET_ISA_CLFLUSHOPT_P(x)
|
||||
#define TARGET_CLZERO TARGET_ISA_CLZERO
|
||||
#define TARGET_CLZERO_P(x) TARGET_ISA_CLZERO_P(x)
|
||||
#define TARGET_XSAVEC TARGET_ISA_XSAVEC
|
||||
#define TARGET_XSAVEC_P(x) TARGET_ISA_XSAVEC_P(x)
|
||||
#define TARGET_XSAVES TARGET_ISA_XSAVES
|
||||
|
@ -350,6 +352,7 @@ extern const struct processor_costs ix86_size_cost;
|
|||
#define TARGET_BDVER4 (ix86_tune == PROCESSOR_BDVER4)
|
||||
#define TARGET_BTVER1 (ix86_tune == PROCESSOR_BTVER1)
|
||||
#define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2)
|
||||
#define TARGET_ZNVER1 (ix86_tune == PROCESSOR_ZNVER1)
|
||||
|
||||
/* Feature tests against the various tunings. */
|
||||
enum ix86_tune_indices {
|
||||
|
@ -2303,6 +2306,7 @@ enum processor_type
|
|||
PROCESSOR_BDVER4,
|
||||
PROCESSOR_BTVER1,
|
||||
PROCESSOR_BTVER2,
|
||||
PROCESSOR_ZNVER1,
|
||||
PROCESSOR_max
|
||||
};
|
||||
|
||||
|
|
|
@ -408,7 +408,7 @@
|
|||
;; Processor type.
|
||||
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
|
||||
atom,slm,haswell,generic,amdfam10,bdver1,bdver2,bdver3,
|
||||
bdver4,btver2"
|
||||
bdver4,btver2,znver1"
|
||||
(const (symbol_ref "ix86_schedule")))
|
||||
|
||||
;; A basic instruction type. Refinements due to arguments to be
|
||||
|
@ -1170,6 +1170,7 @@
|
|||
(include "bdver1.md")
|
||||
(include "bdver3.md")
|
||||
(include "btver2.md")
|
||||
(include "znver1.md")
|
||||
(include "geode.md")
|
||||
(include "atom.md")
|
||||
(include "slm.md")
|
||||
|
@ -1673,6 +1674,7 @@
|
|||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "double")
|
||||
(set_attr "znver1_decode" "double")
|
||||
(set (attr "enabled")
|
||||
(cond [(eq_attr "alternative" "0")
|
||||
(symbol_ref "TARGET_MIX_SSE_I387")
|
||||
|
@ -1692,7 +1694,8 @@
|
|||
(set_attr "mode" "<X87MODEF:MODE>")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
(set_attr "bdver1_decode" "double")
|
||||
(set_attr "znver1_decode" "double")])
|
||||
|
||||
;; Push/pop instructions.
|
||||
|
||||
|
@ -4013,6 +4016,10 @@
|
|||
(eq_attr "alternative" "0"))
|
||||
(const_string "0")
|
||||
(const_string "1")))
|
||||
(set (attr "znver1_decode")
|
||||
(if_then_else (eq_attr "prefix_0f" "0")
|
||||
(const_string "double")
|
||||
(const_string "direct")))
|
||||
(set (attr "modrm")
|
||||
(if_then_else (eq_attr "prefix_0f" "0")
|
||||
(const_string "0")
|
||||
|
@ -4964,6 +4971,7 @@
|
|||
"fild%Z1\t%1"
|
||||
[(set_attr "type" "fmov")
|
||||
(set_attr "mode" "<MODE>")
|
||||
(set_attr "znver1_decode" "double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "float<SWI48x:mode>xf2"
|
||||
|
@ -4973,6 +4981,7 @@
|
|||
"fild%Z1\t%1"
|
||||
[(set_attr "type" "fmov")
|
||||
(set_attr "mode" "XF")
|
||||
(set_attr "znver1_decode" "double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_expand "float<SWI48:mode><MODEF:mode>2"
|
||||
|
@ -5022,6 +5031,7 @@
|
|||
(set_attr "athlon_decode" "*,double,direct")
|
||||
(set_attr "amdfam10_decode" "*,vector,double")
|
||||
(set_attr "bdver1_decode" "*,double,direct")
|
||||
(set_attr "znver1_decode" "double,*,*")
|
||||
(set_attr "fp_int_src" "true")
|
||||
(set (attr "enabled")
|
||||
(cond [(eq_attr "alternative" "0")
|
||||
|
@ -5042,6 +5052,7 @@
|
|||
"fild%Z1\t%1"
|
||||
[(set_attr "type" "fmov")
|
||||
(set_attr "mode" "<MODEF:MODE>")
|
||||
(set_attr "znver1_decode" "double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
;; Try TARGET_USE_VECTOR_CONVERTS, but not so hard as to require extra memory
|
||||
|
@ -10810,6 +10821,7 @@
|
|||
"bts{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "alu1")
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "znver1_decode" "double")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "*btrq"
|
||||
|
@ -10822,6 +10834,7 @@
|
|||
"btr{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "alu1")
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "znver1_decode" "double")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "*btcq"
|
||||
|
@ -10834,6 +10847,7 @@
|
|||
"btc{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "alu1")
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "znver1_decode" "double")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
;; Allow Nocona to avoid these instructions if a register is available.
|
||||
|
@ -12513,6 +12527,7 @@
|
|||
[(set_attr "type" "alu1")
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "btver2_decode" "double")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_expand "ctz<mode>2"
|
||||
|
@ -12991,6 +13006,7 @@
|
|||
"bsr{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "alu1")
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "bsr"
|
||||
|
@ -13002,6 +13018,7 @@
|
|||
"bsr{l}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "alu1")
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*bsrhi"
|
||||
|
@ -13013,6 +13030,7 @@
|
|||
"bsr{w}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "alu1")
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "HI")])
|
||||
|
||||
(define_expand "popcount<mode>2"
|
||||
|
@ -14164,6 +14182,7 @@
|
|||
&& flag_finite_math_only"
|
||||
"fprem"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_expand "fmodxf3"
|
||||
|
@ -14238,6 +14257,7 @@
|
|||
&& flag_finite_math_only"
|
||||
"fprem1"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_expand "remainderxf3"
|
||||
|
@ -14314,6 +14334,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"f<sincos>"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_insn "*<sincos>_extend<mode>xf2_i387"
|
||||
|
@ -14327,6 +14348,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"f<sincos>"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
;; When sincos pattern is defined, sin and cos builtin functions will be
|
||||
|
@ -14345,6 +14367,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fsincos"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_split
|
||||
|
@ -14380,6 +14403,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fsincos"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_split
|
||||
|
@ -14435,6 +14459,7 @@
|
|||
&& standard_80387_constant_p (operands[3]) == 2"
|
||||
"fptan"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_insn "fptan_extend<mode>xf4_i387"
|
||||
|
@ -14451,6 +14476,7 @@
|
|||
&& standard_80387_constant_p (operands[3]) == 2"
|
||||
"fptan"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_expand "tanxf2"
|
||||
|
@ -14495,6 +14521,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fpatan"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_insn "fpatan_extend<mode>xf3_i387"
|
||||
|
@ -14511,6 +14538,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fpatan"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_expand "atan2xf3"
|
||||
|
@ -14667,6 +14695,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fyl2x"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_insn "fyl2x_extend<mode>xf3_i387"
|
||||
|
@ -14682,6 +14711,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fyl2x"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_expand "logxf2"
|
||||
|
@ -14784,6 +14814,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fyl2xp1"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_insn "fyl2xp1_extend<mode>xf3_i387"
|
||||
|
@ -14799,6 +14830,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fyl2xp1"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_expand "log1pxf2"
|
||||
|
@ -14846,6 +14878,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fxtract"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_insn "fxtract_extend<mode>xf3_i387"
|
||||
|
@ -14861,6 +14894,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fxtract"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_expand "logbxf2"
|
||||
|
@ -14937,6 +14971,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"f2xm1"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_insn "fscalexf4_i387"
|
||||
|
@ -14951,6 +14986,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"fscale"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_expand "expNcorexf3"
|
||||
|
@ -15294,6 +15330,7 @@
|
|||
&& flag_unsafe_math_optimizations"
|
||||
"frndint"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "XF")])
|
||||
|
||||
(define_expand "rint<mode>2"
|
||||
|
|
|
@ -574,8 +574,8 @@ computations into a vector ones.
|
|||
|
||||
mdispatch-scheduler
|
||||
Target RejectNegative Var(flag_dispatch_scheduler)
|
||||
Do dispatch scheduling if processor is bdver1 or bdver2 or bdver3 or bdver4 and Haifa scheduling
|
||||
is selected.
|
||||
Do dispatch scheduling if processor is bdver1, bdver2, bdver3, bdver4
|
||||
or znver1 and Haifa scheduling is selected.
|
||||
|
||||
mprefer-avx128
|
||||
Target Report Mask(PREFER_AVX128) SAVE
|
||||
|
@ -751,6 +751,10 @@ mclflushopt
|
|||
Target Report Mask(ISA_CLFLUSHOPT) Var(ix86_isa_flags) Save
|
||||
Support CLFLUSHOPT instructions
|
||||
|
||||
mclzero
|
||||
Target Report Mask(ISA_CLZERO) Var(ix86_isa_flags) Save
|
||||
Support CLZERO instructions
|
||||
|
||||
mclwb
|
||||
Target Report Mask(ISA_CLWB) Var(ix86_isa_flags) Save
|
||||
Support CLWB instruction
|
||||
|
|
|
@ -1546,6 +1546,7 @@
|
|||
;; @@@ check ordering of operands in intel/nonintel syntax
|
||||
"maskmovq\t{%2, %1|%1, %2}"
|
||||
[(set_attr "type" "mmxcvt")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_expand "mmx_emms"
|
||||
|
|
|
@ -4021,6 +4021,7 @@
|
|||
(set_attr "amdfam10_decode" "vector,double,*")
|
||||
(set_attr "bdver1_decode" "double,direct,*")
|
||||
(set_attr "btver2_decode" "double,double,double")
|
||||
(set_attr "znver1_decode" "double,double,double")
|
||||
(set_attr "prefix" "orig,orig,maybe_evex")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
|
@ -4413,6 +4414,7 @@
|
|||
(set_attr "amdfam10_decode" "vector,double,*")
|
||||
(set_attr "bdver1_decode" "double,direct,*")
|
||||
(set_attr "btver2_decode" "double,double,double")
|
||||
(set_attr "znver1_decode" "double,double,double")
|
||||
(set_attr "prefix" "orig,orig,maybe_evex")
|
||||
(set_attr "mode" "DF")])
|
||||
|
||||
|
@ -13404,6 +13406,7 @@
|
|||
(set (attr "length_vex")
|
||||
(symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "znver1_decode" "vector")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "sse_ldmxcsr"
|
||||
|
@ -14341,6 +14344,7 @@
|
|||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "prefix" "orig,orig,vex")
|
||||
(set_attr "btver2_decode" "vector,vector,vector")
|
||||
(set_attr "znver1_decode" "vector,vector,vector")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; Mode attribute used by `vmovntdqa' pattern
|
||||
|
@ -14376,6 +14380,7 @@
|
|||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "prefix" "orig,orig,vex")
|
||||
(set_attr "btver2_decode" "vector,vector,vector")
|
||||
(set_attr "znver1_decode" "vector,vector,vector")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
|
||||
|
|
|
@ -59,7 +59,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
|
|||
that can be partly masked by careful scheduling of moves. */
|
||||
DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
|
||||
| m_BDVER | m_GENERIC)
|
||||
| m_BDVER | m_ZNVER1 | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
|
||||
are resolved on SSE register parts instead of whole registers, so we may
|
||||
|
@ -97,19 +97,19 @@ DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
|
|||
conditional jump instruction for 32 bit TARGET.
|
||||
FIXME: revisit for generic. */
|
||||
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32",
|
||||
m_CORE_ALL | m_BDVER)
|
||||
m_CORE_ALL | m_BDVER | m_ZNVER1)
|
||||
|
||||
/* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent
|
||||
conditional jump instruction for TARGET_64BIT.
|
||||
FIXME: revisit for generic. */
|
||||
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER)
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_ZNVER1)
|
||||
|
||||
/* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a
|
||||
subsequent conditional jump instruction when the condition jump
|
||||
check sign flag (SF) or overflow flag (OF). */
|
||||
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER)
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_ZNVER1)
|
||||
|
||||
/* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional
|
||||
jump instruction when the alu instruction produces the CCFLAG consumed by
|
||||
|
@ -126,7 +126,7 @@ DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel",
|
|||
during reassociation of fp computation. */
|
||||
DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
|
||||
m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL |m_INTEL | m_BDVER1
|
||||
| m_BDVER2 | m_GENERIC)
|
||||
| m_BDVER2 | m_ZNVER1 | m_GENERIC)
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function prologue, epilogue and function calling sequences. */
|
||||
|
@ -282,7 +282,7 @@ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
|
|||
DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
|
||||
| m_KNL | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
|
||||
| m_BTVER | m_GENERIC)
|
||||
| m_BTVER | m_ZNVER1 | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_USE_CLTD: Controls use of CLTD and CQTO instructions. */
|
||||
DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
|
||||
|
@ -334,19 +334,19 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
|
|||
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead
|
||||
of a sequence loading registers by parts. */
|
||||
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_AMDFAM10 | m_BDVER
|
||||
| m_BTVER | m_SILVERMONT | m_KNL | m_INTEL | m_GENERIC)
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL
|
||||
| m_INTEL | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER1 | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead
|
||||
of a sequence loading registers by parts. */
|
||||
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_SILVERMONT
|
||||
| m_KNL | m_INTEL | m_GENERIC)
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL
|
||||
| m_INTEL | m_BDVER | m_ZNVER1 | m_GENERIC)
|
||||
|
||||
/* Use packed single precision instructions where possible. I.e. movups instead
|
||||
of movupd. */
|
||||
DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal",
|
||||
m_BDVER)
|
||||
m_BDVER | m_ZNVER1)
|
||||
|
||||
/* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores. */
|
||||
DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores",
|
||||
|
@ -355,7 +355,8 @@ DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores",
|
|||
/* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to
|
||||
xorps/xorpd and other variants. */
|
||||
DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_GENERIC)
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER1
|
||||
| m_GENERIC)
|
||||
|
||||
/* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer
|
||||
to SSE registers. If disabled, the moves will be done by storing
|
||||
|
@ -415,11 +416,12 @@ DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL, "256_unaligned_load_optimal",
|
|||
/* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if false, unaligned stores are
|
||||
split. */
|
||||
DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_store_optimal",
|
||||
~(m_NEHALEM | m_SANDYBRIDGE | m_BDVER | m_GENERIC))
|
||||
~(m_NEHALEM | m_SANDYBRIDGE | m_BDVER | m_ZNVER1 | m_GENERIC))
|
||||
|
||||
/* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
|
||||
the auto-vectorizer. */
|
||||
DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2)
|
||||
DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
|
||||
| m_ZNVER1)
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Historical relics: tuning flags that help specific old CPU designs */
|
||||
|
|
|
@ -0,0 +1,973 @@
|
|||
;; Copyright (C) 2012-2015 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GCC.
|
||||
;;
|
||||
;; GCC is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 3, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; GCC is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GCC; see the file COPYING3. If not see
|
||||
;; <http://www.gnu.org/licenses/>.
|
||||
;;
|
||||
|
||||
(define_attr "znver1_decode" "direct,vector,double"
|
||||
(const_string "direct"))
|
||||
|
||||
;; AMD znver1 Scheduling
|
||||
;; Modeling automatons for zen decoders, integer execution pipes,
|
||||
;; AGU pipes and floating point execution units.
|
||||
(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu")
|
||||
|
||||
;; Decoders unit has 4 decoders and all of them can decode fast path
|
||||
;; and vector type instructions.
|
||||
(define_cpu_unit "znver1-decode0" "znver1")
|
||||
(define_cpu_unit "znver1-decode1" "znver1")
|
||||
(define_cpu_unit "znver1-decode2" "znver1")
|
||||
(define_cpu_unit "znver1-decode3" "znver1")
|
||||
|
||||
;; Currently blocking all decoders for vector path instructions as
|
||||
;; they are dispatched separately as a microcode sequence.
|
||||
;; Fix me: Need to revisit this.
|
||||
(define_reservation "znver1-vector" "znver1-decode0+znver1-decode1+znver1-decode2+znver1-decode3")
|
||||
|
||||
;; Direct instructions can be issued to any of the four decoders.
|
||||
(define_reservation "znver1-direct" "znver1-decode0|znver1-decode1|znver1-decode2|znver1-decode3")
|
||||
|
||||
;; Fix me: Need to revisit this later to simulate fast path double behaviour.
|
||||
(define_reservation "znver1-double" "znver1-direct")
|
||||
|
||||
|
||||
;; Integer unit 4 ALU pipes.
|
||||
(define_cpu_unit "znver1-ieu0" "znver1_ieu")
|
||||
(define_cpu_unit "znver1-ieu1" "znver1_ieu")
|
||||
(define_cpu_unit "znver1-ieu2" "znver1_ieu")
|
||||
(define_cpu_unit "znver1-ieu3" "znver1_ieu")
|
||||
(define_reservation "znver1-ieu" "znver1-ieu0|znver1-ieu1|znver1-ieu2|znver1-ieu3")
|
||||
|
||||
;; 2 AGU pipes.
|
||||
(define_cpu_unit "znver1-agu0" "znver1_agu")
|
||||
(define_cpu_unit "znver1-agu1" "znver1_agu")
|
||||
(define_reservation "znver1-agu-reserve" "znver1-agu0|znver1-agu1")
|
||||
|
||||
(define_reservation "znver1-load" "znver1-agu-reserve")
|
||||
(define_reservation "znver1-store" "znver1-agu-reserve")
|
||||
|
||||
;; vectorpath (microcoded) instructions are single issue instructions.
|
||||
;; So, they occupy all the integer units.
|
||||
(define_reservation "znver1-ivector" "znver1-ieu0+znver1-ieu1
|
||||
+znver1-ieu2+znver1-ieu3
|
||||
+znver1-agu0+znver1-agu1")
|
||||
|
||||
;; Floating point unit 4 FP pipes.
|
||||
(define_cpu_unit "znver1-fp0" "znver1_fp")
|
||||
(define_cpu_unit "znver1-fp1" "znver1_fp")
|
||||
(define_cpu_unit "znver1-fp2" "znver1_fp")
|
||||
(define_cpu_unit "znver1-fp3" "znver1_fp")
|
||||
|
||||
(define_reservation "znver1-fpu" "znver1-fp0|znver1-fp1|znver1-fp2|znver1-fp3")
|
||||
|
||||
(define_reservation "znver1-fvector" "znver1-fp0+znver1-fp1
|
||||
+znver1-fp2+znver1-fp3
|
||||
+znver1-agu0+znver1-agu1")
|
||||
|
||||
;; Call instruction
|
||||
(define_insn_reservation "znver1_call" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(eq_attr "type" "call,callv"))
|
||||
"znver1-double,znver1-store,znver1-ieu0|znver1-ieu3")
|
||||
|
||||
;; General instructions
|
||||
(define_insn_reservation "znver1_push" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "push")
|
||||
(eq_attr "memory" "none,unknown")))
|
||||
"znver1-direct,znver1-store")
|
||||
|
||||
(define_insn_reservation "znver1_push_store" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "push")
|
||||
(eq_attr "memory" "store")))
|
||||
"znver1-direct,znver1-store")
|
||||
|
||||
(define_insn_reservation "znver1_push_both" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "push")
|
||||
(eq_attr "memory" "both")))
|
||||
"znver1-direct,znver1-load,znver1-store")
|
||||
|
||||
;; Leave
|
||||
(define_insn_reservation "znver1_leave" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(eq_attr "type" "leave"))
|
||||
"znver1-double,znver1-ieu, znver1-store")
|
||||
|
||||
;; Integer Instructions or General instructions
|
||||
;; Multiplications
|
||||
;; Reg operands
|
||||
(define_insn_reservation "znver1_imul" 3
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "imul")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct,znver1-ieu1")
|
||||
|
||||
(define_insn_reservation "znver1_imul_mem" 7
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "imul")
|
||||
(eq_attr "memory" "!none")))
|
||||
"znver1-direct,znver1-load, znver1-ieu1")
|
||||
|
||||
;; Divisions
|
||||
;; Reg operands
|
||||
(define_insn_reservation "znver1_idiv_DI" 41
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-double,znver1-ieu2*41")
|
||||
|
||||
(define_insn_reservation "znver1_idiv_SI" 25
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(and (eq_attr "mode" "SI")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-double,znver1-ieu2*25")
|
||||
|
||||
(define_insn_reservation "znver1_idiv_HI" 17
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(and (eq_attr "mode" "HI")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-double,znver1-ieu2*17")
|
||||
|
||||
(define_insn_reservation "znver1_idiv_QI" 12
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(and (eq_attr "mode" "QI")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-direct,znver1-ieu2*12")
|
||||
|
||||
;; Mem operands
|
||||
;; DImode integer divide with a memory source operand.  The load is
;; issued through an AGU pipe (znver1-load) and the divide then holds
;; znver1-ieu2 for 41 cycles.
;; The memory attribute must be "load": with "none" this condition was
;; identical to the one of znver1_idiv_DI, so the register-operand
;; reservation was used for the memory form as well and this one could
;; never be selected.
(define_insn_reservation "znver1_idiv_mem_DI" 45
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "type" "idiv")
				   (and (eq_attr "mode" "DI")
					(eq_attr "memory" "load"))))
			 "znver1-double,znver1-load,znver1-ieu2*41")
|
||||
|
||||
;; SImode integer divide with a memory source operand.  The load is
;; issued through an AGU pipe (znver1-load) and the divide then holds
;; znver1-ieu2 for 25 cycles.
;; The memory attribute must be "load": with "none" this condition was
;; identical to the one of znver1_idiv_SI, so this reservation could
;; never be selected for the memory form.
(define_insn_reservation "znver1_idiv_mem_SI" 29
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "type" "idiv")
				   (and (eq_attr "mode" "SI")
					(eq_attr "memory" "load"))))
			 "znver1-double,znver1-load,znver1-ieu2*25")
|
||||
|
||||
;; HImode integer divide with a memory source operand.  The load is
;; issued through an AGU pipe (znver1-load) and the divide then holds
;; znver1-ieu2 for 17 cycles.
;; The memory attribute must be "load": with "none" this condition was
;; identical to the one of znver1_idiv_HI, so this reservation could
;; never be selected for the memory form.
(define_insn_reservation "znver1_idiv_mem_HI" 21
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "type" "idiv")
				   (and (eq_attr "mode" "HI")
					(eq_attr "memory" "load"))))
			 "znver1-double,znver1-load,znver1-ieu2*17")
|
||||
|
||||
;; QImode integer divide with a memory source operand.  The load is
;; issued through an AGU pipe (znver1-load) and the divide then holds
;; znver1-ieu2 for 12 cycles.
;; The memory attribute must be "load": with "none" this condition was
;; identical to the one of znver1_idiv_QI, so this reservation could
;; never be selected for the memory form.
(define_insn_reservation "znver1_idiv_mem_QI" 16
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "type" "idiv")
				   (and (eq_attr "mode" "QI")
					(eq_attr "memory" "load"))))
			 "znver1-direct,znver1-load,znver1-ieu2*12")
|
||||
|
||||
;; STR ISHIFT which are micro coded.
|
||||
;; Fix me: Latency need to be rechecked.
|
||||
(define_insn_reservation "znver1_str_ishift" 6
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "str,ishift")
|
||||
(eq_attr "memory" "both,store")))
|
||||
"znver1-vector,znver1-ivector")
|
||||
;; MOV - integer moves
|
||||
(define_insn_reservation "znver1_load_imov_double" 2
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "znver1_decode" "double")
|
||||
(and (eq_attr "type" "imovx")
|
||||
(eq_attr "memory" "none,load"))))
|
||||
"znver1-double,znver1-ieu")
|
||||
|
||||
(define_insn_reservation "znver1_load_imov_direct" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "imov,imovx")
|
||||
(eq_attr "memory" "none,load")))
|
||||
"znver1-direct,znver1-ieu")
|
||||
|
||||
;; INTEGER/GENERAL instructions
|
||||
;; register/imm operands only: ALU, ICMP, NEG, NOT, ROTATE, ISHIFT, TEST
|
||||
(define_insn_reservation "znver1_insn" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec")
|
||||
(eq_attr "memory" "none,unknown")))
|
||||
"znver1-direct,znver1-ieu")
|
||||
|
||||
(define_insn_reservation "znver1_insn_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec")
|
||||
(eq_attr "memory" "load")))
|
||||
"znver1-direct,znver1-load,znver1-ieu")
|
||||
|
||||
(define_insn_reservation "znver1_insn_store" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec")
|
||||
(eq_attr "memory" "store")))
|
||||
"znver1-direct,znver1-ieu,znver1-store")
|
||||
|
||||
(define_insn_reservation "znver1_insn_both" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec")
|
||||
(eq_attr "memory" "both")))
|
||||
"znver1-direct,znver1-load,znver1-ieu,znver1-store")
|
||||
|
||||
;; Fix me: Other vector type insns keeping latency 6 as of now.
|
||||
(define_insn_reservation "znver1_ieu_vector" 6
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(eq_attr "type" "other,str,multi"))
|
||||
"znver1-vector,znver1-ivector")
|
||||
|
||||
;; ALU1 register operands.
|
||||
(define_insn_reservation "znver1_alu1_vector" 3
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "znver1_decode" "vector")
|
||||
(and (eq_attr "type" "alu1")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"znver1-vector,znver1-ivector")
|
||||
|
||||
(define_insn_reservation "znver1_alu1_double" 2
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "znver1_decode" "double")
|
||||
(and (eq_attr "type" "alu1")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"znver1-double,znver1-ieu")
|
||||
|
||||
(define_insn_reservation "znver1_alu1_direct" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "znver1_decode" "direct")
|
||||
(and (eq_attr "type" "alu1")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"znver1-direct,znver1-ieu")
|
||||
|
||||
;; Branches : Fix me need to model conditional branches.
|
||||
(define_insn_reservation "znver1_branch" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "ibr")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct")
|
||||
|
||||
;; Indirect branches check latencies.
|
||||
(define_insn_reservation "znver1_indirect_branch_mem" 6
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "ibr")
|
||||
(eq_attr "memory" "load")))
|
||||
"znver1-vector,znver1-ivector")
|
||||
|
||||
;; LEA executes in ALU units with 1 cycle latency.
|
||||
(define_insn_reservation "znver1_lea" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(eq_attr "type" "lea"))
|
||||
"znver1-direct,znver1-ieu")
|
||||
|
||||
;; Other integer instructions
|
||||
(define_insn_reservation "znver1_idirect" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "none,unknown")))
|
||||
"znver1-direct,znver1-ieu")
|
||||
|
||||
;; Floating point
|
||||
(define_insn_reservation "znver1_fp_cmov" 6
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(eq_attr "type" "fcmov"))
|
||||
"znver1-vector,znver1-fvector")
|
||||
|
||||
(define_insn_reservation "znver1_fp_mov_direct_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "znver1_decode" "direct")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-direct,znver1-load,znver1-fp3|znver1-fp1")
|
||||
|
||||
(define_insn_reservation "znver1_fp_mov_direct_store" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "znver1_decode" "direct")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "store"))))
|
||||
"znver1-direct,znver1-fp2|znver1-fp3,znver1-store")
|
||||
|
||||
(define_insn_reservation "znver1_fp_mov_double" 4
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "znver1_decode" "double")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-double,znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_fp_mov_double_load" 9
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "znver1_decode" "double")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-double,znver1-load,znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_fp_mov_direct" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(eq_attr "type" "fmov"))
|
||||
"znver1-direct,znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_fp_spc_direct" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "fpspc")
|
||||
(eq_attr "memory" "store")))
|
||||
"znver1-direct,znver1-fp3,znver1-fp2")
|
||||
|
||||
(define_insn_reservation "znver1_fp_insn_vector" 6
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "znver1_decode" "vector")
|
||||
(eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov")))
|
||||
"znver1-vector,znver1-fvector")
|
||||
|
||||
;; FABS
|
||||
(define_insn_reservation "znver1_fp_fsgn" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(eq_attr "type" "fsgn"))
|
||||
"znver1-direct,znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_fp_fcmp" 2
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "znver1_decode" "double")
|
||||
(eq_attr "type" "fcmp"))))
|
||||
"znver1-double,znver1-fp0,znver1-fp2")
|
||||
|
||||
;; Insns of type "fcmp" with double decode and a memory source operand:
;; the load is issued through an AGU pipe (znver1-load) before fp0 and
;; then fp2 are used.
;; The memory attribute must be "load": with "none" this condition was
;; identical to the one of znver1_fp_fcmp, so this reservation (which
;; includes znver1-load) could never be selected.
(define_insn_reservation "znver1_fp_fcmp_load" 6
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "memory" "load")
				   (and (eq_attr "znver1_decode" "double")
					(eq_attr "type" "fcmp"))))
			 "znver1-double,znver1-load, znver1-fp0,znver1-fp2")
|
||||
|
||||
;;FADD FSUB FMUL
|
||||
(define_insn_reservation "znver1_fp_op_mul" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "fop,fmul")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct,znver1-fp0*5")
|
||||
|
||||
(define_insn_reservation "znver1_fp_op_mul_load" 9
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "fop,fmul")
|
||||
(eq_attr "memory" "load")))
|
||||
"znver1-direct,znver1-load,znver1-fp0*5")
|
||||
|
||||
;; fop/fmul insns whose fp_int_src attribute is "true" and that load from
;; memory.
;; NOTE(review): znver1_fp_op_mul_load, defined earlier in this file, tests
;; the same type and memory values without checking fp_int_src -- confirm
;; which of the two reservations such insns actually select.
(define_insn_reservation "znver1_fp_op_imul_load" 13
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "fop,fmul")
|
||||
(and (eq_attr "fp_int_src" "true")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-double,znver1-load,znver1-fp3,znver1-fp0")
|
||||
|
||||
(define_insn_reservation "znver1_fp_op_div" 15
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "fdiv")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct,znver1-fp3*15")
|
||||
|
||||
(define_insn_reservation "znver1_fp_op_div_load" 19
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "fdiv")
|
||||
(eq_attr "memory" "load")))
|
||||
"znver1-direct,znver1-load,znver1-fp3*15")
|
||||
|
||||
;; fdiv insns whose fp_int_src attribute is "true" and that load from
;; memory.
;; NOTE(review): znver1_fp_op_div_load, defined earlier in this file, tests
;; the same type and memory values without checking fp_int_src -- confirm
;; which of the two reservations such insns actually select.
(define_insn_reservation "znver1_fp_op_idiv_load" 24
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "fdiv")
|
||||
(and (eq_attr "fp_int_src" "true")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-double,znver1-load,znver1-fp3*19")
|
||||
|
||||
;; MMX, SSE, SSEn.n, AVX, AVX2 instructions
|
||||
(define_insn_reservation "znver1_fp_insn" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(eq_attr "type" "mmx"))
|
||||
"znver1-direct,znver1-fpu")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_add" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxadd")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_add_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxadd")
|
||||
(eq_attr "memory" "load")))
|
||||
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_cmp" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxcmp")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct,znver1-fp0|znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_cmp_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxcmp")
|
||||
(eq_attr "memory" "load")))
|
||||
"znver1-direct,znver1-load,znver1-fp0|znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_cvt_pck_shuf" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct,znver1-fp1|znver1-fp2")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_cvt_pck_shuf_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1")
|
||||
(eq_attr "memory" "load")))
|
||||
"znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_shift_move" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxshft,mmxmov")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct,znver1-fp2")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_shift_move_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxshft,mmxmov")
|
||||
(eq_attr "memory" "load")))
|
||||
"znver1-direct,znver1-load,znver1-fp2")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_move_store" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxshft,mmxmov")
|
||||
(eq_attr "memory" "store,both")))
|
||||
"znver1-direct,znver1-fp2,znver1-store")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_mul" 3
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxmul")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct,znver1-fp0*3")
|
||||
|
||||
(define_insn_reservation "znver1_mmx_load" 7
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "mmxmul")
|
||||
(eq_attr "memory" "load")))
|
||||
"znver1-direct,znver1-load,znver1-fp0*3")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_log" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "type" "sselog")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-double,znver1-fpu")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_log_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "type" "sselog")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-double,znver1-load,znver1-fpu")
|
||||
|
||||
(define_insn_reservation "znver1_sse_log" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "sselog")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct,znver1-fpu")
|
||||
|
||||
(define_insn_reservation "znver1_sse_log_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "sselog")
|
||||
(eq_attr "memory" "load")))
|
||||
"znver1-direct,znver1-load,znver1-fpu")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_log1" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "type" "sselog1")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-double,znver1-fp1|znver1-fp2")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_log1_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "type" "sselog1")
|
||||
(eq_attr "memory" "!none"))))
|
||||
"znver1-double,znver1-load,znver1-fp1|znver1-fp2")
|
||||
|
||||
(define_insn_reservation "znver1_sse_log1" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "sselog1")
|
||||
(eq_attr "memory" "none")))
|
||||
"znver1-direct,znver1-fp1|znver1-fp2")
|
||||
|
||||
(define_insn_reservation "znver1_sse_log1_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "type" "sselog1")
|
||||
(eq_attr "memory" "!none")))
|
||||
"znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
|
||||
|
||||
(define_insn_reservation "znver1_sse_comi" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF")
|
||||
(and (eq_attr "prefix" "!vex")
|
||||
(and (eq_attr "prefix_extra" "0")
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
(eq_attr "memory" "none"))))))
|
||||
"znver1-direct,znver1-fp0|znver1-fp1")
|
||||
|
||||
(define_insn_reservation "znver1_sse_comi_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF")
|
||||
(and (eq_attr "prefix" "!vex")
|
||||
(and (eq_attr "prefix_extra" "0")
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
(eq_attr "memory" "load"))))))
|
||||
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
|
||||
|
||||
(define_insn_reservation "znver1_sse_comi_double" 2
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(and (eq_attr "prefix" "vex")
|
||||
(and (eq_attr "prefix_extra" "0")
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
(eq_attr "memory" "none"))))))
|
||||
"znver1-double,znver1-fp0|znver1-fp1")
|
||||
|
||||
(define_insn_reservation "znver1_sse_comi_double_load" 7
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(and (eq_attr "prefix" "vex")
|
||||
(and (eq_attr "prefix_extra" "0")
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
(eq_attr "memory" "load"))))))
|
||||
"znver1-double,znver1-load,znver1-fp0|znver1-fp1")
|
||||
|
||||
(define_insn_reservation "znver1_sse_test" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "prefix_extra" "1")
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
(eq_attr "memory" "none")))))
|
||||
"znver1-direct,znver1-fp1|znver1-fp2")
|
||||
|
||||
(define_insn_reservation "znver1_sse_test_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "prefix_extra" "1")
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
(eq_attr "memory" "load")))))
|
||||
"znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
|
||||
|
||||
;; SSE moves
|
||||
;; Fix me: Need to revisit this again; some of the moves may be restricted
|
||||
;; to some fpu pipes.
|
||||
(define_insn_reservation "znver1_sse_mov" 2
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SI")
|
||||
(and (eq_attr "isa" "avx")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(eq_attr "memory" "none")))))
|
||||
"znver1-direct,znver1-ieu0")
|
||||
|
||||
(define_insn_reservation "znver1_avx_mov" 2
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "TI")
|
||||
(and (eq_attr "isa" "avx")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (match_operand:SI 1 "register_operand")
|
||||
(eq_attr "memory" "none"))))))
|
||||
"znver1-direct,znver1-ieu2")
|
||||
|
||||
(define_insn_reservation "znver1_sseavx_mov" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-direct,znver1-fpu")
|
||||
|
||||
(define_insn_reservation "znver1_sseavx_mov_store" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(eq_attr "memory" "store"))))
|
||||
"znver1-direct,znver1-fpu,znver1-store")
|
||||
|
||||
(define_insn_reservation "znver1_sseavx_mov_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-direct,znver1-load,znver1-fpu")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_mov" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-double,znver1-fpu")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_mov_store" 1
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(eq_attr "memory" "store"))))
|
||||
"znver1-double,znver1-fpu,znver1-store")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_mov_load" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-double,znver1-load,znver1-fpu")
|
||||
|
||||
;; SSE add
|
||||
(define_insn_reservation "znver1_sseavx_add" 3
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-direct,znver1-fp2|znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_sseavx_add_load" 7
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-direct,znver1-load,znver1-fp2|znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_add" 3
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-double,znver1-fp2|znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_add_load" 7
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-double,znver1-load,znver1-fp2|znver1-fp3")
|
||||
|
||||
(define_insn_reservation "znver1_sseavx_fma" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF")
|
||||
(and (eq_attr "type" "ssemuladd")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-direct,(znver1-fp0+znver1-fp3)|(znver1-fp1+znver1-fp3)")
|
||||
|
||||
(define_insn_reservation "znver1_sseavx_fma_load" 9
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF")
|
||||
(and (eq_attr "type" "ssemuladd")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-direct,znver1-load,(znver1-fp0+znver1-fp3)|(znver1-fp1+znver1-fp3)")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_fma" 5
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF")
|
||||
(and (eq_attr "type" "ssemuladd")
|
||||
(eq_attr "memory" "none"))))
|
||||
"znver1-double,(znver1-fp0+znver1-fp3)|(znver1-fp1+znver1-fp3)")
|
||||
|
||||
(define_insn_reservation "znver1_avx256_fma_load" 9
|
||||
(and (eq_attr "cpu" "znver1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF")
|
||||
(and (eq_attr "type" "ssemuladd")
|
||||
(eq_attr "memory" "load"))))
|
||||
"znver1-double,znver1-load,(znver1-fp0+znver1-fp3)|(znver1-fp1+znver1-fp3)")
|
||||
|
||||
;; sseiadd insns in DI/TI mode with no memory operand.
;; Default latency 1: znver1-direct, then any one of fp0, fp1 or fp3.
(define_insn_reservation "znver1_sseavx_iadd" 1
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "sseiadd")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "DI,TI"))))
  "znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3")
|
||||
|
||||
;; sseiadd insns in DI/TI mode that load from memory.
;; Default latency 5: znver1-direct, znver1-load, then any one of fp0,
;; fp1 or fp3.
(define_insn_reservation "znver1_sseavx_iadd_load" 5
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "sseiadd")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "DI,TI"))))
  "znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3")
|
||||
|
||||
;; sseiadd insns in OI mode with no memory operand.
;; Default latency 1: znver1-double, then any one of fp0, fp1 or fp3.
(define_insn_reservation "znver1_avx256_iadd" 1
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "sseiadd")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "OI"))))
  "znver1-double,znver1-fp0|znver1-fp1|znver1-fp3")
|
||||
|
||||
;; sseiadd insns in OI mode that load from memory.
;; Default latency 5: znver1-double, znver1-load, then any one of fp0,
;; fp1 or fp3.
(define_insn_reservation "znver1_avx256_iadd_load" 5
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "sseiadd")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "OI"))))
  "znver1-double,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3")
|
||||
|
||||
;; SSE conversions.
|
||||
;; sseicvt insns in SI mode whose operand 1 is an SF memory operand.
;; Default latency 9: znver1-double, znver1-load, fp3, then ieu0.
(define_insn_reservation "znver1_ssecvtsf_si_load" 9
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "memory" "load")
                 (and (eq_attr "mode" "SI")
                      (match_operand:SF 1 "memory_operand")))))
  "znver1-double,znver1-load,znver1-fp3,znver1-ieu0")
|
||||
|
||||
;; sseicvt insns in SI mode whose operand 1 is a DF register and which
;; have no memory operand.
;; Default latency 5: znver1-double, fp3, then ieu0.
(define_insn_reservation "znver1_ssecvtdf_si" 5
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "memory" "none")
                 (and (eq_attr "mode" "SI")
                      (match_operand:DF 1 "register_operand")))))
  "znver1-double,znver1-fp3,znver1-ieu0")
|
||||
|
||||
;; sseicvt insns in SI mode whose operand 1 is a DF memory operand.
;; Default latency 9: znver1-double, znver1-load, fp3, then ieu0.
(define_insn_reservation "znver1_ssecvtdf_si_load" 9
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "memory" "load")
                 (and (eq_attr "mode" "SI")
                      (match_operand:DF 1 "memory_operand")))))
  "znver1-double,znver1-load,znver1-fp3,znver1-ieu0")
|
||||
|
||||
;; All other ssecvt insns use the fp3 pipe.
|
||||
;; Check: Need to revisit this again.
|
||||
;; Some SSE converts may use different pipe combinations.
|
||||
;; Any ssecvt insn with no memory operand (no mode restriction).
;; Default latency 4: znver1-direct, then fp3.
(define_insn_reservation "znver1_ssecvt" 4
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ssecvt")))
  "znver1-direct,znver1-fp3")
|
||||
|
||||
;; Any ssecvt insn that loads from memory (no mode restriction).
;; Default latency 8: znver1-direct, znver1-load, then fp3.
(define_insn_reservation "znver1_ssecvt_load" 8
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "ssecvt")))
  "znver1-direct,znver1-load,znver1-fp3")
|
||||
|
||||
;; SSE div
|
||||
;; ssediv insns in V4SF/SF mode with no memory operand.
;; Default latency 10: znver1-direct, then fp3 held for 10 cycles.
(define_insn_reservation "znver1_ssediv_ss_ps" 10
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "V4SF,SF"))))
  "znver1-direct,znver1-fp3*10")
|
||||
|
||||
;; ssediv insns in V4SF/SF mode that load from memory.
;; Default latency 14: znver1-direct, znver1-load, then fp3 held for
;; 10 cycles.
(define_insn_reservation "znver1_ssediv_ss_ps_load" 14
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "V4SF,SF"))))
  "znver1-direct,znver1-load,znver1-fp3*10")
|
||||
|
||||
;; ssediv insns in V2DF/DF mode with no memory operand.
;; Default latency 13: znver1-direct, then fp3 held for 13 cycles.
(define_insn_reservation "znver1_ssediv_sd_pd" 13
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "V2DF,DF"))))
  "znver1-direct,znver1-fp3*13")
|
||||
|
||||
;; ssediv insns in V2DF/DF mode that load from memory.
;; Default latency 17: znver1-direct, znver1-load, then fp3 held for
;; 13 cycles.
(define_insn_reservation "znver1_ssediv_sd_pd_load" 17
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "V2DF,DF"))))
  "znver1-direct,znver1-load,znver1-fp3*13")
|
||||
|
||||
;; ssediv insns in V8SF mode with no memory operand.
;; Default latency 12: znver1-double, then fp3 held for 12 cycles.
(define_insn_reservation "znver1_ssediv_avx256_ps" 12
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "none"))))
  "znver1-double,znver1-fp3*12")
|
||||
|
||||
;; ssediv insns in V8SF mode that load from memory.
;; Default latency 16: znver1-double, znver1-load, then fp3 held for
;; 12 cycles.
(define_insn_reservation "znver1_ssediv_avx256_ps_load" 16
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "memory" "load")
            (and (eq_attr "type" "ssediv")
                 (eq_attr "mode" "V8SF"))))
  "znver1-double,znver1-load,znver1-fp3*12")
|
||||
|
||||
;; ssediv insns in V4DF mode with no memory operand.
;; Default latency 15: znver1-double, then fp3 held for 15 cycles.
(define_insn_reservation "znver1_ssediv_avx256_pd" 15
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "memory" "none")
            (and (eq_attr "type" "ssediv")
                 (eq_attr "mode" "V4DF"))))
  "znver1-double,znver1-fp3*15")
|
||||
|
||||
;; ssediv insns in V4DF mode that load from memory.
;; znver1-double, znver1-load, then fp3 held for 15 cycles.
;; Default latency is 19: the register form (znver1_ssediv_avx256_pd) is 15
;; and every other ssediv load reservation in this file adds 4 cycles
;; for the load (10->14, 13->17, 12->16); the previous value of 18 broke
;; that pattern.
(define_insn_reservation "znver1_ssediv_avx256_pd_load" 19
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "mode" "V4DF")
				   (and (eq_attr "type" "ssediv")
					(eq_attr "memory" "load"))))
			 "znver1-double,znver1-load,znver1-fp3*15")
|
||||
;; SSE MUL
|
||||
;; ssemul insns in V4SF/SF mode with no memory operand.
;; Default latency 3: znver1-direct, then fp0 or fp1 for 3 cycles.
(define_insn_reservation "znver1_ssemul_ss_ps" 3
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "V4SF,SF"))))
  "znver1-direct,(znver1-fp0|znver1-fp1)*3")
|
||||
|
||||
;; ssemul insns in V4SF/SF mode that load from memory.
;; Default latency 7: znver1-direct, znver1-load, then fp0 or fp1 for
;; 3 cycles.
(define_insn_reservation "znver1_ssemul_ss_ps_load" 7
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "V4SF,SF"))))
  "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
|
||||
|
||||
;; ssemul insns in V8SF mode with no memory operand.
;; Default latency 3: znver1-double, then fp0 or fp1 for 3 cycles.
(define_insn_reservation "znver1_ssemul_avx256_ps" 3
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "V8SF"))))
  "znver1-double,(znver1-fp0|znver1-fp1)*3")
|
||||
|
||||
;; ssemul insns in V8SF mode that load from memory.
;; Default latency 7: znver1-double, znver1-load, then fp0 or fp1 for
;; 3 cycles.
(define_insn_reservation "znver1_ssemul_avx256_ps_load" 7
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "V8SF"))))
  "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*3")
|
||||
|
||||
;; ssemul insns in V2DF/DF mode with no memory operand.
;; Default latency 4: znver1-direct, then fp0 or fp1 for 4 cycles.
(define_insn_reservation "znver1_ssemul_sd_pd" 4
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "V2DF,DF"))))
  "znver1-direct,(znver1-fp0|znver1-fp1)*4")
|
||||
|
||||
;; ssemul insns in V2DF/DF mode that load from memory.
;; Default latency 8: znver1-direct, znver1-load, then fp0 or fp1 for
;; 4 cycles.
(define_insn_reservation "znver1_ssemul_sd_pd_load" 8
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "V2DF,DF"))))
  "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*4")
|
||||
|
||||
;; ssemul insns in V4DF mode with no memory operand.
;; znver1-double, then fp0 or fp1 for 4 cycles.
;; The condition previously tested (eq_attr "mode" "V4DF") twice; the
;; redundant copy is dropped so the shape matches the sibling reservations.
;; NOTE(review): default latency is 5 while the pipe is held for 4 cycles
;; and the load form uses 8 -- confirm which value is intended.
(define_insn_reservation "znver1_ssemul_avx256_pd" 5
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "mode" "V4DF")
				   (and (eq_attr "type" "ssemul")
					(eq_attr "memory" "none"))))
			 "znver1-double,(znver1-fp0|znver1-fp1)*4")
|
||||
|
||||
;; ssemul insns in V4DF mode that load from memory.
;; Default latency 8: znver1-double, znver1-load, then fp0 or fp1 for
;; 4 cycles.
;; NOTE(review): the register form (znver1_ssemul_avx256_pd) has latency 5,
;; and other load forms in this file are 4 cycles above their register
;; form -- confirm whether 8 or 9 is intended here.
(define_insn_reservation "znver1_ssemul_avx256_pd_load" 8
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "V4DF"))))
  "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*4")
|
||||
|
||||
;;SSE imul
|
||||
;; SSE integer multiply in TI mode with no memory operand.
;; Default latency 3: znver1-direct, then fp0 held for 3 cycles.
;; The type test is "sseimul": the previous "ssemul" test is the FP
;; multiply type, which the V4SF/SF/V2DF/DF/V8SF/V4DF reservations already
;; handle, so with an integer mode this reservation could not match the
;; integer multiplies it is named for.
(define_insn_reservation "znver1_sseimul" 3
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "mode" "TI")
				   (and (eq_attr "type" "sseimul")
					(eq_attr "memory" "none"))))
			 "znver1-direct,znver1-fp0*3")
|
||||
|
||||
;; SSE integer multiply in OI mode with no memory operand.
;; Default latency 4: znver1-double, then fp0 held for 4 cycles.
;; The type test is "sseimul": the previous "ssemul" test is the FP
;; multiply type, so with an integer mode this reservation could not
;; match the integer multiplies it is named for.
(define_insn_reservation "znver1_sseimul_avx256" 4
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "mode" "OI")
				   (and (eq_attr "type" "sseimul")
					(eq_attr "memory" "none"))))
			 "znver1-double,znver1-fp0*4")
|
||||
|
||||
;; SSE integer multiply in TI mode that loads from memory.
;; Default latency 7: znver1-direct, znver1-load, then fp0 held for
;; 3 cycles.
;; The type test is "sseimul": the previous "ssemul" test is the FP
;; multiply type, so with an integer mode this reservation could not
;; match the integer multiplies it is named for.
(define_insn_reservation "znver1_sseimul_load" 7
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "mode" "TI")
				   (and (eq_attr "type" "sseimul")
					(eq_attr "memory" "load"))))
			 "znver1-direct,znver1-load,znver1-fp0*3")
|
||||
|
||||
;; SSE integer multiply in OI mode that loads from memory.
;; Default latency 8: znver1-double, znver1-load, then fp0 held for
;; 4 cycles.
;; The type test is "sseimul": the previous "ssemul" test is the FP
;; multiply type, so with an integer mode this reservation could not
;; match the integer multiplies it is named for.
(define_insn_reservation "znver1_sseimul_avx256_load" 8
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "mode" "OI")
				   (and (eq_attr "type" "sseimul")
					(eq_attr "memory" "load"))))
			 "znver1-double,znver1-load,znver1-fp0*4")
|
||||
|
||||
;; SSE integer multiply in DI mode with no memory operand.
;; Default latency 4: znver1-direct, then fp0 held for 4 cycles.
;; The type test is "sseimul": the previous "ssemul" test is the FP
;; multiply type, so with an integer mode this reservation could not
;; match the integer multiplies it is named for.
(define_insn_reservation "znver1_sseimul_di" 4
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "mode" "DI")
				   (and (eq_attr "memory" "none")
					(eq_attr "type" "sseimul"))))
			 "znver1-direct,znver1-fp0*4")
|
||||
|
||||
;; SSE integer multiply in DI mode that loads from memory.
;; Default latency 8: znver1-direct, znver1-load, then fp0 held for
;; 4 cycles.
;; The type test is "sseimul": the previous "ssemul" test is the FP
;; multiply type, so with an integer mode this reservation could not
;; match the integer multiplies it is named for.
(define_insn_reservation "znver1_sseimul_load_di" 8
			 (and (eq_attr "cpu" "znver1")
			      (and (eq_attr "mode" "DI")
				   (and (eq_attr "type" "sseimul")
					(eq_attr "memory" "load"))))
			 "znver1-direct,znver1-load,znver1-fp0*4")
|
||||
|
||||
;; SSE compares
|
||||
;; ssecmp insns in SF/DF/V4SF/V2DF mode with no memory operand.
;; Default latency 1: znver1-direct, then fp0 or fp1.
(define_insn_reservation "znver1_sse_cmp" 1
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "SF,DF,V4SF,V2DF"))))
  "znver1-direct,znver1-fp0|znver1-fp1")
|
||||
|
||||
;; ssecmp insns in SF/DF/V4SF/V2DF mode that load from memory.
;; Default latency 5: znver1-direct, znver1-load, then fp0 or fp1.
(define_insn_reservation "znver1_sse_cmp_load" 5
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "SF,DF,V4SF,V2DF"))))
  "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
|
||||
|
||||
;; ssecmp insns in V8SF/V4DF mode with no memory operand.
;; Default latency 1: znver1-double, then fp0 or fp1.
(define_insn_reservation "znver1_sse_cmp_avx256" 1
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "V8SF,V4DF"))))
  "znver1-double,znver1-fp0|znver1-fp1")
|
||||
|
||||
;; ssecmp insns in V8SF/V4DF mode that load from memory.
;; Default latency 5: znver1-double, znver1-load, then fp0 or fp1.
(define_insn_reservation "znver1_sse_cmp_avx256_load" 5
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "V8SF,V4DF"))))
  "znver1-double,znver1-load,znver1-fp0|znver1-fp1")
|
||||
|
||||
;; ssecmp insns in QI/HI/SI/DI/TI mode with no memory operand.
;; Default latency 1: znver1-direct, then fp0 or fp3.
(define_insn_reservation "znver1_sse_icmp" 1
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "QI,HI,SI,DI,TI"))))
  "znver1-direct,znver1-fp0|znver1-fp3")
|
||||
|
||||
;; ssecmp insns in QI/HI/SI/DI/TI mode that load from memory.
;; Default latency 5: znver1-direct, znver1-load, then fp0 or fp3.
(define_insn_reservation "znver1_sse_icmp_load" 5
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "QI,HI,SI,DI,TI"))))
  "znver1-direct,znver1-load,znver1-fp0|znver1-fp3")
|
||||
|
||||
;; ssecmp insns in OI mode with no memory operand.
;; Default latency 1: znver1-double, then fp0 or fp3.
(define_insn_reservation "znver1_sse_icmp_avx256" 1
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "memory" "none")
                 (eq_attr "mode" "OI"))))
  "znver1-double,znver1-fp0|znver1-fp3")
|
||||
|
||||
;; ssecmp insns in OI mode that load from memory.
;; Default latency 5: znver1-double, znver1-load, then fp0 or fp3.
(define_insn_reservation "znver1_sse_icmp_avx256_load" 5
  (and (eq_attr "cpu" "znver1")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "OI"))))
  "znver1-double,znver1-load,znver1-fp0|znver1-fp3")
|
||||
|
|
@ -22408,6 +22408,13 @@ supersets BMI, BMI2, TBM, F16C, FMA, FMA4, FSGSBASE, AVX, AVX2, XOP, LWP,
|
|||
AES, PCL_MUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1,
|
||||
SSE4.2, ABM and 64-bit instruction set extensions.
|
||||
|
||||
@item znver1
|
||||
AMD Family 17h core based CPUs with x86-64 instruction set support. (This
|
||||
supersets BMI, BMI2, F16C, FMA, FSGSBASE, AVX, AVX2, ADCX, RDSEED, MWAITX,
|
||||
SHA, CLZERO, AES, PCL_MUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3,
|
||||
SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, and 64-bit
|
||||
instruction set extensions.)
|
||||
|
||||
@item btver1
|
||||
CPUs based on AMD Family 14h cores with x86-64 instruction set support. (This
|
||||
supersets MMX, SSE, SSE2, SSE3, SSSE3, SSE4A, CX16, ABM and 64-bit
|
||||
|
|
Loading…
Reference in New Issue