config.gcc: Support "knm".

gcc/

        * config.gcc: Support "knm".
        * config/i386/driver-i386.c (host_detect_local_cpu): Detect "knm".
        * config/i386/i386-c.c (ix86_target_macros_internal): Handle
        PROCESSOR_KNM.
        * config/i386/i386.c (m_KNM): Define.
        (processor_target_table): Add "knm".
        (PTA_KNM): Define.
        (ix86_option_override_internal): Add "knm".
        (ix86_issue_rate): Add PROCESSOR_KNM.
        (ix86_adjust_cost): Ditto.
        (ia32_multipass_dfa_lookahead): Ditto.
        (get_builtin_code_for_version): Handle PROCESSOR_KNM.
        (fold_builtin_cpu): Add M_INTEL_KNM.
        * config/i386/i386.h (processor_costs): Define TARGET_KNM.
        (processor_type): Add PROCESSOR_KNM.
         * config/i386/x86-tune.def: Add m_KNM.
        * doc/invoke.texi: Add knm as x86 -march=/-mtune= CPU type.

libgcc/
        * config/i386/cpuinfo.h (processor_types): Add INTEL_KNM.
        * config/i386/cpuinfo.c (get_intel_cpu): Detect Knights Mill.

gcc/testsuite/

        * gcc.target/i386/builtin_target.c: Test knm.
        * gcc.target/i386/funcspec-56.inc: Test arch=knm.

From-SVN: r253013
This commit is contained in:
Sebastian Peryt 2017-09-20 15:47:30 +02:00 committed by Uros Bizjak
parent 2288ea2381
commit cace2309d4
14 changed files with 123 additions and 47 deletions

View File

@ -1,3 +1,23 @@
2017-09-20 Sebastian Peryt <sebastian.peryt@intel.com>
* config.gcc: Support "knm".
* config/i386/driver-i386.c (host_detect_local_cpu): Detect "knm".
* config/i386/i386-c.c (ix86_target_macros_internal): Handle
PROCESSOR_KNM.
* config/i386/i386.c (m_KNM): Define.
(processor_target_table): Add "knm".
(PTA_KNM): Define.
(ix86_option_override_internal): Add "knm".
(ix86_issue_rate): Add PROCESSOR_KNM.
(ix86_adjust_cost): Ditto.
(ia32_multipass_dfa_lookahead): Ditto.
(get_builtin_code_for_version): Handle PROCESSOR_KNM.
(fold_builtin_cpu): Add M_INTEL_KNM.
* config/i386/i386.h (processor_costs): Define TARGET_KNM.
(processor_type): Add PROCESSOR_KNM.
* config/i386/x86-tune.def: Add m_KNM.
* doc/invoke.texi: Add knm as x86 -march=/-mtune= CPU type.
2017-09-20 Richard Biener <rguenther@suse.de>
PR tree-optimization/80213
@ -97,7 +117,7 @@
* rtl.h (get_stack_check_protect): Prototype.
* target.def (stack_clash_protection_final_dynamic_probe): New hook.
* targhooks.c (default_stack_clash_protection_final_dynamic_probe): New.
* targhooks.h (default_stack_clash_protection_final_dynamic_probe):
* targhooks.h (default_stack_clash_protection_final_dynamic_probe):
Prototype.
* doc/tm.texi.in (TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE):
Add @hook.
@ -312,8 +332,8 @@
2017-09-17 Daniel Santos <daniel.santos@pobox.com>
config/i386/i386.c: (xlogue_layout::STUB_NAME_MAX_LEN): Increase to 20
bytes.
* config/i386/i386.c (xlogue_layout::STUB_NAME_MAX_LEN):
Increase to 20 bytes.
(xlogue_layout::s_stub_names): Add an additional size-2 diminsion.
(xlogue_layout::get_stub_name): Modify to select the appropairate sse
or avx version of the stub.

View File

@ -623,7 +623,7 @@ pentium4 pentium4m pentiumpro prescott lakemont"
x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \
core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \
sandybridge ivybridge haswell broadwell bonnell silvermont knl \
sandybridge ivybridge haswell broadwell bonnell silvermont knl knm \
skylake-avx512 x86-64 native"
# Additional x86 processors supported by --with-cpu=. Each processor

View File

@ -790,6 +790,10 @@ const char *host_detect_local_cpu (int argc, const char **argv)
/* Knights Landing. */
cpu = "knl";
break;
case 0x85:
/* Knights Mill. */
cpu = "knm";
break;
default:
if (arch)
{
@ -797,6 +801,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
/* Assume Knights Landing. */
if (has_avx512f)
cpu = "knl";
/* Assume Knights Mill */
else if (has_avx5124vnniw)
cpu = "knm";
/* Assume Skylake. */
else if (has_clflushopt)
cpu = "skylake";

View File

@ -176,6 +176,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__knl");
def_or_undef (parse_in, "__knl__");
break;
case PROCESSOR_KNM:
def_or_undef (parse_in, "__knm");
def_or_undef (parse_in, "__knm__");
break;
case PROCESSOR_SKYLAKE_AVX512:
def_or_undef (parse_in, "__skylake_avx512");
def_or_undef (parse_in, "__skylake_avx512__");
@ -292,6 +296,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
case PROCESSOR_KNL:
def_or_undef (parse_in, "__tune_knl__");
break;
case PROCESSOR_KNM:
def_or_undef (parse_in, "__tune_knm__");
break;
case PROCESSOR_SKYLAKE_AVX512:
def_or_undef (parse_in, "__tune_skylake_avx512__");
break;

View File

@ -2192,6 +2192,7 @@ const struct processor_costs *ix86_cost = &pentium_cost;
#define m_BONNELL (1U<<PROCESSOR_BONNELL)
#define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
#define m_KNL (1U<<PROCESSOR_KNL)
#define m_KNM (1U<<PROCESSOR_KNM)
#define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
#define m_INTEL (1U<<PROCESSOR_INTEL)
@ -2903,6 +2904,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
{"bonnell", &atom_cost, 16, 15, 16, 7, 16},
{"silvermont", &slm_cost, 16, 15, 16, 7, 16},
{"knl", &slm_cost, 16, 15, 16, 7, 16},
{"knm", &slm_cost, 16, 15, 16, 7, 16},
{"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
{"intel", &intel_cost, 16, 15, 16, 7, 16},
{"geode", &geode_cost, 0, 0, 0, 0, 0},
@ -5352,6 +5354,8 @@ ix86_option_override_internal (bool main_args_p,
(PTA_CORE2 | PTA_MOVBE)
#define PTA_SILVERMONT \
(PTA_WESTMERE | PTA_MOVBE)
#define PTA_KNM \
(PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ)
/* if this reaches 64, need to widen struct pta flags below */
@ -5422,6 +5426,7 @@ ix86_option_override_internal (bool main_args_p,
{"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
{"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
{"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
{"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
{"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
{"geode", PROCESSOR_GEODE, CPU_GEODE,
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
@ -30282,6 +30287,7 @@ ix86_issue_rate (void)
case PROCESSOR_BONNELL:
case PROCESSOR_SILVERMONT:
case PROCESSOR_KNL:
case PROCESSOR_KNM:
case PROCESSOR_INTEL:
case PROCESSOR_K6:
case PROCESSOR_BTVER2:
@ -30648,6 +30654,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
case PROCESSOR_SILVERMONT:
case PROCESSOR_KNL:
case PROCESSOR_KNM:
case PROCESSOR_INTEL:
if (!reload_completed)
return cost;
@ -30719,6 +30726,7 @@ ia32_multipass_dfa_lookahead (void)
case PROCESSOR_BONNELL:
case PROCESSOR_SILVERMONT:
case PROCESSOR_KNL:
case PROCESSOR_KNM:
case PROCESSOR_INTEL:
/* Generally, we want haifa-sched:max_issue() to look ahead as far
as many instructions can be executed on a cycle, i.e.,
@ -33844,6 +33852,10 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
arg_str = "knl";
priority = P_PROC_AVX512F;
break;
case PROCESSOR_KNM:
arg_str = "knm";
priority = P_PROC_AVX512F;
break;
case PROCESSOR_SILVERMONT:
arg_str = "silvermont";
priority = P_PROC_SSE4_2;
@ -34527,6 +34539,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
M_AMD_BTVER1,
M_AMD_BTVER2,
M_AMDFAM17H,
M_INTEL_KNM,
M_CPU_SUBTYPE_START,
M_INTEL_COREI7_NEHALEM,
M_INTEL_COREI7_WESTMERE,
@ -34570,6 +34583,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
{"bonnell", M_INTEL_BONNELL},
{"silvermont", M_INTEL_SILVERMONT},
{"knl", M_INTEL_KNL},
{"knm", M_INTEL_KNM},
{"amdfam10h", M_AMDFAM10H},
{"barcelona", M_AMDFAM10H_BARCELONA},
{"shanghai", M_AMDFAM10H_SHANGHAI},

View File

@ -351,6 +351,7 @@ extern const struct processor_costs ix86_size_cost;
#define TARGET_BONNELL (ix86_tune == PROCESSOR_BONNELL)
#define TARGET_SILVERMONT (ix86_tune == PROCESSOR_SILVERMONT)
#define TARGET_KNL (ix86_tune == PROCESSOR_KNL)
#define TARGET_KNM (ix86_tune == PROCESSOR_KNM)
#define TARGET_SKYLAKE_AVX512 (ix86_tune == PROCESSOR_SKYLAKE_AVX512)
#define TARGET_INTEL (ix86_tune == PROCESSOR_INTEL)
#define TARGET_GENERIC (ix86_tune == PROCESSOR_GENERIC)
@ -2250,6 +2251,7 @@ enum processor_type
PROCESSOR_BONNELL,
PROCESSOR_SILVERMONT,
PROCESSOR_KNL,
PROCESSOR_KNM,
PROCESSOR_SKYLAKE_AVX512,
PROCESSOR_INTEL,
PROCESSOR_GEODE,

View File

@ -41,7 +41,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
/* X86_TUNE_SCHEDULE: Enable scheduling. */
DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
| m_INTEL | m_KNL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
| m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
/* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
on modern chips. Preffer stores affecting whole integer register
@ -49,7 +49,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
value over movb. */
DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
| m_KNL | m_AMD_MULTIPLE | m_GENERIC)
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC)
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
destinations to be 128bit to allow register renaming on 128bit SSE units,
@ -85,13 +85,13 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall",
partial dependencies. */
DEF_TUNE (X86_TUNE_MOVX, "movx",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
| m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)
| m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)
/* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
full sized loads. */
DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
| m_KNL | m_AMD_MULTIPLE | m_GENERIC)
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC)
/* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
conditional jump instruction for 32 bit TARGET.
@ -125,7 +125,7 @@ DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel",
/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
during reassociation of fp computation. */
DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL |m_INTEL | m_BDVER1
m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL | m_KNM |m_INTEL | m_BDVER1
| m_BDVER2 | m_ZNVER1 | m_GENERIC)
/*****************************************************************************/
@ -145,7 +145,7 @@ DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
regression on mgrid due to IRA limitation leading to unecessary
use of the frame pointer in 32bit mode. */
DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
| m_ATHLON_K8)
/* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are
@ -207,8 +207,8 @@ DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns",
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL |m_INTEL |
m_ATHLON_K8 | m_AMDFAM10)
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM
|m_INTEL | m_ATHLON_K8 | m_AMDFAM10)
/*****************************************************************************/
/* Integer instruction selection tuning */
@ -231,22 +231,22 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO))
/* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions. */
DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
~(m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
| m_KNL | m_GENERIC))
| m_KNL | m_KNM | m_GENERIC))
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
| m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC))
| m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC))
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL
| m_INTEL)
| m_KNM | m_INTEL)
/* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */
DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr",
m_BONNELL | m_SILVERMONT | m_KNL)
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM)
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
vector path on AMD machines.
@ -263,7 +263,7 @@ DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8",
/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
a conditional move. */
DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove",
m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL)
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
/* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such
as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */
@ -281,17 +281,17 @@ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
/* X86_TUNE_USE_SAHF: Controls use of SAHF. */
DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
| m_KNL | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
| m_BTVER | m_ZNVER1 | m_GENERIC)
/* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. */
DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
| m_K6))
/* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */
DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
| m_LAKEMONT | m_AMD_MULTIPLE | m_GENERIC)
/*****************************************************************************/
@ -308,7 +308,7 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
integer operand. */
DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
| m_SILVERMONT | m_KNL | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
| m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
/* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */
DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
@ -316,7 +316,7 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
/* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */
DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
| m_KNL | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC)
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC)
/*****************************************************************************/
/* SSE instruction selection tuning */
@ -330,13 +330,13 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead
of a sequence loading registers by parts. */
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM
| m_INTEL | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER1 | m_GENERIC)
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead
of a sequence loading registers by parts. */
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM
| m_INTEL | m_BDVER | m_ZNVER1 | m_GENERIC)
/* Use packed single precision instructions where posisble. I.e. movups instead
@ -375,7 +375,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
/* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
fp converts to destination register. */
DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",
m_SILVERMONT | m_KNL | m_INTEL)
m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
from FP to FP. This form of instructions avoids partial write to the
@ -389,7 +389,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
/* X86_TUNE_SLOW_SHUFB: Indicates tunings with slow pshufb instruction. */
DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL)
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
/* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to
execute 2 or more vector instructions in parallel. */
@ -550,4 +550,4 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
/* X86_TUNE_ONE_IF_CONV_INSNS: Restrict a number of cmov insns in
if-converted sequence to one. */
DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC)
m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GENERIC)

View File

@ -25089,6 +25089,12 @@ SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,
BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, AVX512F, AVX512PF, AVX512ER and
AVX512CD instruction set support.
@item knm
Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,
BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, AVX512F, AVX512PF, AVX512ER, AVX512CD,
AVX5124VNNIW, AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support.
@item skylake-avx512
Intel Skylake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
SSSE3, SSE4.1, SSE4.2, POPCNT, PKU, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,

View File

@ -1,3 +1,8 @@
2017-09-20 Sebastian Peryt <sebastian.peryt@intel.com>
* gcc.target/i386/builtin_target.c: Test knm.
* gcc.target/i386/funcspec-56.inc: Test arch=knm.
2017-09-20 Richard Biener <rguenther@suse.de>
PR tree-optimization/77362

View File

@ -42,6 +42,10 @@ check_intel_cpu_model (unsigned int family, unsigned int model,
/* Knights Landing. */
assert (__builtin_cpu_is ("knl"));
break;
case 0x85:
/* Knights Mill */
assert (__builtin_cpu_is ("knm"));
break;
case 0x1a:
case 0x1e:
case 0x1f:

View File

@ -142,6 +142,7 @@ extern void test_arch_corei7 (void) __attribute__((__target__("arch=corei7")));
extern void test_arch_corei7_avx (void) __attribute__((__target__("arch=corei7-avx")));
extern void test_arch_core_avx2 (void) __attribute__((__target__("arch=core-avx2")));
extern void test_arch_knl (void) __attribute__((__target__("arch=knl")));
extern void test_arch_knm (void) __attribute__((__target__("arch=knm")));
extern void test_arch_skylake_avx512 (void) __attribute__((__target__("arch=skylake-avx512")));
extern void test_arch_k8 (void) __attribute__((__target__("arch=k8")));
extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3")));

View File

@ -1,30 +1,35 @@
2017-09-20 Sebastian Peryt <sebastian.peryt@intel.com>
* config/i386/cpuinfo.h (processor_types): Add INTEL_KNM.
* config/i386/cpuinfo.c (get_intel_cpu): Detect Knights Mill.
2017-09-17 Daniel Santos <daniel.santos@pobox.com>
config/i386/i386-asm.h (PASTE2): New macro.
* config/i386/i386-asm.h (PASTE2): New macro.
(ASMNAME): Modify to use PASTE2.
(MS2SYSV_STUB_PREFIX): New macro for isa prefix.
(MS2SYSV_STUB_BEGIN, MS2SYSV_STUB_END): New macros for stub headers.
config/i386/resms64.S: Rename to a header file, use MS2SYSV_STUB_BEGIN
* config/i386/resms64.S: Rename to a header file, use MS2SYSV_STUB_BEGIN
instead of HIDDEN_FUNC and MS2SYSV_STUB_END instead of FUNC_END.
config/i386/resms64f.S: Likewise.
config/i386/resms64fx.S: Likewise.
config/i386/resms64x.S: Likewise.
config/i386/savms64.S: Likewise.
config/i386/savms64f.S: Likewise.
config/i386/avx_resms64.S: New file that only defines a macro and
* config/i386/resms64f.S: Likewise.
* config/i386/resms64fx.S: Likewise.
* config/i386/resms64x.S: Likewise.
* config/i386/savms64.S: Likewise.
* config/i386/savms64f.S: Likewise.
* config/i386/avx_resms64.S: New file that only defines a macro and
includes it's corresponding header file.
config/i386/avx_resms64f.S: Likewise.
config/i386/avx_resms64fx.S: Likewise.
config/i386/avx_resms64x.S: Likewise.
config/i386/avx_savms64.S: Likewise.
config/i386/avx_savms64f.S: Likewise.
config/i386/sse_resms64.S: Likewise.
config/i386/sse_resms64f.S: Likewise.
config/i386/sse_resms64fx.S: Likewise.
config/i386/sse_resms64x.S: Likewise.
config/i386/sse_savms64.S: Likewise.
config/i386/sse_savms64f.S: Likewise.
config/i386/t-msabi: Modified to add avx and sse versions of stubs.
* config/i386/avx_resms64f.S: Likewise.
* config/i386/avx_resms64fx.S: Likewise.
* config/i386/avx_resms64x.S: Likewise.
* config/i386/avx_savms64.S: Likewise.
* config/i386/avx_savms64f.S: Likewise.
* config/i386/sse_resms64.S: Likewise.
* config/i386/sse_resms64f.S: Likewise.
* config/i386/sse_resms64fx.S: Likewise.
* config/i386/sse_resms64x.S: Likewise.
* config/i386/sse_savms64.S: Likewise.
* config/i386/sse_savms64f.S: Likewise.
* config/i386/t-msabi: Modified to add avx and sse versions of stubs.
2017-09-01 Olivier Hainque <hainque@adacore.com>

View File

@ -137,6 +137,10 @@ get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
/* Knights Landing. */
__cpu_model.__cpu_type = INTEL_KNL;
break;
case 0x85:
/* Knights Mill. */
__cpu_model.__cpu_type = INTEL_KNM;
break;
case 0x1a:
case 0x1e:
case 0x1f:

View File

@ -47,6 +47,7 @@ enum processor_types
AMD_BTVER1,
AMD_BTVER2,
AMDFAM17H,
INTEL_KNM,
CPU_TYPE_MAX
};