AVX512FP16: Initial support for AVX512FP16 feature and scalar _Float16 instructions.
gcc/ChangeLog: * common/config/i386/cpuinfo.h (get_available_features): Detect FEATURE_AVX512FP16. * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512FP16_SET, OPTION_MASK_ISA_AVX512FP16_UNSET, OPTION_MASK_ISA2_AVX512FP16_SET, OPTION_MASK_ISA2_AVX512FP16_UNSET): New. (OPTION_MASK_ISA2_AVX512BW_UNSET, OPTION_MASK_ISA2_AVX512BF16_UNSET): Add AVX512FP16. (ix86_handle_option): Handle -mavx512fp16. * common/config/i386/i386-cpuinfo.h (enum processor_features): Add FEATURE_AVX512FP16. * common/config/i386/i386-isas.h: Add entry for AVX512FP16. * config.gcc: Add avx512fp16intrin.h. * config/i386/avx512fp16intrin.h: New intrinsic header. * config/i386/cpuid.h: Add bit_AVX512FP16. * config/i386/i386-builtin-types.def: (FLOAT16): New primitive type. * config/i386/i386-builtins.c: Support _Float16 type for i386 backend. (ix86_register_float16_builtin_type): New function. (ix86_float16_type_node): New. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AVX512FP16__. * config/i386/i386-expand.c (ix86_expand_branch): Support HFmode. (ix86_prepare_fp_compare_args): Adjust TARGET_SSE_MATH && SSE_FLOAT_MODE_P to SSE_FLOAT_MODE_SSEMATH_OR_HF_P. (ix86_expand_fp_movcc): Ditto. * config/i386/i386-isa.def: Add PTA define for AVX512FP16. * config/i386/i386-options.c (isa2_opts): Add -mavx512fp16. (ix86_valid_target_attribute_inner_p): Add avx512fp16 attribute. * config/i386/i386.c (ix86_get_ssemov): Use vmovdqu16/vmovw/vmovsh for HFmode/HImode scalar or vector. (ix86_get_excess_precision): Use FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 when TARGET_AVX512FP16 existed. (sse_store_index): Use SFmode cost for HFmode cost. (inline_memory_move_cost): Add HFmode, and perfer SSE cost over GPR cost for HFmode. (ix86_hard_regno_mode_ok): Allow HImode in sse register. (ix86_mangle_type): Add manlging for _Float16 type. (inline_secondary_memory_needed): No memory is needed for 16bit movement between gpr and sse reg under TARGET_AVX512FP16. (ix86_multiplication_cost): Adjust TARGET_SSE_MATH && SSE_FLOAT_MODE_P to SSE_FLOAT_MODE_SSEMATH_OR_HF_P. (ix86_division_cost): Ditto. (ix86_rtx_costs): Ditto. (ix86_add_stmt_cost): Ditto. (ix86_optab_supported_p): Ditto. * config/i386/i386.h (VALID_AVX512F_SCALAR_MODE): Add HFmode. (SSE_FLOAT_MODE_SSEMATH_OR_HF_P): Add HFmode. (PTA_SAPPHIRERAPIDS): Add PTA_AVX512FP16. * config/i386/i386.md (mode): Add HFmode. (MODE_SIZE): Add HFmode. (isa): Add avx512fp16. (enabled): Handle avx512fp16. (ssemodesuffix): Add sh suffix for HFmode. (comm): Add mult, div. (plusminusmultdiv): New code iterator. (insn): Add mult, div. (*movhf_internal): Adjust for avx512fp16 instruction. (*movhi_internal): Ditto. (*cmpi<unord>hf): New define_insn for HFmode. (*ieee_s<ieee_maxmin>hf3): Likewise. (extendhf<mode>2): Likewise. (trunc<mode>hf2): Likewise. (float<floatunssuffix><mode>hf2): Likewise. (*<insn>hf): Likewise. (cbranchhf4): New expander. (movhfcc): Likewise. (<insn>hf3): Likewise. (mulhf3): Likewise. (divhf3): Likewise. * config/i386/i386.opt: Add mavx512fp16. * config/i386/immintrin.h: Include avx512fp16intrin.h. * doc/invoke.texi: Add mavx512fp16. * doc/extend.texi: Add avx512fp16 Usage Notes. gcc/testsuite/ChangeLog: * gcc.target/i386/avx-1.c: Add -mavx512fp16 in dg-options. * gcc.target/i386/avx-2.c: Ditto. * gcc.target/i386/avx512-check.h: Check cpuid for AVX512FP16. * gcc.target/i386/funcspec-56.inc: Add new target attribute check. * gcc.target/i386/sse-13.c: Add -mavx512fp16. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * lib/target-supports.exp: (check_effective_target_avx512fp16): New. * g++.target/i386/float16-1.C: New test. * g++.target/i386/float16-2.C: Ditto. * g++.target/i386/float16-3.C: Ditto. * gcc.target/i386/avx512fp16-12a.c: Ditto. * gcc.target/i386/avx512fp16-12b.c: Ditto. * gcc.target/i386/float16-3a.c: Ditto. * gcc.target/i386/float16-3b.c: Ditto. * gcc.target/i386/float16-4a.c: Ditto. * gcc.target/i386/float16-4b.c: Ditto. * gcc.target/i386/pr54855-12.c: Ditto. * g++.dg/other/i386-2.C: Ditto. * g++.dg/other/i386-3.C: Ditto. Co-Authored-By: H.J. Lu <hongjiu.lu@intel.com> Co-Authored-By: Liu Hongtao <hongtao.liu@intel.com> Co-Authored-By: Wang Hongyu <hongyu.wang@intel.com> Co-Authored-By: Xu Dianhong <dianhong.xu@intel.com>
This commit is contained in:
parent
f19a327077
commit
a68412117f
@ -731,6 +731,8 @@ get_available_features (struct __processor_model *cpu_model,
|
||||
set_feature (FEATURE_AVX5124FMAPS);
|
||||
if (edx & bit_AVX512VP2INTERSECT)
|
||||
set_feature (FEATURE_AVX512VP2INTERSECT);
|
||||
if (edx & bit_AVX512FP16)
|
||||
set_feature (FEATURE_AVX512FP16);
|
||||
}
|
||||
|
||||
__cpuid_count (7, 1, eax, ebx, ecx, edx);
|
||||
|
@ -82,6 +82,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_AVX5124VNNIW_SET OPTION_MASK_ISA2_AVX5124VNNIW
|
||||
#define OPTION_MASK_ISA_AVX512VBMI2_SET \
|
||||
(OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512F_SET)
|
||||
#define OPTION_MASK_ISA_AVX512FP16_SET OPTION_MASK_ISA_AVX512BW_SET
|
||||
#define OPTION_MASK_ISA2_AVX512FP16_SET OPTION_MASK_ISA2_AVX512FP16
|
||||
#define OPTION_MASK_ISA_AVX512VNNI_SET \
|
||||
(OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET)
|
||||
#define OPTION_MASK_ISA2_AVXVNNI_SET OPTION_MASK_ISA2_AVXVNNI
|
||||
@ -231,6 +233,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_AVX5124FMAPS_UNSET OPTION_MASK_ISA2_AVX5124FMAPS
|
||||
#define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW
|
||||
#define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2
|
||||
#define OPTION_MASK_ISA_AVX512FP16_UNSET OPTION_MASK_ISA_AVX512BW_UNSET
|
||||
#define OPTION_MASK_ISA2_AVX512FP16_UNSET OPTION_MASK_ISA2_AVX512FP16
|
||||
#define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI
|
||||
#define OPTION_MASK_ISA2_AVXVNNI_UNSET OPTION_MASK_ISA2_AVXVNNI
|
||||
#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ
|
||||
@ -313,7 +317,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
(OPTION_MASK_ISA2_AVX512BF16_UNSET \
|
||||
| OPTION_MASK_ISA2_AVX5124FMAPS_UNSET \
|
||||
| OPTION_MASK_ISA2_AVX5124VNNIW_UNSET \
|
||||
| OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
|
||||
| OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET \
|
||||
| OPTION_MASK_ISA2_AVX512FP16_UNSET)
|
||||
#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
|
||||
(OPTION_MASK_ISA2_AVX512F_UNSET)
|
||||
#define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
|
||||
@ -326,7 +331,9 @@ along with GCC; see the file COPYING3. If not see
|
||||
(OPTION_MASK_ISA2_SSE3_UNSET | OPTION_MASK_ISA2_KL_UNSET)
|
||||
#define OPTION_MASK_ISA2_SSE_UNSET OPTION_MASK_ISA2_SSE2_UNSET
|
||||
|
||||
#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA2_AVX512BF16_UNSET
|
||||
#define OPTION_MASK_ISA2_AVX512BW_UNSET \
|
||||
(OPTION_MASK_ISA2_AVX512BF16_UNSET \
|
||||
| OPTION_MASK_ISA2_AVX512FP16_UNSET)
|
||||
|
||||
/* Set 1 << value as value of -malign-FLAG option. */
|
||||
|
||||
@ -853,6 +860,21 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mavx512fp16:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX512FP16_SET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512FP16_SET;
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512FP16_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512FP16_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512FP16_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512FP16_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mavx512vnni:
|
||||
if (value)
|
||||
{
|
||||
|
@ -228,6 +228,7 @@ enum processor_features
|
||||
FEATURE_AESKLE,
|
||||
FEATURE_WIDEKL,
|
||||
FEATURE_AVXVNNI,
|
||||
FEATURE_AVX512FP16,
|
||||
CPU_FEATURE_MAX
|
||||
};
|
||||
|
||||
|
@ -169,4 +169,5 @@ ISA_NAMES_TABLE_START
|
||||
ISA_NAMES_TABLE_ENTRY("aeskle", FEATURE_AESKLE, P_NONE, NULL)
|
||||
ISA_NAMES_TABLE_ENTRY("widekl", FEATURE_WIDEKL, P_NONE, "-mwidekl")
|
||||
ISA_NAMES_TABLE_ENTRY("avxvnni", FEATURE_AVXVNNI, P_NONE, "-mavxvnni")
|
||||
ISA_NAMES_TABLE_ENTRY("avx512fp16", FEATURE_AVX512FP16, P_NONE, "-mavx512fp16")
|
||||
ISA_NAMES_TABLE_END
|
||||
|
@ -416,7 +416,7 @@ i[34567]86-*-* | x86_64-*-*)
|
||||
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
|
||||
amxbf16intrin.h x86gprintrin.h uintrintrin.h
|
||||
hresetintrin.h keylockerintrin.h avxvnniintrin.h
|
||||
mwaitintrin.h"
|
||||
mwaitintrin.h avx512fp16intrin.h"
|
||||
;;
|
||||
ia64-*-*)
|
||||
extra_headers=ia64intrin.h
|
||||
|
53
gcc/config/i386/avx512fp16intrin.h
Normal file
53
gcc/config/i386/avx512fp16intrin.h
Normal file
@ -0,0 +1,53 @@
|
||||
/* Copyright (C) 2019 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512FP16INTRIN_H_INCLUDED
|
||||
#define __AVX512FP16INTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512FP16__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512fp16")
|
||||
#define __DISABLE_AVX512FP16__
|
||||
#endif /* __AVX512FP16__ */
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
|
||||
typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
|
||||
typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
|
||||
|
||||
/* The Intel API is flexible enough that we must allow aliasing with other
|
||||
vector types, and their scalar components. */
|
||||
typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
|
||||
typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
|
||||
typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
|
||||
#ifdef __DISABLE_AVX512FP16__
|
||||
#undef __DISABLE_AVX512FP16__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512FP16__ */
|
||||
|
||||
#endif /* __AVX512FP16INTRIN_H_INCLUDED */
|
@ -126,6 +126,7 @@
|
||||
#define bit_AVX5124VNNIW (1 << 2)
|
||||
#define bit_AVX5124FMAPS (1 << 3)
|
||||
#define bit_AVX512VP2INTERSECT (1 << 8)
|
||||
#define bit_AVX512FP16 (1 << 23)
|
||||
#define bit_IBT (1 << 20)
|
||||
#define bit_UINTR (1 << 5)
|
||||
#define bit_PCONFIG (1 << 18)
|
||||
|
@ -68,6 +68,7 @@ DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
|
||||
DEF_PRIMITIVE_TYPE (UINT16, short_unsigned_type_node)
|
||||
DEF_PRIMITIVE_TYPE (INT64, long_long_integer_type_node)
|
||||
DEF_PRIMITIVE_TYPE (UINT64, long_long_unsigned_type_node)
|
||||
DEF_PRIMITIVE_TYPE (FLOAT16, ix86_float16_type_node)
|
||||
DEF_PRIMITIVE_TYPE (FLOAT, float_type_node)
|
||||
DEF_PRIMITIVE_TYPE (DOUBLE, double_type_node)
|
||||
DEF_PRIMITIVE_TYPE (FLOAT80, float80_type_node)
|
||||
|
@ -125,6 +125,7 @@ BDESC_VERIFYS (IX86_BUILTIN_MAX,
|
||||
/* Table for the ix86 builtin non-function types. */
|
||||
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
|
||||
|
||||
tree ix86_float16_type_node = NULL_TREE;
|
||||
/* Retrieve an element from the above table, building some of
|
||||
the types lazily. */
|
||||
|
||||
@ -1343,6 +1344,26 @@ ix86_init_builtins_va_builtins_abi (void)
|
||||
BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
|
||||
}
|
||||
|
||||
static void
|
||||
ix86_register_float16_builtin_type (void)
|
||||
{
|
||||
/* Provide the _Float16 type and float16_type_node if needed so that
|
||||
it can be used in AVX512FP16 intrinsics and builtins. */
|
||||
if (!float16_type_node)
|
||||
{
|
||||
ix86_float16_type_node = make_node (REAL_TYPE);
|
||||
TYPE_PRECISION (ix86_float16_type_node) = 16;
|
||||
SET_TYPE_MODE (ix86_float16_type_node, HFmode);
|
||||
layout_type (ix86_float16_type_node);
|
||||
}
|
||||
else
|
||||
ix86_float16_type_node = float16_type_node;
|
||||
|
||||
if (!maybe_get_identifier ("_Float16") && TARGET_SSE2)
|
||||
lang_hooks.types.register_builtin_type (ix86_float16_type_node,
|
||||
"_Float16");
|
||||
}
|
||||
|
||||
static void
|
||||
ix86_init_builtin_types (void)
|
||||
{
|
||||
@ -1371,6 +1392,8 @@ ix86_init_builtin_types (void)
|
||||
it. */
|
||||
lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
|
||||
|
||||
ix86_register_float16_builtin_type ();
|
||||
|
||||
const_string_type_node
|
||||
= build_pointer_type (build_qualified_type
|
||||
(char_type_node, TYPE_QUAL_CONST));
|
||||
|
@ -598,6 +598,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__PTWRITE__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AVX512BF16)
|
||||
def_or_undef (parse_in, "__AVX512BF16__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AVX512FP16)
|
||||
def_or_undef (parse_in, "__AVX512FP16__");
|
||||
if (TARGET_MMX_WITH_SSE)
|
||||
def_or_undef (parse_in, "__MMX_WITH_SSE__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_ENQCMD)
|
||||
|
@ -2351,6 +2351,7 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case E_HFmode:
|
||||
case E_SFmode:
|
||||
case E_DFmode:
|
||||
case E_XFmode:
|
||||
@ -2664,7 +2665,7 @@ ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
|
||||
bool unordered_compare = ix86_unordered_fp_compare (code);
|
||||
rtx op0 = *pop0, op1 = *pop1;
|
||||
machine_mode op_mode = GET_MODE (op0);
|
||||
bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
|
||||
bool is_sse = SSE_FLOAT_MODE_SSEMATH_OR_HF_P (op_mode);
|
||||
|
||||
/* All of the unordered compare instructions only work on registers.
|
||||
The same is true of the fcomi compare instructions. The XFmode
|
||||
@ -4149,7 +4150,7 @@ ix86_expand_fp_movcc (rtx operands[])
|
||||
rtx op0 = XEXP (operands[1], 0);
|
||||
rtx op1 = XEXP (operands[1], 1);
|
||||
|
||||
if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
|
||||
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
{
|
||||
machine_mode cmode;
|
||||
|
||||
|
@ -108,3 +108,4 @@ DEF_PTA(HRESET)
|
||||
DEF_PTA(KL)
|
||||
DEF_PTA(WIDEKL)
|
||||
DEF_PTA(AVXVNNI)
|
||||
DEF_PTA(AVX512FP16)
|
||||
|
@ -223,7 +223,8 @@ static struct ix86_target_opts isa2_opts[] =
|
||||
{ "-mhreset", OPTION_MASK_ISA2_HRESET },
|
||||
{ "-mkl", OPTION_MASK_ISA2_KL },
|
||||
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
|
||||
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI }
|
||||
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
|
||||
{ "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }
|
||||
};
|
||||
static struct ix86_target_opts isa_opts[] =
|
||||
{
|
||||
@ -1049,6 +1050,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
||||
IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16),
|
||||
IX86_ATTR_ISA ("hreset", OPT_mhreset),
|
||||
IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
|
||||
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
|
||||
|
||||
/* enum options */
|
||||
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
|
||||
|
@ -5508,6 +5508,14 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
|
||||
case MODE_SI:
|
||||
return "%vmovd\t{%1, %0|%0, %1}";
|
||||
|
||||
case MODE_HI:
|
||||
if (GENERAL_REG_P (operands[0]))
|
||||
return "vmovw\t{%1, %k0|%k0, %1}";
|
||||
else if (GENERAL_REG_P (operands[1]))
|
||||
return "vmovw\t{%k1, %0|%0, %k1}";
|
||||
else
|
||||
return "vmovw\t{%1, %0|%0, %1}";
|
||||
|
||||
case MODE_DF:
|
||||
if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
|
||||
return "vmovsd\t{%d1, %0|%0, %d1}";
|
||||
@ -5520,6 +5528,12 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
|
||||
else
|
||||
return "%vmovss\t{%1, %0|%0, %1}";
|
||||
|
||||
case MODE_HF:
|
||||
if (REG_P (operands[0]) && REG_P (operands[1]))
|
||||
return "vmovsh\t{%d1, %0|%0, %d1}";
|
||||
else
|
||||
return "vmovsh\t{%1, %0|%0, %1}";
|
||||
|
||||
case MODE_V1DF:
|
||||
gcc_assert (!TARGET_AVX);
|
||||
return "movlpd\t{%1, %0|%0, %1}";
|
||||
@ -13999,7 +14013,7 @@ output_387_binary_op (rtx_insn *insn, rtx *operands)
|
||||
|
||||
if (is_sse)
|
||||
{
|
||||
p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
|
||||
p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
|
||||
strcat (buf, p);
|
||||
|
||||
if (TARGET_AVX)
|
||||
@ -19311,10 +19325,19 @@ inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
|
||||
if (!TARGET_SSE2)
|
||||
return true;
|
||||
|
||||
if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
|
||||
return true;
|
||||
|
||||
int msize = GET_MODE_SIZE (mode);
|
||||
|
||||
/* Between SSE and general, we have moves no larger than word size. */
|
||||
if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
|
||||
|| GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)
|
||||
|| GET_MODE_SIZE (mode) > UNITS_PER_WORD)
|
||||
if (msize > UNITS_PER_WORD)
|
||||
return true;
|
||||
|
||||
/* In addition to SImode moves, AVX512FP16 also enables HImode moves. */
|
||||
int minsize = GET_MODE_SIZE (TARGET_AVX512FP16 ? HImode : SImode);
|
||||
|
||||
if (msize < minsize)
|
||||
return true;
|
||||
|
||||
/* If the target says that inter-unit moves are more expensive
|
||||
@ -19408,21 +19431,27 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
|
||||
static inline int
|
||||
sse_store_index (machine_mode mode)
|
||||
{
|
||||
switch (GET_MODE_SIZE (mode))
|
||||
{
|
||||
case 4:
|
||||
return 0;
|
||||
case 8:
|
||||
return 1;
|
||||
case 16:
|
||||
return 2;
|
||||
case 32:
|
||||
return 3;
|
||||
case 64:
|
||||
return 4;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
/* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
|
||||
costs to processor_costs, which requires changes to all entries in
|
||||
processor cost table. */
|
||||
if (mode == E_HFmode)
|
||||
mode = E_SFmode;
|
||||
|
||||
switch (GET_MODE_SIZE (mode))
|
||||
{
|
||||
case 4:
|
||||
return 0;
|
||||
case 8:
|
||||
return 1;
|
||||
case 16:
|
||||
return 2;
|
||||
case 32:
|
||||
return 3;
|
||||
case 64:
|
||||
return 4;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the cost of moving data of mode M between a
|
||||
@ -19444,11 +19473,13 @@ static inline int
|
||||
inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
|
||||
{
|
||||
int cost;
|
||||
|
||||
if (FLOAT_CLASS_P (regclass))
|
||||
{
|
||||
int index;
|
||||
switch (mode)
|
||||
{
|
||||
case E_HFmode:
|
||||
case E_SFmode:
|
||||
index = 0;
|
||||
break;
|
||||
@ -19549,11 +19580,32 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (in == 2)
|
||||
return MAX (ix86_cost->hard_register.int_load[1],
|
||||
ix86_cost->hard_register.int_store[1]);
|
||||
return in ? ix86_cost->hard_register.int_load[1]
|
||||
: ix86_cost->hard_register.int_store[1];
|
||||
{
|
||||
int cost;
|
||||
if (in == 2)
|
||||
cost = MAX (ix86_cost->hard_register.int_load[1],
|
||||
ix86_cost->hard_register.int_store[1]);
|
||||
else
|
||||
cost = in ? ix86_cost->hard_register.int_load[1]
|
||||
: ix86_cost->hard_register.int_store[1];
|
||||
|
||||
if (mode == E_HFmode)
|
||||
{
|
||||
/* Prefer SSE over GPR for HFmode. */
|
||||
int sse_cost;
|
||||
int index = sse_store_index (mode);
|
||||
if (in == 2)
|
||||
sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
|
||||
ix86_cost->hard_register.sse_store[index]);
|
||||
else
|
||||
sse_cost = (in
|
||||
? ix86_cost->hard_register.sse_load [index]
|
||||
: ix86_cost->hard_register.sse_store [index]);
|
||||
if (sse_cost >= cost)
|
||||
cost = sse_cost + 1;
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
default:
|
||||
if (in == 2)
|
||||
cost = MAX (ix86_cost->hard_register.int_load[2],
|
||||
@ -19727,6 +19779,8 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|
||||
- XI mode
|
||||
- any of 512-bit wide vector mode
|
||||
- any scalar mode. */
|
||||
/* For AVX512FP16, vmovw supports movement of HImode
|
||||
between gpr and sse registser. */
|
||||
if (TARGET_AVX512F
|
||||
&& (mode == XImode
|
||||
|| VALID_AVX512F_REG_MODE (mode)
|
||||
@ -20048,7 +20102,7 @@ ix86_multiplication_cost (const struct processor_costs *cost,
|
||||
if (VECTOR_MODE_P (mode))
|
||||
inner_mode = GET_MODE_INNER (mode);
|
||||
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
return inner_mode == DFmode ? cost->mulsd : cost->mulss;
|
||||
else if (X87_FLOAT_MODE_P (mode))
|
||||
return cost->fmul;
|
||||
@ -20100,7 +20154,7 @@ ix86_division_cost (const struct processor_costs *cost,
|
||||
if (VECTOR_MODE_P (mode))
|
||||
inner_mode = GET_MODE_INNER (mode);
|
||||
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
return inner_mode == DFmode ? cost->divsd : cost->divss;
|
||||
else if (X87_FLOAT_MODE_P (mode))
|
||||
return cost->fdiv;
|
||||
@ -20518,7 +20572,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
{
|
||||
*total = cost->addss;
|
||||
return false;
|
||||
@ -20557,7 +20611,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
/* FALLTHRU */
|
||||
|
||||
case NEG:
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
{
|
||||
*total = cost->sse_op;
|
||||
return false;
|
||||
@ -20639,14 +20693,14 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
return false;
|
||||
|
||||
case FLOAT_EXTEND:
|
||||
if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
|
||||
if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
*total = 0;
|
||||
else
|
||||
*total = ix86_vec_cost (mode, cost->addss);
|
||||
return false;
|
||||
|
||||
case FLOAT_TRUNCATE:
|
||||
if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
|
||||
if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
*total = cost->fadd;
|
||||
else
|
||||
*total = ix86_vec_cost (mode, cost->addss);
|
||||
@ -20656,7 +20710,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
/* SSE requires memory load for the constant operand. It may make
|
||||
sense to account for this. Of course the constant operand may or
|
||||
may not be reused. */
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
*total = cost->sse_op;
|
||||
else if (X87_FLOAT_MODE_P (mode))
|
||||
*total = cost->fabs;
|
||||
@ -20665,7 +20719,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
return false;
|
||||
|
||||
case SQRT:
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
*total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
|
||||
else if (X87_FLOAT_MODE_P (mode))
|
||||
*total = cost->fsqrt;
|
||||
@ -22154,6 +22208,10 @@ ix86_mangle_type (const_tree type)
|
||||
|
||||
switch (TYPE_MODE (type))
|
||||
{
|
||||
case E_HFmode:
|
||||
/* _Float16 is "DF16_".
|
||||
Align with clang's decision in https://reviews.llvm.org/D33719. */
|
||||
return "DF16_";
|
||||
case E_TFmode:
|
||||
/* __float128 is "g". */
|
||||
return "g";
|
||||
@ -22777,7 +22835,7 @@ ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
|
||||
case MINUS_EXPR:
|
||||
if (kind == scalar_stmt)
|
||||
{
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
stmt_cost = ix86_cost->addss;
|
||||
else if (X87_FLOAT_MODE_P (mode))
|
||||
stmt_cost = ix86_cost->fadd;
|
||||
@ -22803,7 +22861,7 @@ ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
|
||||
break;
|
||||
|
||||
case NEGATE_EXPR:
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
stmt_cost = ix86_cost->sse_op;
|
||||
else if (X87_FLOAT_MODE_P (mode))
|
||||
stmt_cost = ix86_cost->fchs;
|
||||
@ -22859,7 +22917,7 @@ ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
|
||||
case BIT_XOR_EXPR:
|
||||
case BIT_AND_EXPR:
|
||||
case BIT_NOT_EXPR:
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
|
||||
stmt_cost = ix86_cost->sse_op;
|
||||
else if (VECTOR_MODE_P (mode))
|
||||
stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
|
||||
@ -23574,14 +23632,18 @@ ix86_get_excess_precision (enum excess_precision_type type)
|
||||
/* The fastest type to promote to will always be the native type,
|
||||
whether that occurs with implicit excess precision or
|
||||
otherwise. */
|
||||
return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
|
||||
return TARGET_AVX512FP16
|
||||
? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
|
||||
: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
|
||||
case EXCESS_PRECISION_TYPE_STANDARD:
|
||||
case EXCESS_PRECISION_TYPE_IMPLICIT:
|
||||
/* Otherwise, the excess precision we want when we are
|
||||
in a standards compliant mode, and the implicit precision we
|
||||
provide would be identical were it not for the unpredictable
|
||||
cases. */
|
||||
if (!TARGET_80387)
|
||||
if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
|
||||
return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
|
||||
else if (!TARGET_80387)
|
||||
return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
|
||||
else if (!TARGET_MIX_SSE_I387)
|
||||
{
|
||||
|
@ -1007,7 +1007,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
|
||||
#define VALID_AVX512F_SCALAR_MODE(MODE) \
|
||||
((MODE) == DImode || (MODE) == DFmode || (MODE) == SImode \
|
||||
|| (MODE) == SFmode)
|
||||
|| (MODE) == SFmode \
|
||||
|| (TARGET_AVX512FP16 && ((MODE) == HImode || (MODE) == HFmode)))
|
||||
|
||||
#define VALID_AVX512F_REG_MODE(MODE) \
|
||||
((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \
|
||||
@ -1046,7 +1047,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
|
||||
#define VALID_FP_MODE_P(MODE) \
|
||||
((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
|
||||
|| (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) \
|
||||
|| (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode)
|
||||
|
||||
#define VALID_INT_MODE_P(MODE) \
|
||||
((MODE) == QImode || (MODE) == HImode \
|
||||
@ -1079,6 +1080,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
#define SSE_FLOAT_MODE_P(MODE) \
|
||||
((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
|
||||
|
||||
#define SSE_FLOAT_MODE_SSEMATH_OR_HF_P(MODE) \
|
||||
((SSE_FLOAT_MODE_P (MODE) && TARGET_SSE_MATH) \
|
||||
|| (TARGET_AVX512FP16 && (MODE) == HFmode))
|
||||
|
||||
#define FMA4_VEC_FLOAT_MODE_P(MODE) \
|
||||
(TARGET_FMA4 && ((MODE) == V4SFmode || (MODE) == V2DFmode \
|
||||
|| (MODE) == V8SFmode || (MODE) == V4DFmode))
|
||||
@ -2295,7 +2300,7 @@ constexpr wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI
|
||||
constexpr wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI
|
||||
| PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE
|
||||
| PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE
|
||||
| PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI;
|
||||
| PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16;
|
||||
constexpr wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF
|
||||
| PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1;
|
||||
constexpr wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
|
||||
|
@ -499,7 +499,7 @@
|
||||
|
||||
;; Main data type used by the insn
|
||||
(define_attr "mode"
|
||||
"unknown,none,QI,HI,SI,DI,TI,OI,XI,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF,
|
||||
"unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF,
|
||||
V2DF,V2SF,V1DF,V8DF"
|
||||
(const_string "unknown"))
|
||||
|
||||
@ -835,8 +835,7 @@
|
||||
sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
|
||||
avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
|
||||
avx512bw,noavx512bw,avx512dq,noavx512dq,
|
||||
avx512vl,noavx512vl,
|
||||
avxvnni,avx512vnnivl"
|
||||
avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16"
|
||||
(const_string "base"))
|
||||
|
||||
;; Define instruction set of MMX instructions
|
||||
@ -888,6 +887,8 @@
|
||||
(eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI")
|
||||
(eq_attr "isa" "avx512vnnivl")
|
||||
(symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL")
|
||||
(eq_attr "isa" "avx512fp16")
|
||||
(symbol_ref "TARGET_AVX512FP16")
|
||||
|
||||
(eq_attr "mmx_isa" "native")
|
||||
(symbol_ref "!TARGET_MMX_WITH_SSE")
|
||||
@ -909,6 +910,7 @@
|
||||
(set_attr "type" "multi")])
|
||||
|
||||
(define_code_iterator plusminus [plus minus])
|
||||
(define_code_iterator plusminusmultdiv [plus minus mult div])
|
||||
|
||||
(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
|
||||
|
||||
@ -924,7 +926,8 @@
|
||||
|
||||
;; Mark commutative operators as such in constraints.
|
||||
(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
|
||||
(minus "") (ss_minus "") (us_minus "")])
|
||||
(minus "") (ss_minus "") (us_minus "")
|
||||
(mult "%") (div "")])
|
||||
|
||||
;; Mapping of max and min
|
||||
(define_code_iterator maxmin [smax smin umax umin])
|
||||
@ -1024,7 +1027,8 @@
|
||||
(minus "sub") (ss_minus "sssub") (us_minus "ussub")
|
||||
(sign_extend "extend") (zero_extend "zero_extend")
|
||||
(ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")
|
||||
(rotate "rotl") (rotatert "rotr")])
|
||||
(rotate "rotl") (rotatert "rotr")
|
||||
(mult "mul") (div "div")])
|
||||
|
||||
;; All integer modes.
|
||||
(define_mode_iterator SWI1248x [QI HI SI DI])
|
||||
@ -1092,8 +1096,9 @@
|
||||
;; compile time constant, it is faster to use <MODE_SIZE> than
|
||||
;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on
|
||||
;; command line options just use GET_MODE_SIZE macro.
|
||||
(define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8") (TI "16")
|
||||
(SF "4") (DF "8") (XF "GET_MODE_SIZE (XFmode)")
|
||||
(define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
|
||||
(TI "16") (HF "2") (SF "4") (DF "8")
|
||||
(XF "GET_MODE_SIZE (XFmode)")
|
||||
(V16QI "16") (V32QI "32") (V64QI "64")
|
||||
(V8HI "16") (V16HI "32") (V32HI "64")
|
||||
(V4SI "16") (V8SI "32") (V16SI "64")
|
||||
@ -1226,8 +1231,8 @@
|
||||
;; All x87 floating point modes
|
||||
(define_mode_iterator X87MODEF [SF DF XF])
|
||||
|
||||
;; All x87 floating point modes plus HF
|
||||
(define_mode_iterator X87MODEFH [SF DF XF HF])
|
||||
;; All x87 floating point modes plus HFmode
|
||||
(define_mode_iterator X87MODEFH [HF SF DF XF])
|
||||
|
||||
;; All SSE floating point modes
|
||||
(define_mode_iterator SSEMODEF [SF DF TF])
|
||||
@ -1235,7 +1240,7 @@
|
||||
|
||||
;; SSE instruction suffix for various modes
|
||||
(define_mode_attr ssemodesuffix
|
||||
[(SF "ss") (DF "sd")
|
||||
[(HF "sh") (SF "ss") (DF "sd")
|
||||
(V16SF "ps") (V8DF "pd")
|
||||
(V8SF "ps") (V4DF "pd")
|
||||
(V4SF "ps") (V2DF "pd")
|
||||
@ -1500,6 +1505,23 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "cbranchhf4"
|
||||
[(set (reg:CC FLAGS_REG)
|
||||
(compare:CC (match_operand:HF 1 "cmp_fp_expander_operand")
|
||||
(match_operand:HF 2 "cmp_fp_expander_operand")))
|
||||
(set (pc) (if_then_else
|
||||
(match_operator 0 "ix86_fp_comparison_operator"
|
||||
[(reg:CC FLAGS_REG)
|
||||
(const_int 0)])
|
||||
(label_ref (match_operand 3))
|
||||
(pc)))]
|
||||
"TARGET_AVX512FP16"
|
||||
{
|
||||
ix86_expand_branch (GET_CODE (operands[0]),
|
||||
operands[1], operands[2], operands[3]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "cbranch<mode>4"
|
||||
[(set (reg:CC FLAGS_REG)
|
||||
(compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand")
|
||||
@ -1709,6 +1731,17 @@
|
||||
(eq_attr "alternative" "0")
|
||||
(symbol_ref "true")
|
||||
(symbol_ref "false"))))])
|
||||
|
||||
(define_insn "*cmpi<unord>hf"
|
||||
[(set (reg:CCFP FLAGS_REG)
|
||||
(compare:CCFP
|
||||
(match_operand:HF 0 "register_operand" "v")
|
||||
(match_operand:HF 1 "nonimmediate_operand" "vm")))]
|
||||
"TARGET_AVX512FP16"
|
||||
"v<unord>comish\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecomi")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "HF")])
|
||||
|
||||
;; Push/pop instructions.
|
||||
|
||||
@ -2440,8 +2473,8 @@
|
||||
(symbol_ref "true")))])
|
||||
|
||||
(define_insn "*movhi_internal"
|
||||
[(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k")
|
||||
(match_operand:HI 1 "general_operand" "r ,rn,rm,rn,*r,*km,*k,*k,CBC"))]
|
||||
[(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k,?r,?v,*v,*v,*m")
|
||||
(match_operand:HI 1 "general_operand" "r ,rn,rm,rn,*r,*km,*k,*k,CBC,v, r, v, m, v"))]
|
||||
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
|
||||
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
|
||||
|
||||
@ -2467,6 +2500,9 @@
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
case TYPE_SSEMOV:
|
||||
return ix86_output_ssemov (insn, operands);
|
||||
|
||||
case TYPE_MSKLOG:
|
||||
if (operands[1] == const0_rtx)
|
||||
return "kxorw\t%0, %0, %0";
|
||||
@ -2481,8 +2517,15 @@
|
||||
return "mov{w}\t{%1, %0|%0, %1}";
|
||||
}
|
||||
}
|
||||
[(set (attr "type")
|
||||
(cond [(eq_attr "alternative" "4,5,6,7")
|
||||
[(set (attr "isa")
|
||||
(cond [(eq_attr "alternative" "9,10,11,12,13")
|
||||
(const_string "avx512fp16")
|
||||
]
|
||||
(const_string "*")))
|
||||
(set (attr "type")
|
||||
(cond [(eq_attr "alternative" "9,10,11,12,13")
|
||||
(const_string "ssemov")
|
||||
(eq_attr "alternative" "4,5,6,7")
|
||||
(const_string "mskmov")
|
||||
(eq_attr "alternative" "8")
|
||||
(const_string "msklog")
|
||||
@ -2507,6 +2550,8 @@
|
||||
(set (attr "mode")
|
||||
(cond [(eq_attr "type" "imovx")
|
||||
(const_string "SI")
|
||||
(eq_attr "alternative" "11")
|
||||
(const_string "HF")
|
||||
(and (eq_attr "alternative" "1,2")
|
||||
(match_operand:HI 1 "aligned_operand"))
|
||||
(const_string "SI")
|
||||
@ -3731,7 +3776,10 @@
|
||||
(eq_attr "alternative" "2")
|
||||
(const_string "sselog1")
|
||||
(eq_attr "alternative" "4,5,6,7")
|
||||
(const_string "sselog")
|
||||
(if_then_else
|
||||
(match_test ("TARGET_AVX512FP16"))
|
||||
(const_string "ssemov")
|
||||
(const_string "sselog"))
|
||||
]
|
||||
(const_string "ssemov")))
|
||||
(set (attr "memory")
|
||||
@ -3754,9 +3802,15 @@
|
||||
(eq_attr "alternative" "2")
|
||||
(const_string "V4SF")
|
||||
(eq_attr "alternative" "4,5,6,7")
|
||||
(const_string "TI")
|
||||
(if_then_else
|
||||
(match_test "TARGET_AVX512FP16")
|
||||
(const_string "HI")
|
||||
(const_string "TI"))
|
||||
(eq_attr "alternative" "3")
|
||||
(const_string "SF")
|
||||
(if_then_else
|
||||
(match_test "TARGET_AVX512FP16")
|
||||
(const_string "HF")
|
||||
(const_string "SF"))
|
||||
]
|
||||
(const_string "*")))])
|
||||
|
||||
@ -4497,6 +4551,17 @@
|
||||
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
|
||||
})
|
||||
|
||||
(define_insn "extendhf<mode>2"
|
||||
[(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
|
||||
(float_extend:MODEF
|
||||
(match_operand:HF 1 "nonimmediate_operand" "vm")))]
|
||||
"TARGET_AVX512FP16"
|
||||
"vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
|
||||
(define_expand "extend<mode>xf2"
|
||||
[(set (match_operand:XF 0 "nonimmediate_operand")
|
||||
(float_extend:XF (match_operand:MODEF 1 "general_operand")))]
|
||||
@ -4674,6 +4739,18 @@
|
||||
(symbol_ref "flag_unsafe_math_optimizations")
|
||||
]
|
||||
(symbol_ref "true")))])
|
||||
|
||||
;; Conversion from {SF,DF}mode to HFmode.
|
||||
|
||||
(define_insn "trunc<mode>hf2"
|
||||
[(set (match_operand:HF 0 "register_operand" "=v")
|
||||
(float_truncate:HF
|
||||
(match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
|
||||
"TARGET_AVX512FP16"
|
||||
"vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "HF")])
|
||||
|
||||
;; Signed conversion to DImode.
|
||||
|
||||
@ -5050,6 +5127,16 @@
|
||||
(symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")]
|
||||
(symbol_ref "true")))])
|
||||
|
||||
(define_insn "float<floatunssuffix><mode>hf2"
|
||||
[(set (match_operand:HF 0 "register_operand" "=v")
|
||||
(any_float:HF
|
||||
(match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
|
||||
"TARGET_AVX512FP16"
|
||||
"vcvt<floatsuffix>si2sh<rex64suffix>\t{%1, %d0|%d0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "HF")])
|
||||
|
||||
(define_insn "*floatdi<MODEF:mode>2_i387"
|
||||
[(set (match_operand:MODEF 0 "register_operand" "=f")
|
||||
(float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))]
|
||||
@ -7653,6 +7740,13 @@
|
||||
(match_operand:XF 2 "register_operand")))]
|
||||
"TARGET_80387")
|
||||
|
||||
(define_expand "<insn>hf3"
|
||||
[(set (match_operand:HF 0 "register_operand")
|
||||
(plusminus:HF
|
||||
(match_operand:HF 1 "register_operand")
|
||||
(match_operand:HF 2 "nonimmediate_operand")))]
|
||||
"TARGET_AVX512FP16")
|
||||
|
||||
(define_expand "<insn><mode>3"
|
||||
[(set (match_operand:MODEF 0 "register_operand")
|
||||
(plusminus:MODEF
|
||||
@ -8230,6 +8324,12 @@
|
||||
(match_operand:XF 2 "register_operand")))]
|
||||
"TARGET_80387")
|
||||
|
||||
(define_expand "mulhf3"
|
||||
[(set (match_operand:HF 0 "register_operand")
|
||||
(mult:HF (match_operand:HF 1 "register_operand")
|
||||
(match_operand:HF 2 "nonimmediate_operand")))]
|
||||
"TARGET_AVX512FP16")
|
||||
|
||||
(define_expand "mul<mode>3"
|
||||
[(set (match_operand:MODEF 0 "register_operand")
|
||||
(mult:MODEF (match_operand:MODEF 1 "register_operand")
|
||||
@ -8247,6 +8347,12 @@
|
||||
(match_operand:XF 2 "register_operand")))]
|
||||
"TARGET_80387")
|
||||
|
||||
(define_expand "divhf3"
|
||||
[(set (match_operand:HF 0 "register_operand")
|
||||
(div:HF (match_operand:HF 1 "register_operand")
|
||||
(match_operand:HF 2 "nonimmediate_operand")))]
|
||||
"TARGET_AVX512FP16")
|
||||
|
||||
(define_expand "div<mode>3"
|
||||
[(set (match_operand:MODEF 0 "register_operand")
|
||||
(div:MODEF (match_operand:MODEF 1 "register_operand")
|
||||
@ -16667,6 +16773,17 @@
|
||||
(symbol_ref "true")
|
||||
(symbol_ref "false"))))])
|
||||
|
||||
(define_insn "*<insn>hf"
|
||||
[(set (match_operand:HF 0 "register_operand" "=v")
|
||||
(plusminusmultdiv:HF
|
||||
(match_operand:HF 1 "nonimmediate_operand" "<comm>v")
|
||||
(match_operand:HF 2 "nonimmediate_operand" "vm")))]
|
||||
"TARGET_AVX512FP16
|
||||
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
|
||||
"v<insn>sh\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "HF")])
|
||||
|
||||
(define_insn "*rcpsf2_sse"
|
||||
[(set (match_operand:SF 0 "register_operand" "=x,x,x")
|
||||
(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
|
||||
@ -19619,6 +19736,15 @@
|
||||
operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
|
||||
})
|
||||
|
||||
(define_expand "movhfcc"
|
||||
[(set (match_operand:HF 0 "register_operand")
|
||||
(if_then_else:HF
|
||||
(match_operand 1 "comparison_operator")
|
||||
(match_operand:HF 2 "register_operand")
|
||||
(match_operand:HF 3 "register_operand")))]
|
||||
"TARGET_AVX512FP16"
|
||||
"if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
|
||||
|
||||
(define_expand "mov<mode>cc"
|
||||
[(set (match_operand:X87MODEF 0 "register_operand")
|
||||
(if_then_else:X87MODEF
|
||||
@ -19785,6 +19911,18 @@
|
||||
;; Their operands are not commutative, and thus they may be used in the
|
||||
;; presence of -0.0 and NaN.
|
||||
|
||||
(define_insn "*ieee_s<ieee_maxmin>hf3"
|
||||
[(set (match_operand:HF 0 "register_operand" "=v")
|
||||
(unspec:HF
|
||||
[(match_operand:HF 1 "register_operand" "v")
|
||||
(match_operand:HF 2 "nonimmediate_operand" "vm")]
|
||||
IEEE_MAXMIN))]
|
||||
"TARGET_AVX512FP16"
|
||||
"v<ieee_maxmin>sh\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "prefix" "evex")
|
||||
(set_attr "type" "sseadd")
|
||||
(set_attr "mode" "HF")])
|
||||
|
||||
(define_insn "*ieee_s<ieee_maxmin><mode>3"
|
||||
[(set (match_operand:MODEF 0 "register_operand" "=x,v")
|
||||
(unspec:MODEF
|
||||
|
@ -1166,3 +1166,7 @@ Emit GNU_PROPERTY_X86_ISA_1_NEEDED GNU property.
|
||||
mmwait
|
||||
Target Mask(ISA2_MWAIT) Var(ix86_isa_flags2) Save
|
||||
Support MWAIT and MONITOR built-in functions and code generation.
|
||||
|
||||
mavx512fp16
|
||||
Target Mask(ISA2_AVX512FP16) Var(ix86_isa_flags2) Save
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512FP16 built-in functions and code generation.
|
||||
|
@ -94,6 +94,10 @@
|
||||
|
||||
#include <avx512vp2intersectvlintrin.h>
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <avx512fp16intrin.h>
|
||||
#endif
|
||||
|
||||
#include <shaintrin.h>
|
||||
|
||||
#include <fmaintrin.h>
|
||||
|
@ -1163,6 +1163,14 @@ as 32-bit precision. This may lead to inconsistent behavior between software
|
||||
emulation and AVX512-FP16 instructions. Using @option{-fexcess-precision=16}
|
||||
will force round back after each operation.
|
||||
|
||||
Using @option{-mavx512fp16} will generate AVX512-FP16 instructions instead of
|
||||
software emulation. The default behavior of @code{FLT_EVAL_METHOD} is to round
|
||||
after each operation. The same is true with @option{-fexcess-precision=standard}
|
||||
and @option{-mfpmath=sse}. If there is no @option{-mfpmath=sse},
|
||||
@option{-fexcess-precision=standard} alone does the same thing as before,
|
||||
It is useful for code that does not have @code{_Float16} and runs on the x87
|
||||
FPU.
|
||||
|
||||
@node Decimal Float
|
||||
@section Decimal Floating Types
|
||||
@cindex decimal floating types
|
||||
|
@ -1395,6 +1395,7 @@ See RS/6000 and PowerPC Options.
|
||||
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
|
||||
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
|
||||
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
|
||||
-mavx512fp16 @gol
|
||||
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
|
||||
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
|
||||
-mkl -mwidekl @gol
|
||||
@ -31289,6 +31290,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@itemx -mavx512bf16
|
||||
@opindex mavx512bf16
|
||||
@need 200
|
||||
@itemx -mavx512fp16
|
||||
@opindex mavx512fp16
|
||||
@need 200
|
||||
@itemx -mgfni
|
||||
@opindex mgfni
|
||||
@need 200
|
||||
@ -31367,9 +31371,9 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP,
|
||||
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
|
||||
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
|
||||
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
|
||||
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI or CLDEMOTE
|
||||
extended instruction sets. Each has a corresponding @option{-mno-} option to
|
||||
disable use of these instructions.
|
||||
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16
|
||||
or CLDEMOTE extended instruction sets. Each has a corresponding
|
||||
@option{-mno-} option to disable use of these instructions.
|
||||
|
||||
These extensions are also available as built-in functions: see
|
||||
@ref{x86 Built-in Functions}, for details of the functions enabled and
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
|
8
gcc/testsuite/g++.target/i386/float16-1.C
Normal file
8
gcc/testsuite/g++.target/i386/float16-1.C
Normal file
@ -0,0 +1,8 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mno-sse2" } */
|
||||
|
||||
_Float16/* { dg-error "does not name a type" } */
|
||||
foo (_Float16 x)
|
||||
{
|
||||
return x;
|
||||
}
|
14
gcc/testsuite/g++.target/i386/float16-2.C
Normal file
14
gcc/testsuite/g++.target/i386/float16-2.C
Normal file
@ -0,0 +1,14 @@
|
||||
/* { dg-do assemble { target avx512fp16 } } */
|
||||
/* { dg-options "-O2 -mavx512fp16" } */
|
||||
|
||||
union flt
|
||||
{
|
||||
_Float16 flt;
|
||||
short s;
|
||||
};
|
||||
|
||||
_Float16
|
||||
foo (union flt x)
|
||||
{
|
||||
return x.flt;
|
||||
}
|
10
gcc/testsuite/g++.target/i386/float16-3.C
Normal file
10
gcc/testsuite/g++.target/i386/float16-3.C
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-do assemble { target avx512fp16 } } */
|
||||
/* { dg-options "-O0 -mavx512fp16" } */
|
||||
|
||||
template <typename> void a(char *) {}
|
||||
char b, d;
|
||||
void c()
|
||||
{
|
||||
a<unsigned char>(&d);
|
||||
a<_Float16>(&b);
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw" } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw" } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
@ -87,6 +87,9 @@ main ()
|
||||
#ifdef AVX512VNNI
|
||||
&& (ecx & bit_AVX512VNNI)
|
||||
#endif
|
||||
#ifdef AVX512FP16
|
||||
&& (edx & bit_AVX512FP16)
|
||||
#endif
|
||||
#ifdef VAES
|
||||
&& (ecx & bit_VAES)
|
||||
#endif
|
||||
|
21
gcc/testsuite/gcc.target/i386/avx512fp16-12a.c
Normal file
21
gcc/testsuite/gcc.target/i386/avx512fp16-12a.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx512fp16" } */
|
||||
|
||||
_Float16
|
||||
__attribute__ ((noinline, noclone))
|
||||
do_max (_Float16 __A, _Float16 __B)
|
||||
{
|
||||
return __A > __B ? __A : __B;
|
||||
}
|
||||
|
||||
_Float16
|
||||
__attribute__ ((noinline, noclone))
|
||||
do_min (_Float16 __A, _Float16 __B)
|
||||
{
|
||||
return __A < __B ? __A : __B;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vminsh\[ \\t\]" 1 } } */
|
||||
/* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */
|
27
gcc/testsuite/gcc.target/i386/avx512fp16-12b.c
Normal file
27
gcc/testsuite/gcc.target/i386/avx512fp16-12b.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* { dg-do run { target avx512fp16 } } */
|
||||
/* { dg-options "-O2 -mavx512fp16" } */
|
||||
|
||||
#include <string.h>
|
||||
|
||||
static void do_test (void);
|
||||
|
||||
#define DO_TEST do_test
|
||||
#define AVX512FP16
|
||||
#include "avx512-check.h"
|
||||
#include "avx512fp16-12a.c"
|
||||
|
||||
static void
|
||||
do_test (void)
|
||||
{
|
||||
_Float16 x = 0.1f;
|
||||
_Float16 y = -3.2f;
|
||||
_Float16 z;
|
||||
|
||||
z = do_max (x, y);
|
||||
if (z != x)
|
||||
abort ();
|
||||
|
||||
z = do_min (x, y);
|
||||
if (z != y)
|
||||
abort ();
|
||||
}
|
10
gcc/testsuite/gcc.target/i386/float16-3a.c
Normal file
10
gcc/testsuite/gcc.target/i386/float16-3a.c
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx512fp16" } */
|
||||
|
||||
_Float16
|
||||
foo (int x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vcvtsi2shl\[ \t\]+\[^\n\r]*%xmm0" 1 } } */
|
10
gcc/testsuite/gcc.target/i386/float16-3b.c
Normal file
10
gcc/testsuite/gcc.target/i386/float16-3b.c
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx512fp16" } */
|
||||
|
||||
_Float16
|
||||
foo (unsigned int x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vcvtusi2shl\[ \t\]+\[^\n\r]*%xmm0" 1 } } */
|
10
gcc/testsuite/gcc.target/i386/float16-4a.c
Normal file
10
gcc/testsuite/gcc.target/i386/float16-4a.c
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mavx512fp16" } */
|
||||
|
||||
_Float16
|
||||
foo (long long x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vcvtsi2shq\[ \t\]+\[^\n\r]*%xmm0" 1 } } */
|
10
gcc/testsuite/gcc.target/i386/float16-4b.c
Normal file
10
gcc/testsuite/gcc.target/i386/float16-4b.c
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mavx512fp16" } */
|
||||
|
||||
_Float16
|
||||
foo (unsigned long long x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vcvtusi2shq\[ \t\]+\[^\n\r]*%xmm0" 1 } } */
|
@ -79,6 +79,7 @@ extern void test_hreset (void) __attribute__((__target__("hreset")));
|
||||
extern void test_keylocker (void) __attribute__((__target__("kl")));
|
||||
extern void test_widekl (void) __attribute__((__target__("widekl")));
|
||||
extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
|
||||
extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
|
||||
|
||||
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
|
||||
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
|
||||
@ -159,6 +160,7 @@ extern void test_no_hreset (void) __attribute__((__target__("no-hreset")));
|
||||
extern void test_no_keylocker (void) __attribute__((__target__("no-kl")));
|
||||
extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
|
||||
extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
|
||||
extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
|
||||
|
||||
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
|
||||
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
|
||||
|
14
gcc/testsuite/gcc.target/i386/pr54855-12.c
Normal file
14
gcc/testsuite/gcc.target/i386/pr54855-12.c
Normal file
@ -0,0 +1,14 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx512fp16" } */
|
||||
/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */
|
||||
/* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */
|
||||
/* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
_Float16
|
||||
foo (_Float16 x, _Float16 y)
|
||||
{
|
||||
x = x > y ? x : y;
|
||||
return x;
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
@ -103,7 +103,7 @@
|
||||
|
||||
|
||||
#ifndef DIFFERENT_PRAGMAS
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni")
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
|
||||
#endif
|
||||
|
||||
/* Following intrinsics require immediate arguments. They
|
||||
@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
|
||||
|
||||
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
|
||||
#ifdef DIFFERENT_PRAGMAS
|
||||
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni")
|
||||
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
test_1 (_cvtss_sh, unsigned short, float, 1)
|
||||
|
@ -708,6 +708,6 @@
|
||||
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
|
||||
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
|
||||
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni")
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
@ -3164,7 +3164,7 @@ proc check_effective_target_has_q_floating_suffix { } {
|
||||
|
||||
proc check_effective_target_float16 {} {
|
||||
return [check_no_compiler_messages_nocache float16 object {
|
||||
_Float16 x;
|
||||
_Float16 foo (_Float16 x) { return x; }
|
||||
} [add_options_for_float16 ""]]
|
||||
}
|
||||
|
||||
@ -8877,6 +8877,17 @@ proc check_prefer_avx128 { } {
|
||||
}
|
||||
|
||||
|
||||
# Return 1 if avx512fp16 instructions can be compiled.
|
||||
|
||||
proc check_effective_target_avx512fp16 { } {
|
||||
return [check_no_compiler_messages avx512fp16 object {
|
||||
void foo (void)
|
||||
{
|
||||
asm volatile ("vmovw %edi, %xmm0");
|
||||
}
|
||||
} "-O2 -mavx512fp16" ]
|
||||
}
|
||||
|
||||
# Return 1 if avx512f instructions can be compiled.
|
||||
|
||||
proc check_effective_target_avx512f { } {
|
||||
|
Loading…
Reference in New Issue
Block a user