Enable support for bfloat16 which will be in Future Cooper Lake.

There are 3 instructions for AVX512BF16: VCVTNE2PS2BF16, VCVTNEPS2BF16 and
DPBF16PS instructions, which are Vector Neural Network Instructions
supporting:
    
- VCVTNE2PS2BF16: Convert Two Packed Single Data to One Packed BF16 Data.
- VCVTNEPS2BF16: Convert Packed Single Data to Packed BF16 Data.
- VDPBF16PS: Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

2019-05-07  Wei Xiao  <wei3.xiao@intel.com>

	* common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BF16_SET
	OPTION_MASK_ISA_AVX512BF16_UNSET, OPTION_MASK_ISA2_AVX512BW_UNSET): New.
	(OPTION_MASK_ISA2_AVX512F_UNSET): Add OPTION_MASK_ISA_AVX512BF16_UNSET.
	(ix86_handle_option): Handle -mavx512bf16.
	* config.gcc: Add avx512bf16vlintrin.h and avx512bf16intrin.h
	to extra_headers.
	* config/i386/avx512bf16vlintrin.h: New.
	* config/i386/avx512bf16intrin.h: New.
	* config/i386/cpuid.h (bit_AVX512BF16): New.
	* config/i386/driver-i386.c (host_detect_local_cpu): Detect BF16.
	* config/i386/i386-builtin-types.def: Add new types.
	* config/i386/i386-builtin.def: Add new builtins.
	* config/i386/i386-c.c (ix86_target_macros_internal): Define
	__AVX512BF16__.
	* config/i386/i386-option.c (ix86_target_string): Add -mavx512bf16.
	(ix86_option_override_internal): Handle BF16.
	(ix86_valid_target_attribute_inner_p): Ditto.
	* config/i386/i386-expand.c (ix86_expand_args_builtin): Ditto.
	* config/i386/i386-builtin.c (enum processor_features): Add
	F_AVX512BF16.
	(static const _isa_names_table isa_names_table): Ditto.
	* config/i386/i386.h (TARGET_AVX512BF16, TARGET_AVX512BF16_P): New.
	(PTA_AVX512BF16): Ditto.
	* config/i386/i386.opt: Add -mavx512bf16.
	* config/i386/immintrin.h: Include avx512bf16intrin.h
	and avx512bf16vlintrin.h.
	* config/i386/sse.md (avx512f_cvtne2ps2bf16_<mode><mask_name>,
	avx512f_cvtneps2bf16_<mode><mask_name>,
	avx512f_dpbf16ps_<mode><mask_half_name>): New define_insn patterns.
	* config/i386/subst.md (mask_half): Add new subst.
	* doc/invoke.texi: Document -mavx512bf16.

2019-05-07  Wei Xiao  <wei3.xiao@intel.com>

	* gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c: New test.
	* gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c: New test.
	* gcc.target/i386/avx512bf16-vdpbf16ps-1.c: New test.
	* gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c: New test.
	* gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: New test.
	* gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c: New test.
	* gcc.target/i386/builtin_target.c: Handle avx512bf16.
	* gcc.target/i386/sse-12.c: Add -mavx512bf16.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-14.c: Ditto.
	* gcc.target/i386/sse-22.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* g++.dg/other/i386-2.C: Ditto.
	* g++.dg/other/i386-3.C: Ditto.

2019-05-07  Hongtao Liu  <hongtao.liu@intel.com>

	* config/i386/cpuinfo.c (get_available_features): Detect BF16.
	* config/i386/cpuinfo.h (enum processor_features): Add
	FEATURE_AVX512BF16.

From-SVN: r271006
This commit is contained in:
Hongtao Liu 2019-05-08 10:21:40 +00:00
parent da2d30c199
commit 4f0e90fae9
38 changed files with 824 additions and 18 deletions

View File

@ -3,6 +3,40 @@
PR tree-optimization/90356
* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.
2019-05-07 Wei Xiao <wei3.xiao@intel.com>
* common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BF16_SET
OPTION_MASK_ISA_AVX512BF16_UNSET, OPTION_MASK_ISA2_AVX512BW_UNSET): New.
(OPTION_MASK_ISA2_AVX512F_UNSET): Add OPTION_MASK_ISA_AVX512BF16_UNSET.
(ix86_handle_option): Handle -mavx512bf16.
* config.gcc: Add avx512bf16vlintrin.h and avx512bf16intrin.h
to extra_headers.
* config/i386/avx512bf16vlintrin.h: New.
* config/i386/avx512bf16intrin.h: New.
* config/i386/cpuid.h (bit_AVX512BF16): New.
* config/i386/driver-i386.c (host_detect_local_cpu): Detect BF16.
* config/i386/i386-builtin-types.def: Add new types.
* config/i386/i386-builtin.def: Add new builtins.
* config/i386/i386-c.c (ix86_target_macros_internal): Define
__AVX512BF16__.
* config/i386/i386-option.c (ix86_target_string): Add -mavx512bf16.
(ix86_option_override_internal): Handle BF16.
(ix86_valid_target_attribute_inner_p): Ditto.
* config/i386/i386-expand.c (ix86_expand_args_builtin): Ditto.
* config/i386/i386-builtin.c (enum processor_features): Add
F_AVX512BF16.
(static const _isa_names_table isa_names_table): Ditto.
* config/i386/i386.h (TARGET_AVX512BF16, TARGET_AVX512BF16_P): New.
(PTA_AVX512BF16): Ditto.
* config/i386/i386.opt: Add -mavx512bf16.
* config/i386/immintrin.h: Include avx512bf16intrin.h
and avx512bf16vlintrin.h.
* config/i386/sse.md (avx512f_cvtne2ps2bf16_<mode><mask_name>,
avx512f_cvtneps2bf16_<mode><mask_name>,
avx512f_dpbf16ps_<mode><mask_half_name>): New define_insn patterns.
* config/i386/subst.md (mask_half): Add new subst.
* doc/invoke.texi: Document -mavx512bf16.
2019-05-07 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000-protos.h (rs6000_legitimize_reload_address_ptr):

View File

@ -88,6 +88,7 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512BITALG_SET \
(OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512BF16_SET OPTION_MASK_ISA_AVX512BF16
#define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
@ -215,6 +216,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI
#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ
#define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG
#define OPTION_MASK_ISA_AVX512BF16_UNSET OPTION_MASK_ISA_AVX512BF16
#define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
@ -276,10 +278,14 @@ along with GCC; see the file COPYING3. If not see
| OPTION_MASK_ISA_SSE_UNSET)
#define OPTION_MASK_ISA2_AVX512F_UNSET \
(OPTION_MASK_ISA_AVX5124FMAPS_UNSET | OPTION_MASK_ISA_AVX5124VNNIW_UNSET)
(OPTION_MASK_ISA_AVX512BF16_UNSET \
| OPTION_MASK_ISA_AVX5124FMAPS_UNSET \
| OPTION_MASK_ISA_AVX5124VNNIW_UNSET)
#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
(OPTION_MASK_ISA2_AVX512F_UNSET)
#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA_AVX512BF16_UNSET
/* Set 1 << value as value of -malign-FLAG option. */
static void
@ -738,6 +744,21 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
case OPT_mavx512bf16:
if (value)
{
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512BF16_SET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512BF16_SET;
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW_SET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512BW_SET;
}
else
{
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512BF16_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512BF16_UNSET;
}
return true;
case OPT_msgx:
if (value)
{
@ -800,6 +821,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512BW_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512BW_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512BW_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512BW_UNSET;
}
return true;

View File

@ -407,7 +407,7 @@ i[34567]86-*-*)
avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h
avx512vpopcntdqvlintrin.h avx512bitalgintrin.h
pconfigintrin.h wbnoinvdintrin.h movdirintrin.h
waitpkgintrin.h cldemoteintrin.h"
waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h avx512bf16intrin.h"
;;
x86_64-*-*)
cpu_type=i386
@ -439,7 +439,7 @@ x86_64-*-*)
avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h
avx512vpopcntdqvlintrin.h avx512bitalgintrin.h
pconfigintrin.h wbnoinvdintrin.h movdirintrin.h
waitpkgintrin.h cldemoteintrin.h"
waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h avx512bf16intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h

View File

@ -0,0 +1,118 @@
/* Copyright (C) 2019 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512BF16INTRIN_H_INCLUDED
#define _AVX512BF16INTRIN_H_INCLUDED
#ifndef __AVX512BF16__
#pragma GCC push_options
#pragma GCC target("avx512bf16")
#define __DISABLE_AVX512BF16__
#endif /* __AVX512BF16__ */
/* Internal data types for implementing the intrinsics. */
typedef short __v32bh __attribute__ ((__vector_size__ (64)));
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
/* vcvtne2ps2bf16 */
extern __inline __m512bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
{
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
}
extern __inline __m512bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
{
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
}
extern __inline __m512bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
{
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
}
/* vcvtneps2bf16 */
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtneps_pbh (__m512 __A)
{
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
{
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
{
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
}
/* vdpbf16ps */
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
{
return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
{
return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
{
return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
}
#ifdef __DISABLE_AVX512BF16__
#undef __DISABLE_AVX512BF16__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BF16__ */
#endif /* _AVX512BF16INTRIN_H_INCLUDED */

View File

@ -0,0 +1,183 @@
/* Copyright (C) 2019 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
#define _AVX512BF16VLINTRIN_H_INCLUDED
#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
#pragma GCC push_options
#pragma GCC target("avx512bf16,avx512vl")
#define __DISABLE_AVX512BF16VL__
#endif /* __AVX512BF16__ */
/* Internal data types for implementing the intrinsics. */
typedef short __v16bh __attribute__ ((__vector_size__ (32)));
typedef short __v8bh __attribute__ ((__vector_size__ (16)));
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
/* vcvtne2ps2bf16 */
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
{
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
{
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
{
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
{
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
{
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
{
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
}
/* vcvtneps2bf16 */
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtneps_pbh (__m256 __A)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtneps_pbh (__m128 __A)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
}
/* vdpbf16ps */
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
{
return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
{
return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
{
return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
{
return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
{
return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
{
return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
}
#ifdef __DISABLE_AVX512BF16VL__
#undef __DISABLE_AVX512BF16VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BF16VL__ */
#endif /* _AVX512BF16VLINTRIN_H_INCLUDED */

View File

@ -21,6 +21,9 @@
* <http://www.gnu.org/licenses/>.
*/
/* %eax */
#define bit_AVX512BF16 (1 << 5)
/* %ecx */
#define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1)

View File

@ -426,6 +426,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_movdiri = 0, has_movdir64b = 0;
unsigned int has_waitpkg = 0;
unsigned int has_cldemote = 0;
unsigned int has_avx512bf16 = 0;
unsigned int has_ptwrite = 0;
@ -533,6 +534,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_shstk = ecx & bit_SHSTK;
has_pconfig = edx & bit_PCONFIG;
has_waitpkg = ecx & bit_WAITPKG;
__cpuid_count (7, 1, eax, ebx, ecx, edx);
has_avx512bf16 = eax & bit_AVX512BF16;
}
if (max_level >= 13)
@ -1143,6 +1147,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
const char *avx512bf16 = has_avx512bf16 ? " -mavx512bf16" : " -mno-avx512bf16";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
@ -1157,7 +1162,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
ptwrite,
ptwrite, avx512bf16,
NULL);
}

View File

@ -1262,3 +1262,29 @@ DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT)
DEF_FUNCTION_TYPE (V4DI, V4DI)
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, UHI)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, UHI)
# BF16 builtins
DEF_FUNCTION_TYPE (V32HI, V16SF, V16SF)
DEF_FUNCTION_TYPE (V32HI, V16SF, V16SF, V32HI, USI)
DEF_FUNCTION_TYPE (V32HI, V16SF, V16SF, USI)
DEF_FUNCTION_TYPE (V16HI, V8SF, V8SF)
DEF_FUNCTION_TYPE (V16HI, V8SF, V8SF, V16HI, UHI)
DEF_FUNCTION_TYPE (V16HI, V8SF, V8SF, UHI)
DEF_FUNCTION_TYPE (V8HI, V4SF, V4SF)
DEF_FUNCTION_TYPE (V8HI, V4SF, V4SF, V8HI, UQI)
DEF_FUNCTION_TYPE (V8HI, V4SF, V4SF, UQI)
DEF_FUNCTION_TYPE (V16HI, V16SF)
DEF_FUNCTION_TYPE (V16HI, V16SF, V16HI, UHI)
DEF_FUNCTION_TYPE (V16HI, V16SF, UHI)
DEF_FUNCTION_TYPE (V8HI, V8SF)
DEF_FUNCTION_TYPE (V8HI, V8SF, V8HI, UQI)
DEF_FUNCTION_TYPE (V8HI, V8SF, UQI)
DEF_FUNCTION_TYPE (V8HI, V4SF)
DEF_FUNCTION_TYPE (V8HI, V4SF, V8HI, UQI)
DEF_FUNCTION_TYPE (V8HI, V4SF, UQI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V32HI, V32HI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V32HI, V32HI, UHI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V16HI, V16HI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V16HI, V16HI, UQI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V8HI, V8HI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V8HI, V8HI, UQI)

View File

@ -2703,6 +2703,35 @@ BDESC (0, OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaes
BDESC (0, OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (0, OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
/* BF16 */
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32hi, "__builtin_ia32_cvtne2ps2bf16_v32hi", IX86_BUILTIN_CVTNE2PS2HI16_V32HI, UNKNOWN, (int) V32HI_FTYPE_V16SF_V16SF)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32hi_mask, "__builtin_ia32_cvtne2ps2bf16_v32hi_mask", IX86_BUILTIN_CVTNE2PS2HI16_V32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V16SF_V16SF_V32HI_USI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32hi_maskz, "__builtin_ia32_cvtne2ps2bf16_v32hi_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V16SF_V16SF_USI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16hi, "__builtin_ia32_cvtne2ps2bf16_v16hi", IX86_BUILTIN_CVTNE2PS2HI16_V16HI, UNKNOWN, (int) V16HI_FTYPE_V8SF_V8SF)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16hi_mask, "__builtin_ia32_cvtne2ps2bf16_v16hi_mask", IX86_BUILTIN_CVTNE2PS2HI16_V16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SF_V8SF_V16HI_UHI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16hi_maskz, "__builtin_ia32_cvtne2ps2bf16_v16hi_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V8SF_V8SF_UHI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8hi, "__builtin_ia32_cvtne2ps2bf16_v8hi", IX86_BUILTIN_CVTNE2PS2HI16_V8HI, UNKNOWN, (int) V8HI_FTYPE_V4SF_V4SF)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8hi_mask, "__builtin_ia32_cvtne2ps2bf16_v8hi_mask", IX86_BUILTIN_CVTNE2PS2HI16_V8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_V4SF_V8HI_UQI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8hi_maskz, "__builtin_ia32_cvtne2ps2bf16_v8hi_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V4SF_V4SF_UQI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2HI16_V16SF, UNKNOWN, (int) V16HI_FTYPE_V16SF)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2HI16_V16SF_MASK, UNKNOWN, (int) V16HI_FTYPE_V16SF_V16HI_UHI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V16SF_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16SF_UHI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf, "__builtin_ia32_cvtneps2bf16_v8sf", IX86_BUILTIN_CVTNEPS2HI16_V8SF, UNKNOWN, (int) V8HI_FTYPE_V8SF)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_mask, "__builtin_ia32_cvtneps2bf16_v8sf_mask", IX86_BUILTIN_CVTNEPS2HI16_V8SF_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_V8HI_UQI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_maskz, "__builtin_ia32_cvtneps2bf16_v8sf_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V8SF_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8SF_UQI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf, "__builtin_ia32_cvtneps2bf16_v4sf", IX86_BUILTIN_CVTNEPS2HI16_V4SF, UNKNOWN, (int) V8HI_FTYPE_V4SF)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_mask, "__builtin_ia32_cvtneps2bf16_v4sf_mask", IX86_BUILTIN_CVTNEPS2HI16_V4SF_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_V8HI_UQI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_maskz, "__builtin_ia32_cvtneps2bf16_v4sf_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V4SF_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V4SF_UQI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPHI16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32HI_V32HI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPHI16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32HI_V32HI_UHI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPHI16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32HI_V32HI_UHI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf, "__builtin_ia32_dpbf16ps_v8sf", IX86_BUILTIN_DPHI16PS_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16HI_V16HI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_mask, "__builtin_ia32_dpbf16ps_v8sf_mask", IX86_BUILTIN_DPHI16PS_V8SF_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16HI_V16HI_UQI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_maskz, "__builtin_ia32_dpbf16ps_v8sf_maskz", IX86_BUILTIN_DPHI16PS_V8SF_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16HI_V16HI_UQI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf, "__builtin_ia32_dpbf16ps_v4sf", IX86_BUILTIN_DPHI16PS_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__builtin_ia32_dpbf16ps_v4sf_mask", IX86_BUILTIN_DPHI16PS_V4SF_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)

View File

@ -1920,6 +1920,7 @@ enum processor_features
F_VPCLMULQDQ,
F_AVX512VNNI,
F_AVX512BITALG,
F_AVX512BF16,
F_MAX
};
@ -2064,7 +2065,8 @@ static const _isa_names_table isa_names_table[] =
{"gfni", F_GFNI, P_ZERO},
{"vpclmulqdq", F_VPCLMULQDQ, P_ZERO},
{"avx512vnni", F_AVX512VNNI, P_ZERO},
{"avx512bitalg", F_AVX512BITALG, P_ZERO}
{"avx512bitalg", F_AVX512BITALG, P_ZERO},
{"avx512bf16", F_AVX512BF16, P_ZERO}
};
/* This parses the attribute arguments to target in DECL and determines

View File

@ -548,6 +548,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__CLDEMOTE__");
if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
def_or_undef (parse_in, "__PTWRITE__");
if (isa_flag2 & OPTION_MASK_ISA_AVX512BF16)
def_or_undef (parse_in, "__AVX512BF16__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");

View File

@ -8968,6 +8968,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8DF_FTYPE_V2DF:
case V8DF_FTYPE_V8DF:
case V4DI_FTYPE_V4DI:
case V16HI_FTYPE_V16SF:
case V8HI_FTYPE_V8SF:
case V8HI_FTYPE_V4SF:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
@ -9092,6 +9095,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case USI_FTYPE_USI_USI:
case UDI_FTYPE_UDI_UDI:
case V16SI_FTYPE_V8DF_V8DF:
case V32HI_FTYPE_V16SF_V16SF:
case V16HI_FTYPE_V8SF_V8SF:
case V8HI_FTYPE_V4SF_V4SF:
case V16HI_FTYPE_V16SF_UHI:
case V8HI_FTYPE_V8SF_UQI:
case V8HI_FTYPE_V4SF_UQI:
nargs = 2;
break;
case V2DI_FTYPE_V2DI_INT_CONVERT:
@ -9274,6 +9283,15 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16HI_FTYPE_V16HI_V16HI_V16HI:
case V8SI_FTYPE_V8SI_V8SI_V8SI:
case V8HI_FTYPE_V8HI_V8HI_V8HI:
case V32HI_FTYPE_V16SF_V16SF_USI:
case V16HI_FTYPE_V8SF_V8SF_UHI:
case V8HI_FTYPE_V4SF_V4SF_UQI:
case V16HI_FTYPE_V16SF_V16HI_UHI:
case V8HI_FTYPE_V8SF_V8HI_UQI:
case V8HI_FTYPE_V4SF_V8HI_UQI:
case V16SF_FTYPE_V16SF_V32HI_V32HI:
case V8SF_FTYPE_V8SF_V16HI_V16HI:
case V4SF_FTYPE_V4SF_V8HI_V8HI:
nargs = 3;
break;
case V32QI_FTYPE_V32QI_V32QI_INT:
@ -9413,6 +9431,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
case V32HI_FTYPE_V16SF_V16SF_V32HI_USI:
case V16HI_FTYPE_V8SF_V8SF_V16HI_UHI:
case V8HI_FTYPE_V4SF_V4SF_V8HI_UQI:
nargs = 4;
break;
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
@ -9456,6 +9477,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
break;
case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
case V16SF_FTYPE_V16SF_V32HI_V32HI_UHI:
case V8SF_FTYPE_V8SF_V16HI_V16HI_UQI:
case V4SF_FTYPE_V4SF_V8HI_V8HI_UQI:
nargs = 4;
break;
case UQI_FTYPE_V8DI_V8DI_INT_UQI:

View File

@ -209,7 +209,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
{ "-mmovdir64b", OPTION_MASK_ISA_MOVDIR64B },
{ "-mwaitpkg", OPTION_MASK_ISA_WAITPKG },
{ "-mcldemote", OPTION_MASK_ISA_CLDEMOTE },
{ "-mptwrite", OPTION_MASK_ISA_PTWRITE }
{ "-mptwrite", OPTION_MASK_ISA_PTWRITE },
{ "-mavx512bf16", OPTION_MASK_ISA_AVX512BF16 }
};
static struct ix86_target_opts isa_opts[] =
{
@ -919,6 +920,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg),
IX86_ATTR_ISA ("cldemote", OPT_mcldemote),
IX86_ATTR_ISA ("ptwrite", OPT_mptwrite),
IX86_ATTR_ISA ("avx512bf16", OPT_mavx512bf16),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@ -2034,6 +2036,10 @@ ix86_option_override_internal (bool main_args_p,
&& !(opts->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512VPOPCNTDQ))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
if (((processor_alias_table[i].flags & PTA_AVX512BF16) != 0)
&& !(opts->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA_AVX512BF16))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512BF16;
if (((processor_alias_table[i].flags & PTA_SGX) != 0)
&& !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;

View File

@ -193,6 +193,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_CLDEMOTE_P(x) TARGET_ISA_CLDEMOTE_P(x)
#define TARGET_PTWRITE TARGET_ISA_PTWRITE
#define TARGET_PTWRITE_P(x) TARGET_ISA_PTWRITE_P(x)
#define TARGET_AVX512BF16 TARGET_ISA_AVX512BF16
#define TARGET_AVX512BF16_P(x) TARGET_ISA_AVX512BF16_P(x)
#define TARGET_LP64 TARGET_ABI_64
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
@ -2355,6 +2357,7 @@ const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 9);
const wide_int_bitmask PTA_PTWRITE (0, HOST_WIDE_INT_1U << 10);
const wide_int_bitmask PTA_AVX512BF16 (0, HOST_WIDE_INT_1U << 11);
const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
| PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;

View File

@ -1101,3 +1101,8 @@ Enum(instrument_return) String(nop5) Value(instrument_return_nop5)
mrecord-return
Target Report Var(ix86_flag_record_return) Init(0)
Generate a __return_loc section pointing to all return instrumentation code.
mavx512bf16
Target Report Mask(ISA_AVX512BF16) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and
AVX512BF16 built-in functions and code generation.

View File

@ -130,6 +130,10 @@
#include <cldemoteintrin.h>
#include <avx512bf16vlintrin.h>
#include <avx512bf16intrin.h>
#include <rdseedintrin.h>
#include <prfchwintrin.h>

View File

@ -187,6 +187,11 @@
;; For AVX512BITALG support
UNSPEC_VPSHUFBIT
;; For AVX512BF16 support
UNSPEC_VCVTNE2PS2BF16
UNSPEC_VCVTNEPS2BF16
UNSPEC_VDPBF16PS
])
(define_c_enum "unspecv" [
@ -726,6 +731,15 @@
(V16SF "hi") (V8SF "qi") (V4SF "qi")
(V8DF "qi") (V4DF "qi") (V2DF "qi")])
;; Mapping of vector modes to corresponding mask half size
(define_mode_attr avx512fmaskhalfmode
[(V64QI "SI") (V32QI "HI") (V16QI "QI")
(V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
(V16SI "QI") (V8SI "QI") (V4SI "QI")
(V8DI "QI") (V4DI "QI") (V2DI "QI")
(V16SF "QI") (V8SF "QI") (V4SF "QI")
(V8DF "QI") (V4DF "QI") (V2DF "QI")])
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
[(V16SF "V16SI") (V8DF "V8DI")
@ -22184,3 +22198,90 @@
"vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; Converting from BF to SF
(define_mode_attr bf16_cvt_2sf
[(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
;; Converting from SF to BF
(define_mode_attr sf_cvt_bf16
[(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
;; Mapping from BF to SF
(define_mode_attr sf_bf16
[(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
(define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
[(match_operand:BF16 0 "register_operand")
(match_operand:<bf16_cvt_2sf> 1 "register_operand")
(match_operand:<bf16_cvt_2sf> 2 "register_operand")
(match_operand:<avx512fmaskmode> 3 "register_operand")]
"TARGET_AVX512BF16"
{
emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
operands[2], CONST0_RTX(<MODE>mode), operands[3]));
DONE;
})
(define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
[(set (match_operand:BF16 0 "register_operand" "=v")
(unspec:BF16
[(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
(match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
UNSPEC_VCVTNE2PS2BF16))]
"TARGET_AVX512BF16"
"vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
[(match_operand:<sf_cvt_bf16> 0 "register_operand")
(match_operand:VF1_AVX512VL 1 "register_operand")
(match_operand:<avx512fmaskmode> 2 "register_operand")]
"TARGET_AVX512BF16"
{
emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
DONE;
})
(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
[(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
(unspec:<sf_cvt_bf16>
[(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
UNSPEC_VCVTNEPS2BF16))]
"TARGET_AVX512BF16"
"vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
(define_expand "avx512f_dpbf16ps_<mode>_maskz"
[(match_operand:VF1_AVX512VL 0 "register_operand")
(match_operand:VF1_AVX512VL 1 "register_operand")
(match_operand:<sf_bf16> 2 "register_operand")
(match_operand:<sf_bf16> 3 "register_operand")
(match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
"TARGET_AVX512BF16"
{
emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
DONE;
})
(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
[(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
(unspec:VF1_AVX512VL
[(match_operand:VF1_AVX512VL 1 "register_operand" "0")
(match_operand:<sf_bf16> 2 "register_operand" "v")
(match_operand:<sf_bf16> 3 "register_operand" "v")]
UNSPEC_VDPBF16PS))]
"TARGET_AVX512BF16"
"vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
(define_insn "avx512f_dpbf16ps_<mode>_mask"
[(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
(vec_merge:VF1_AVX512VL
(unspec:VF1_AVX512VL
[(match_operand:VF1_AVX512VL 1 "register_operand" "0")
(match_operand:<sf_bf16> 2 "register_operand" "v")
(match_operand:<sf_bf16> 3 "register_operand" "v")]
UNSPEC_VDPBF16PS)
(match_dup 1)
(match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512BF16"
"vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")

View File

@ -313,3 +313,16 @@
(const_int 1))
(match_operand:SI 3 "const48_operand")]
UNSPEC_EMBEDDED_ROUNDING))])
(define_subst_attr "maskz_half_name" "maskz_half" "" "_maskz_1")
(define_subst_attr "maskz_half_operand4" "maskz_half" "" "%{%5%}%N4")
(define_subst "maskz_half"
[(set (match_operand:SUBST_V 0)
(match_operand:SUBST_V 1))]
""
[(set (match_dup 0)
(vec_merge:SUBST_V
(match_dup 1)
(match_operand:SUBST_V 2 "const0_operand" "C")
(match_operand:<avx512fmaskhalfmode> 3 "register_operand" "Yk")))])

View File

@ -1274,7 +1274,7 @@ See RS/6000 and PowerPC Options.
-msse4a -m3dnow -m3dnowa -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop @gol
-madx -mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mhle -mlwp @gol
-mmwaitx -mclzero -mpku -mthreads -mgfni -mvaes -mwaitpkg @gol
-mshstk -mmanual-endbr -mforce-indirect-call -mavx512vbmi2 @gol
-mshstk -mmanual-endbr -mforce-indirect-call -mavx512vbmi2 -mavx512bf16 @gol
-mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq @gol
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
-mrdseed -msgx @gol
@ -28041,6 +28041,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mavx512vbmi2
@opindex mavx512vbmi2
@need 200
@itemx -mavx512bf16
@opindex mavx512bf16
@need 200
@itemx -mgfni
@opindex mgfni
@need 200
@ -28083,7 +28086,7 @@ AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP,
3DNow!@:, enhanced 3DNow!@:, POPCNT, ABM, ADX, BMI, BMI2, LZCNT, FXSR, XSAVE,
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B,
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16
AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, or CLDEMOTE
extended instruction sets. Each has a corresponding @option{-mno-} option to
disable use of these instructions.

View File

@ -12,6 +12,23 @@
* gcc.dg/tree-ssa/pr90356-3.c: New test.
* gcc.dg/tree-ssa/pr90356-4.c: New test.
2019-05-07 Wei Xiao <wei3.xiao@intel.com>
* gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c: New test.
* gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c: New test.
* gcc.target/i386/avx512bf16-vdpbf16ps-1.c: New test.
* gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c: New test.
* gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: New test.
* gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c: New test.
* gcc.target/i386/builtin_target.c: Handle avx512bf16.
* gcc.target/i386/sse-12.c: Add -mavx512bf16.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* g++.dg/other/i386-2.C: Ditto.
* g++.dg/other/i386-3.C: Ditto.
2019-05-07 Cherry Zhang <cherryyz@google.com>
* go.dg/arrayclear.go: New test.

View File

@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,

View File

@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,

View File

@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-mavx512bf16 -O2" } */
/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m512bh res;
volatile __m512 x1, x2;
volatile __mmask32 m32;
void extern
avx512bf16_test (void)
{
res = _mm512_cvtne2ps_pbh (x1, x2);
res = _mm512_mask_cvtne2ps_pbh (res, m32, x1, x2);
res = _mm512_maskz_cvtne2ps_pbh (m32, x1, x2);
}

View File

@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-mavx512bf16 -O2" } */
/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m256bh res;
volatile __m512 x1;
volatile __mmask16 m16;
void extern
avx512bf16_test (void)
{
res = _mm512_cvtneps_pbh (x1);
res = _mm512_mask_cvtneps_pbh (res, m16, x1);
res = _mm512_maskz_cvtneps_pbh (m16, x1);
}

View File

@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-mavx512bf16 -O2" } */
/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m512 res;
volatile __m512bh x1, x2;
volatile __mmask16 m16;
void extern
avx512bf16_test (void)
{
res = _mm512_dpbf16_ps (res, x1, x2);
res = _mm512_mask_dpbf16_ps (res, m16, x1, x2);
res = _mm512_maskz_dpbf16_ps (m16, res, x1, x2);
}

View File

@ -0,0 +1,49 @@
/* { dg-do compile } */
/* { dg-options "-mavx512bf16 -O2" } */
/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
typedef union
{
__m512 x;
float a[16];
} union512s;
float res_ref[16];
union512s res;
__m512bh x1, x2;
__mmask16 m16;
static void __attribute__((noinline, unused))
merge_masking_s (float *arr, unsigned long long mask, int size)
{
int i;
for (i = 0; i < size; i++)
{
arr[i] = (mask & (1LL << i)) ? arr[i] : 117;
}
}
static int __attribute__((noinline, unused))
check_union512s (union512s u, const float *v)
{
int i;
int err = 0;
for (i = 0; i < (sizeof (u.a) / sizeof ((u.a)[0])); i++)
if (u.a[i] != v[i])
{
err++;
;
}
return err;
}
void extern
avx512bf16_test (void)
{
res.x = _mm512_mask_dpbf16_ps (res.x, m16, x1, x2);
merge_masking_s (res_ref, m16, 16);
if (check_union512s (res, res_ref))
abort ();
}

View File

@ -0,0 +1,29 @@
/* { dg-do compile } */
/* { dg-options "-mavx512bf16 -mavx512vl -O2" } */
/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m128bh res1;
volatile __m256bh res2;
volatile __m128 x1, x2;
volatile __m256 x3, x4;
volatile __mmask8 m8;
volatile __mmask16 m16;
void extern
avx512bf16_test (void)
{
res2 = _mm256_cvtne2ps_pbh (x3, x4);
res2 = _mm256_mask_cvtne2ps_pbh (res2, m16, x3, x4);
res2 = _mm256_maskz_cvtne2ps_pbh (m16, x3, x4);
res1 = _mm_cvtne2ps_pbh (x1, x2);
res1 = _mm_mask_cvtne2ps_pbh (res1, m8, x1, x2);
res1 = _mm_maskz_cvtne2ps_pbh (m8, x1, x2);
}

View File

@ -0,0 +1,27 @@
/* { dg-do compile } */
/* { dg-options "-mavx512bf16 -mavx512vl -O2" } */
/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m128bh res1, res2;
volatile __m128 x1;
volatile __m256 x2;
volatile __mmask8 m8;
void extern
avx512bf16_test (void)
{
res2 = _mm256_cvtneps_pbh (x2);
res2 = _mm256_mask_cvtneps_pbh (res2, m8, x2);
res2 = _mm256_maskz_cvtneps_pbh (m8, x2);
res1 = _mm_cvtneps_pbh (x1);
res1 = _mm_mask_cvtneps_pbh (res1, m8, x1);
res1 = _mm_maskz_cvtneps_pbh (m8, x1);
}

View File

@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-options "-mavx512bf16 -mavx512vl -O2" } */
/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m256 res1;
volatile __m256bh x1, x2;
volatile __m128 res2;
volatile __m128bh x3, x4;
volatile __mmask8 m8;
void extern
avx512bf16_test (void)
{
res1 = _mm256_dpbf16_ps (res1, x1, x2);
res1 = _mm256_mask_dpbf16_ps (res1, m8, x1, x2);
res1 = _mm256_maskz_dpbf16_ps (m8, res1, x1, x2);
res2 = _mm_dpbf16_ps (res2, x3, x4);
res2 = _mm_mask_dpbf16_ps (res2, m8, x3, x4);
res2 = _mm_maskz_dpbf16_ps (m8, res2, x3, x4);
}

View File

@ -265,6 +265,10 @@ check_features (unsigned int ecx, unsigned int edx,
assert (__builtin_cpu_supports ("avx5124vnniw"));
if (edx & bit_AVX5124FMAPS)
assert (__builtin_cpu_supports ("avx5124fmaps"));
__cpuid_count (7, 1, eax, ebx, ecx, edx);
if (eax & bit_AVX512BF16)
assert (__builtin_cpu_supports ("avx512bf16"));
}
/* Check cpuid level of extended features. */

View File

@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
#include <x86intrin.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd" } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>

View File

@ -101,7 +101,7 @@
#ifndef DIFFERENT_PRAGMAS
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg")
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16")
#endif
/* Following intrinsics require immediate arguments. They
@ -218,7 +218,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg")
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)

View File

@ -696,6 +696,6 @@
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd")
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16")
#include <x86intrin.h>

View File

@ -1,3 +1,9 @@
2019-05-07 Hongtao Liu <hongtao.liu@intel.com>
* config/i386/cpuinfo.c (get_available_features): Detect BF16.
* config/i386/cpuinfo.h (enum processor_features): Add
FEATURE_AVX512BF16.
2019-04-23 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
Bernd Edlinger <bernd.edlinger@hotmail.de>
Jakub Jelinek <jakub@redhat.com>

View File

@ -336,7 +336,7 @@ get_available_features (unsigned int ecx, unsigned int edx,
set_feature (FEATURE_FMA);
}
/* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
/* Get Advanced Features at level 7 (eax = 7, ecx = 0/1). */
if (max_cpuid_level >= 7)
{
__cpuid_count (7, 0, eax, ebx, ecx, edx);
@ -385,6 +385,10 @@ get_available_features (unsigned int ecx, unsigned int edx,
set_feature (FEATURE_AVX5124VNNIW);
if (edx & bit_AVX5124FMAPS)
set_feature (FEATURE_AVX5124FMAPS);
__cpuid_count (7, 1, eax, ebx, ecx, edx);
if (eax & bit_AVX512BF16)
set_feature (FEATURE_AVX512BF16);
}
}

View File

@ -119,7 +119,8 @@ enum processor_features
FEATURE_GFNI,
FEATURE_VPCLMULQDQ,
FEATURE_AVX512VNNI,
FEATURE_AVX512BITALG
FEATURE_AVX512BITALG,
FEATURE_AVX512BF16
};
extern struct __processor_model