Enable GCC support for AMX-TILE,AMX-INT8,AMX-BF16.
AMX-TILE:ldtilecfg/sttilecfg/tileloadd/tileloaddt1/tilezero/tilerelease AMX-INT8:tdpbssd/tdpbsud/tdpbusd/tdpbuud AMX-BF16:tdpbf16ps gcc/ChangeLog * common/config/i386/i386-common.c (OPTION_MASK_ISA2_AMX_TILE_SET, OPTION_MASK_ISA2_AMX_INT8_SET, OPTION_MASK_ISA2_AMX_BF16_SET, OPTION_MASK_ISA2_AMX_TILE_UNSET, OPTION_MASK_ISA2_AMX_INT8_UNSET, OPTION_MASK_ISA2_AMX_BF16_UNSET, OPTION_MASK_ISA2_XSAVE_UNSET): New marcos. (ix86_handle_option): Hanlde -mamx-tile, -mamx-int8, -mamx-bf16. * common/config/i386/i386-cpuinfo.h (processor_types): Add FEATURE_AMX_TILE, FEATURE_AMX_INT8, FEATURE_AMX_BF16. * common/config/i386/cpuinfo.h (XSTATE_TILECFG, XSTATE_TILEDATA, XCR_AMX_ENABLED_MASK): New macro. (get_available_features): Enable AMX features only if their states are suoorited by OSXSAVE. * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for amx-tile, amx-int8, amx-bf16. * config.gcc: Add amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h to extra headers. * config/i386/amxbf16intrin.h: New file. * config/i386/amxint8intrin.h: Ditto. * config/i386/amxtileintrin.h: Ditto. * config/i386/cpuid.h (bit_AMX_BF16, bit_AMX_TILE, bit_AMX_INT8): New macro. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AMX_TILE__, __AMX_INT8__, AMX_BF16__. * config/i386/i386-options.c (ix86_target_string): Add -mamx-tile, -mamx-int8, -mamx-bf16. (ix86_option_override_internal): Handle AMX-TILE, AMX-INT8, AMX-BF16. * config/i386/i386.h (TARGET_AMX_TILE, TARGET_AMX_TILE_P, TARGET_AMX_INT8, TARGET_AMX_INT8_P, TARGET_AMX_BF16_P, PTA_AMX_TILE, PTA_AMX_INT8, PTA_AMX_BF16): New macros. * config/i386/i386.opt: Add -mamx-tile, -mamx-int8, -mamx-bf16. * config/i386/immintrin.h: Include amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h. * doc/invoke.texi: Document -mamx-tile, -mamx-int8, -mamx-bf16. * doc/extend.texi: Document amx-tile, amx-int8, amx-bf16. * doc/sourcebuild.texi ((Effective-Target Keywords, Other hardware attributes): Document amx_int8, amx_tile, amx_bf16. gcc/testsuite/ChangeLog * lib/target-supports.exp (check_effective_target_amx_tile, check_effective_target_amx_int8, check_effective_target_amx_bf16): New proc. * g++.dg/other/i386-2.C: Add -mamx-tile, -mamx-int8, -mamx-bf16. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/sse-12.c: Ditto. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * gcc.target/i386/amx-check.h: New header file. * gcc.target/i386/amxbf16-asmatt-1.c: New test. * gcc.target/i386/amxint8-asmatt-1.c: New test. * gcc.target/i386/amxtile-asmatt-1.c: Ditto. * gcc.target/i386/amxbf16-asmintel-1.c: Ditto. * gcc.target/i386/amxint8-asmintel-1.c: Ditto. * gcc.target/i386/amxtile-asmintel-1.c: Ditto. * gcc.target/i386/amxbf16-dpbf16ps-2.c: Ditto. * gcc.target/i386/amxint8-dpbssd-2.c: Ditto. * gcc.target/i386/amxint8-dpbsud-2.c: Ditto. * gcc.target/i386/amxint8-dpbusd-2.c: Ditto. * gcc.target/i386/amxint8-dpbuud-2.c: Ditto. * gcc.target/i386/amxtile-2.c: Ditto.
This commit is contained in:
parent
92f0d3d03a
commit
5c609842d1
|
@ -509,15 +509,20 @@ get_available_features (struct __processor_model *cpu_model,
|
|||
#define XSTATE_OPMASK 0x20
|
||||
#define XSTATE_ZMM 0x40
|
||||
#define XSTATE_HI_ZMM 0x80
|
||||
#define XSTATE_TILECFG 0x20000
|
||||
#define XSTATE_TILEDATA 0x40000
|
||||
|
||||
#define XCR_AVX_ENABLED_MASK \
|
||||
(XSTATE_SSE | XSTATE_YMM)
|
||||
#define XCR_AVX512F_ENABLED_MASK \
|
||||
(XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
|
||||
#define XCR_AMX_ENABLED_MASK \
|
||||
(XSTATE_TILECFG | XSTATE_TILEDATA)
|
||||
|
||||
/* Check if AVX and AVX512 are usable. */
|
||||
int avx_usable = 0;
|
||||
int avx512_usable = 0;
|
||||
int amx_usable = 0;
|
||||
if ((ecx & bit_OSXSAVE))
|
||||
{
|
||||
/* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
|
||||
|
@ -533,6 +538,8 @@ get_available_features (struct __processor_model *cpu_model,
|
|||
avx512_usable = ((xcrlow & XCR_AVX512F_ENABLED_MASK)
|
||||
== XCR_AVX512F_ENABLED_MASK);
|
||||
}
|
||||
amx_usable = ((xcrlow & XCR_AMX_ENABLED_MASK)
|
||||
== XCR_AMX_ENABLED_MASK);
|
||||
}
|
||||
|
||||
#define set_feature(f) \
|
||||
|
@ -651,6 +658,15 @@ get_available_features (struct __processor_model *cpu_model,
|
|||
set_feature (FEATURE_PCONFIG);
|
||||
if (edx & bit_IBT)
|
||||
set_feature (FEATURE_IBT);
|
||||
if (amx_usable)
|
||||
{
|
||||
if (edx & bit_AMX_TILE)
|
||||
set_feature (FEATURE_AMX_TILE);
|
||||
if (edx & bit_AMX_INT8)
|
||||
set_feature (FEATURE_AMX_INT8);
|
||||
if (edx & bit_AMX_BF16)
|
||||
set_feature (FEATURE_AMX_BF16);
|
||||
}
|
||||
if (avx512_usable)
|
||||
{
|
||||
if (ebx & bit_AVX512F)
|
||||
|
|
|
@ -101,6 +101,9 @@ along with GCC; see the file COPYING3. If not see
|
|||
(OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_XSAVE_SET)
|
||||
#define OPTION_MASK_ISA_CLWB_SET OPTION_MASK_ISA_CLWB
|
||||
#define OPTION_MASK_ISA2_AVX512VP2INTERSECT_SET OPTION_MASK_ISA2_AVX512VP2INTERSECT
|
||||
#define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
|
||||
#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
|
||||
#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
|
||||
as -msse4.2. */
|
||||
|
@ -194,6 +197,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
(OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_XSAVEOPT_UNSET \
|
||||
| OPTION_MASK_ISA_XSAVES_UNSET | OPTION_MASK_ISA_XSAVEC_UNSET \
|
||||
| OPTION_MASK_ISA_AVX_UNSET)
|
||||
#define OPTION_MASK_ISA2_XSAVE_UNSET OPTION_MASK_ISA2_AMX_TILE_UNSET
|
||||
#define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT
|
||||
#define OPTION_MASK_ISA_AVX2_UNSET \
|
||||
(OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
|
||||
|
@ -247,6 +251,9 @@ along with GCC; see the file COPYING3. If not see
|
|||
#define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE
|
||||
#define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT
|
||||
#define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK
|
||||
#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE
|
||||
#define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
|
||||
#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
|
||||
as -mno-sse4.1. */
|
||||
|
@ -931,6 +938,47 @@ ix86_handle_option (struct gcc_options *opts,
|
|||
}
|
||||
return true;
|
||||
|
||||
case OPT_mamx_tile:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_TILE_SET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_TILE_SET;
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XSAVE_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_TILE_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_TILE_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mamx_int8:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_INT8_SET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_INT8_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_INT8_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_INT8_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mamx_bf16:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16_SET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_BF16_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_BF16_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_BF16_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mfma:
|
||||
if (value)
|
||||
{
|
||||
|
@ -1265,6 +1313,8 @@ ix86_handle_option (struct gcc_options *opts,
|
|||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_XSAVE_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XSAVE_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_XSAVE_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_XSAVE_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
|
|
|
@ -216,6 +216,9 @@ enum processor_features
|
|||
FEATURE_XSAVEC,
|
||||
FEATURE_XSAVEOPT,
|
||||
FEATURE_XSAVES,
|
||||
FEATURE_AMX_TILE,
|
||||
FEATURE_AMX_INT8,
|
||||
FEATURE_AMX_BF16,
|
||||
CPU_FEATURE_MAX
|
||||
};
|
||||
|
||||
|
|
|
@ -160,4 +160,7 @@ ISA_NAMES_TABLE_START
|
|||
ISA_NAMES_TABLE_ENTRY("xsaveopt", FEATURE_XSAVEOPT, P_NONE,
|
||||
"-mxsaveopt")
|
||||
ISA_NAMES_TABLE_ENTRY("xsaves", FEATURE_XSAVES, P_NONE, "-mxsaves")
|
||||
ISA_NAMES_TABLE_ENTRY("amx-tile", FEATURE_AMX_TILE, P_NONE, "-mamx-tile")
|
||||
ISA_NAMES_TABLE_ENTRY("amx-int8", FEATURE_AMX_INT8, P_NONE, "-mamx-int8")
|
||||
ISA_NAMES_TABLE_ENTRY("amx-bf16", FEATURE_AMX_BF16, P_NONE, "-mamx-bf16")
|
||||
ISA_NAMES_TABLE_END
|
||||
|
|
|
@ -412,7 +412,8 @@ i[34567]86-*-*)
|
|||
waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h
|
||||
avx512bf16intrin.h enqcmdintrin.h serializeintrin.h
|
||||
avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
|
||||
tsxldtrkintrin.h"
|
||||
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
|
||||
amxbf16intrin.h"
|
||||
;;
|
||||
x86_64-*-*)
|
||||
cpu_type=i386
|
||||
|
@ -447,7 +448,8 @@ x86_64-*-*)
|
|||
waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h
|
||||
avx512bf16intrin.h enqcmdintrin.h serializeintrin.h
|
||||
avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
|
||||
tsxldtrkintrin.h"
|
||||
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
|
||||
amxbf16intrin.h"
|
||||
;;
|
||||
ia64-*-*)
|
||||
extra_headers=ia64intrin.h
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxbf16intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AMXBF16INTRIN_H_INCLUDED
|
||||
#define _AMXBF16INTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AMX_BF16__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-bf16")
|
||||
#define __DISABLE_AMX_BF16__
|
||||
#endif /* __AMX_BF16__ */
|
||||
|
||||
#if defined(__x86_64__) && defined(__AMX_BF16__)
|
||||
#define _tile_dpbf16ps_internal(dst,src1,src2) \
|
||||
__asm__ volatile\
|
||||
("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
|
||||
|
||||
#define _tile_dpbf16ps(dst,src1,src2) \
|
||||
_tile_dpbf16ps_internal (dst, src1, src2)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_AMX_BF16__
|
||||
#undef __DISABLE_AMX_BF16__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AMX_BF16__ */
|
||||
|
||||
#endif /* _AMXBF16INTRIN_H_INCLUDED */
|
|
@ -0,0 +1,38 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxint8intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AMXINT8INTRIN_H_INCLUDED
|
||||
#define _AMXINT8INTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AMX_INT8__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-int8")
|
||||
#define __DISABLE_AMX_INT8__
|
||||
#endif /* __AMX_INT8__ */
|
||||
|
||||
#if defined(__x86_64__) && defined(__AMX_INT8__)
|
||||
#define _tile_int8_dp_internal(name,dst,src1,src2) \
|
||||
__asm__ volatile \
|
||||
("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
|
||||
|
||||
#define _tile_dpbssd(dst,src1,src2) \
|
||||
_tile_int8_dp_internal (tdpbssd, dst, src1, src2)
|
||||
|
||||
#define _tile_dpbsud(dst,src1,src2) \
|
||||
_tile_int8_dp_internal (tdpbsud, dst, src1, src2)
|
||||
|
||||
#define _tile_dpbusd(dst,src1,src2) \
|
||||
_tile_int8_dp_internal (tdpbusd, dst, src1, src2)
|
||||
|
||||
#define _tile_dpbuud(dst,src1,src2) \
|
||||
_tile_int8_dp_internal (tdpbuud, dst, src1, src2)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_AMX_INT8__
|
||||
#undef __DISABLE_AMX_INT8__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AMX_INT8__ */
|
||||
|
||||
#endif /* _AMXINT8INTRIN_H_INCLUDED */
|
|
@ -0,0 +1,75 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxtileintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AMXTILEINTRIN_H_INCLUDED
|
||||
#define _AMXTILEINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AMX_TILE__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-tile")
|
||||
#define __DISABLE_AMX_TILE__
|
||||
#endif /* __AMX_TILE__ */
|
||||
|
||||
#if defined(__x86_64__) && defined(__AMX_TILE__)
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tile_loadconfig (const void *__config)
|
||||
{
|
||||
__asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tile_storeconfig (void *__config)
|
||||
{
|
||||
__asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tile_release (void)
|
||||
{
|
||||
__asm__ volatile ("tilerelease" ::);
|
||||
}
|
||||
|
||||
#define _tile_loadd(dst,base,stride) \
|
||||
_tile_loadd_internal (dst, base, stride)
|
||||
|
||||
#define _tile_loadd_internal(dst,base,stride) \
|
||||
__asm__ volatile \
|
||||
("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" \
|
||||
:: "r" ((const void*) base), "r" ((long) stride))
|
||||
|
||||
#define _tile_stream_loadd(dst,base,stride) \
|
||||
_tile_stream_loadd_internal (dst, base, stride)
|
||||
|
||||
#define _tile_stream_loadd_internal(dst,base,stride) \
|
||||
__asm__ volatile \
|
||||
("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" \
|
||||
:: "r" ((const void*) base), "r" ((long) stride))
|
||||
|
||||
#define _tile_stored(dst,base,stride) \
|
||||
_tile_stored_internal (dst, base, stride)
|
||||
|
||||
#define _tile_stored_internal(src,base,stride) \
|
||||
__asm__ volatile \
|
||||
("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" \
|
||||
:: "r" ((void*) base), "r" ((long) stride) \
|
||||
: "memory")
|
||||
|
||||
#define _tile_zero(dst) \
|
||||
_tile_zero_internal (dst)
|
||||
|
||||
#define _tile_zero_internal(dst) \
|
||||
__asm__ volatile \
|
||||
("tilezero\t%%tmm"#dst ::)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_AMX_TILE__
|
||||
#undef __DISABLE_AMX_TILE__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AMX_TILE__ */
|
||||
|
||||
#endif /* _AMXTILEINTRIN_H_INCLUDED */
|
|
@ -127,6 +127,9 @@
|
|||
#define bit_PCONFIG (1 << 18)
|
||||
#define bit_SERIALIZE (1 << 14)
|
||||
#define bit_TSXLDTRK (1 << 16)
|
||||
#define bit_AMX_BF16 (1 << 22)
|
||||
#define bit_AMX_TILE (1 << 24)
|
||||
#define bit_AMX_INT8 (1 << 25)
|
||||
|
||||
/* XFEATURE_ENABLED_MASK register bits (%eax == 0xd, %ecx == 0) */
|
||||
#define bit_BNDREGS (1 << 3)
|
||||
|
|
|
@ -588,6 +588,13 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
def_or_undef (parse_in, "__ENQCMD__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_TSXLDTRK)
|
||||
def_or_undef (parse_in, "__TSXLDTRK__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AMX_TILE)
|
||||
def_or_undef (parse_in, "__AMX_TILE__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AMX_INT8)
|
||||
def_or_undef (parse_in, "__AMX_INT8__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AMX_BF16)
|
||||
def_or_undef (parse_in, "__AMX_BF16__");
|
||||
|
||||
if (TARGET_IAMCU)
|
||||
{
|
||||
def_or_undef (parse_in, "__iamcu");
|
||||
|
|
|
@ -209,7 +209,10 @@ static struct ix86_target_opts isa2_opts[] =
|
|||
{ "-mavx512bf16", OPTION_MASK_ISA2_AVX512BF16 },
|
||||
{ "-menqcmd", OPTION_MASK_ISA2_ENQCMD },
|
||||
{ "-mserialize", OPTION_MASK_ISA2_SERIALIZE },
|
||||
{ "-mtsxldtrk", OPTION_MASK_ISA2_TSXLDTRK }
|
||||
{ "-mtsxldtrk", OPTION_MASK_ISA2_TSXLDTRK },
|
||||
{ "-mamx-tile", OPTION_MASK_ISA2_AMX_TILE },
|
||||
{ "-mamx-int8", OPTION_MASK_ISA2_AMX_INT8 },
|
||||
{ "-mamx-bf16", OPTION_MASK_ISA2_AMX_BF16 }
|
||||
};
|
||||
static struct ix86_target_opts isa_opts[] =
|
||||
{
|
||||
|
@ -1033,6 +1036,9 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
|||
IX86_ATTR_ISA ("enqcmd", OPT_menqcmd),
|
||||
IX86_ATTR_ISA ("serialize", OPT_mserialize),
|
||||
IX86_ATTR_ISA ("tsxldtrk", OPT_mtsxldtrk),
|
||||
IX86_ATTR_ISA ("amx-tile", OPT_mamx_tile),
|
||||
IX86_ATTR_ISA ("amx-int8", OPT_mamx_int8),
|
||||
IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16),
|
||||
|
||||
/* enum options */
|
||||
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
|
||||
|
@ -2258,6 +2264,18 @@ ix86_option_override_internal (bool main_args_p,
|
|||
&& !(opts->x_ix86_isa_flags2_explicit
|
||||
& OPTION_MASK_ISA2_AVX512BF16))
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX512BF16;
|
||||
if (((processor_alias_table[i].flags & PTA_AMX_TILE) != 0)
|
||||
&& !(opts->x_ix86_isa_flags2_explicit
|
||||
& OPTION_MASK_ISA2_AMX_TILE))
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_TILE;
|
||||
if (((processor_alias_table[i].flags & PTA_AMX_INT8) != 0)
|
||||
&& !(opts->x_ix86_isa_flags2_explicit
|
||||
& OPTION_MASK_ISA2_AMX_INT8))
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_INT8;
|
||||
if (((processor_alias_table[i].flags & PTA_AMX_BF16) != 0)
|
||||
&& !(opts->x_ix86_isa_flags2_explicit
|
||||
& OPTION_MASK_ISA2_AMX_BF16))
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16;
|
||||
if (((processor_alias_table[i].flags & PTA_MOVDIRI) != 0)
|
||||
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVDIRI))
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVDIRI;
|
||||
|
|
|
@ -203,6 +203,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|||
#define TARGET_SERIALIZE_P(x) TARGET_ISA2_SERIALIZE_P(x)
|
||||
#define TARGET_TSXLDTRK TARGET_ISA2_TSXLDTRK
|
||||
#define TARGET_TSXLDTRK_P(x) TARGET_ISA2_TSXLDTRK_P(x)
|
||||
#define TARGET_AMX_TILE TARGET_ISA2_AMX_TILE
|
||||
#define TARGET_AMX_TILE_P(x) TARGET_ISA2_AMX_TILE(x)
|
||||
#define TARGET_AMX_INT8 TARGET_ISA2_AMX_INT8
|
||||
#define TARGET_AMX_INT8_P(x) TARGET_ISA2_AMX_INT8(x)
|
||||
#define TARGET_AMX_BF16 TARGET_ISA2_AMX_BF16
|
||||
#define TARGET_AMX_BF16_P(x) TARGET_ISA2_AMX_BF16(x)
|
||||
|
||||
#define TARGET_LP64 TARGET_ABI_64
|
||||
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
|
||||
|
@ -2466,6 +2472,9 @@ const wide_int_bitmask PTA_ENQCMD (0, HOST_WIDE_INT_1U << 15);
|
|||
const wide_int_bitmask PTA_CLDEMOTE (0, HOST_WIDE_INT_1U << 16);
|
||||
const wide_int_bitmask PTA_SERIALIZE (0, HOST_WIDE_INT_1U << 17);
|
||||
const wide_int_bitmask PTA_TSXLDTRK (0, HOST_WIDE_INT_1U << 18);
|
||||
const wide_int_bitmask PTA_AMX_TILE(0, HOST_WIDE_INT_1U << 19);
|
||||
const wide_int_bitmask PTA_AMX_INT8(0, HOST_WIDE_INT_1U << 20);
|
||||
const wide_int_bitmask PTA_AMX_BF16(0, HOST_WIDE_INT_1U << 21);
|
||||
|
||||
const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
|
||||
| PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
|
||||
|
@ -2499,7 +2508,8 @@ const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI
|
|||
| PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT;
|
||||
const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI
|
||||
| PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE
|
||||
| PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK;
|
||||
| PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE
|
||||
| PTA_AMX_INT8 | PTA_AMX_BF16;
|
||||
const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE
|
||||
| PTA_WAITPKG | PTA_SERIALIZE;
|
||||
const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER
|
||||
|
|
|
@ -1114,4 +1114,16 @@ Support SERIALIZE built-in functions and code generation.
|
|||
|
||||
mtsxldtrk
|
||||
Target Report Mask(ISA2_TSXLDTRK) Var(ix86_isa_flags2) Save
|
||||
Support TSXLDTRK built-in functions and code generation.
|
||||
Support TSXLDTRK built-in functions and code generation.
|
||||
|
||||
mamx-tile
|
||||
Target Report Mask(ISA2_AMX_TILE) Var(ix86_isa_flags2) Save
|
||||
Support AMX-TILE built-in functions and code generation.
|
||||
|
||||
mamx-int8
|
||||
Target Report Mask(ISA2_AMX_INT8) Var(ix86_isa_flags2) Save
|
||||
Support AMX-INT8 built-in functions and code generation.
|
||||
|
||||
mamx-bf16
|
||||
Target Report Mask(ISA2_AMX_BF16) Var(ix86_isa_flags2) Save
|
||||
Support AMX-BF16 built-in functions and code generation.
|
||||
|
|
|
@ -144,6 +144,12 @@
|
|||
|
||||
#include <tsxldtrkintrin.h>
|
||||
|
||||
#include <amxtileintrin.h>
|
||||
|
||||
#include <amxint8intrin.h>
|
||||
|
||||
#include <amxbf16intrin.h>
|
||||
|
||||
#include <rdseedintrin.h>
|
||||
|
||||
#include <prfchwintrin.h>
|
||||
|
|
|
@ -6623,6 +6623,21 @@ Enable/disable the generation of the XSAVEOPT instructions.
|
|||
@cindex @code{target("xsaves")} function attribute, x86
|
||||
Enable/disable the generation of the XSAVES instructions.
|
||||
|
||||
@item amx-tile
|
||||
@itemx no-amx-tile
|
||||
@cindex @code{target("amx-tile")} function attribute, x86
|
||||
Enable/disable the generation of the AMX-TILE instructions.
|
||||
|
||||
@item amx-int8
|
||||
@itemx no-amx-int8
|
||||
@cindex @code{target("amx-int8")} function attribute, x86
|
||||
Enable/disable the generation of the AMX-INT8 instructions.
|
||||
|
||||
@item amx-bf16
|
||||
@itemx no-amx-bf16
|
||||
@cindex @code{target("amx-bf16")} function attribute, x86
|
||||
Enable/disable the generation of the AMX-BF16 instructions.
|
||||
|
||||
@item cld
|
||||
@itemx no-cld
|
||||
@cindex @code{target("cld")} function attribute, x86
|
||||
|
|
|
@ -1362,6 +1362,7 @@ See RS/6000 and PowerPC Options.
|
|||
-mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq @gol
|
||||
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
|
||||
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
|
||||
-mamx-tile -mamx-int8 -mamx-bf16@gol
|
||||
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
|
||||
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
|
||||
-mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol
|
||||
|
@ -30205,6 +30206,15 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
|||
@need 200
|
||||
@itemx -mserialize
|
||||
@opindex mserialize
|
||||
@need 200
|
||||
@itemx -mamx-tile
|
||||
@opindex mamx-tile
|
||||
@need 200
|
||||
@itemx -mamx-int8
|
||||
@opindex mamx-int8
|
||||
@need 200
|
||||
@itemx -mamx-bf16
|
||||
@opindex mamx-bf16
|
||||
These switches enable the use of instructions in the MMX, SSE,
|
||||
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
|
||||
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
|
||||
|
|
|
@ -2249,6 +2249,15 @@ Target supports the execution of @code{avx512f} instructions.
|
|||
@item avx512vp2intersect
|
||||
Target supports the execution of @code{avx512vp2intersect} instructions.
|
||||
|
||||
@item amx_tile
|
||||
Target supports the execution of @code{amx-tile} instructions.
|
||||
|
||||
@item amx_int8
|
||||
Target supports the execution of @code{amx-int8} instructions.
|
||||
|
||||
@item amx_bf16
|
||||
Target supports the execution of @code{amx-bf16} instructions.
|
||||
|
||||
@item cell_hw
|
||||
Test system can execute AltiVec and Cell PPU instructions.
|
||||
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h,
|
||||
avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h
|
||||
avx512bitalgintrin.h, avx512vp2intersectintrin.h, tsxldtrkintrin.h,
|
||||
amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h,
|
||||
avx512vp2intersectvlintrin.h and mm_malloc.h.h are usable
|
||||
with -O -pedantic-errors. */
|
||||
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h,
|
||||
avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h
|
||||
avx512bitalgintrin.h, avx512vp2intersectintrin.h, tsxldtrkintrin.h,
|
||||
amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h,
|
||||
avx512vp2intersectvlintrin.h and mm_malloc.h are usable
|
||||
with -O -fkeep-inline-functions. */
|
||||
|
||||
|
|
|
@ -0,0 +1,185 @@
|
|||
#ifndef AMX_CHECK_H_INCLUDED
|
||||
#define AMX_CHECK_H_INCLUDED
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#ifdef DEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#include "cpuid.h"
|
||||
|
||||
/* TODO: The tmm emulation is temporary for current
|
||||
AMX implementation with no tmm regclass, should
|
||||
be changed in the future. */
|
||||
typedef struct __tile_config
|
||||
{
|
||||
uint8_t palette_id;
|
||||
uint8_t start_row;
|
||||
uint8_t reserved_0[14];
|
||||
uint16_t colsb[8]; /* Colum size of each tmm register in bytes */
|
||||
uint16_t reserved_1[8];
|
||||
uint8_t rows[8]; /* Row size of each tmm reg in bytes */
|
||||
uint8_t reserved_2[8];
|
||||
} __tilecfg;
|
||||
|
||||
typedef union __union_tile_config
|
||||
{
|
||||
__tilecfg s;
|
||||
uint8_t a[64];
|
||||
} __tilecfg_u;
|
||||
|
||||
typedef struct __tile
|
||||
{
|
||||
/* Max size of tile register */
|
||||
uint8_t buf[1024];
|
||||
int rows;
|
||||
int colsb;
|
||||
} __tile;
|
||||
|
||||
/* Maxium col/row size in bytes */
|
||||
#define MAX_ROWS 16
|
||||
#define MAX_COLS 64
|
||||
|
||||
/* Stride (colum width in byte) used for tileload/store */
|
||||
#define _STRIDE 64
|
||||
|
||||
/* Initialize tile config by setting all tmm size to 16x64 */
|
||||
void init_tile_config (__tilecfg_u *dst)
|
||||
{
|
||||
int i;
|
||||
|
||||
dst->s.palette_id = 1;
|
||||
dst->s.start_row = 0;
|
||||
|
||||
for (i = 0; i < 14; i++)
|
||||
dst->s.reserved_0[i] = 0;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
dst->s.colsb[i] = _STRIDE;
|
||||
dst->s.rows[i] = 16;
|
||||
dst->s.reserved_1[i] = 0;
|
||||
dst->s.reserved_2[i] = 0;
|
||||
}
|
||||
|
||||
_tile_loadconfig (dst->a);
|
||||
}
|
||||
|
||||
/* Init __tile variable that going to be store to register
|
||||
w/o extra buffer. If buffer exists, it should be the same
|
||||
size matrix as corresponding tmm register.
|
||||
Should execute init_tile_config first */
|
||||
void init_tile_src (const int tmm_num, __tile *src, uint8_t *buffer)
|
||||
{
|
||||
int rows, colsb, i, j;
|
||||
__tilecfg_u tmp;
|
||||
|
||||
_tile_storeconfig (tmp.a);
|
||||
|
||||
src->rows = rows = tmp.s.rows[tmm_num];
|
||||
src->colsb = colsb = tmp.s.colsb[tmm_num];
|
||||
|
||||
for (i = 0; i < rows; i++)
|
||||
for (j = 0; j < colsb; j++)
|
||||
{
|
||||
if(buffer)
|
||||
src->buf[i * colsb + j] = buffer[i * colsb + j];
|
||||
else
|
||||
src->buf[i * colsb + j] = (i + 11 * j) % 256;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Init __tile src and corresponding tmm register */
|
||||
#define init_tile_reg_and_src(tmm_num, src) \
|
||||
{ \
|
||||
init_tile_src (tmm_num, &src, NULL); \
|
||||
_tile_loadd (tmm_num, src.buf, _STRIDE); \
|
||||
}
|
||||
|
||||
#define init_tile_reg_and_src_with_buffer(tmm_num, src, buffer) \
|
||||
{ \
|
||||
init_tile_src (tmm_num, &src, buffer); \
|
||||
_tile_loadd (tmm_num, src.buf, _STRIDE); \
|
||||
}
|
||||
|
||||
/* Zero __tile src. It should be init first. */
|
||||
void zero_tile_src (__tile *src)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < src->rows; i++)
|
||||
for (j = 0; j < src->colsb; j++)
|
||||
src->buf[i * src->colsb + j] = 0;
|
||||
}
|
||||
|
||||
/* Compare tile config value with __tilecfg_u dst */
|
||||
int check_tile_config (__tilecfg_u *src, __tilecfg_u *dst)
|
||||
{
|
||||
size_t size = sizeof(__tilecfg);
|
||||
uint8_t *pa_src = (uint8_t *) src->a;
|
||||
uint8_t *pa_dst = (uint8_t *) dst->a;
|
||||
|
||||
for (int i = 0; i < size; i++)
|
||||
if (pa_src[i] != pa_dst[i])
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Compare tile register value with __tile variable */
|
||||
int check_tile_register (__tile* ref, __tile* target)
|
||||
{
|
||||
/* Tile register should be stored from tmm to
|
||||
memory and compare with emulation results. */
|
||||
int rows = target->rows;
|
||||
int colsb = target->colsb;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < rows; i++)
|
||||
for (j = 0; j < colsb; j++)
|
||||
if (ref->buf[i * colsb + j] != target->buf[i * colsb + j])
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifndef DO_TEST
|
||||
#define DO_TEST do_test
|
||||
static void test_amx (void);
|
||||
__attribute__ ((noinline))
|
||||
static void
|
||||
do_test (void)
|
||||
{
|
||||
test_amx ();
|
||||
}
|
||||
#endif
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
/* Check cpu support for AMX */
|
||||
if (__builtin_cpu_supports ("amx-tile")
|
||||
#ifdef AMX_INT8
|
||||
&& __builtin_cpu_supports ("amx-int8")
|
||||
#endif
|
||||
#ifdef AMX_BF16
|
||||
&& __builtin_cpu_supports ("amx-bf16")
|
||||
#endif
|
||||
)
|
||||
{
|
||||
DO_TEST ();
|
||||
#ifdef DEBUG
|
||||
printf ("PASSED\n");
|
||||
#endif
|
||||
}
|
||||
#ifdef DEBUG
|
||||
else
|
||||
printf ("SKIPPED\n");
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-bf16" } */
|
||||
/* { dg-final { scan-assembler "tdpbf16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
|
||||
#include <immintrin.h>
|
||||
|
||||
#define TMM1 1
|
||||
#define TMM2 2
|
||||
#define TMM3 3
|
||||
|
||||
void TEST ()
|
||||
{
|
||||
_tile_dpbf16ps (TMM1, TMM2, TMM3);
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-bf16 -masm=intel" } */
|
||||
/* { dg-final { scan-assembler "tdpbf16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
|
||||
#include <immintrin.h>
|
||||
|
||||
void TEST ()
|
||||
{
|
||||
_tile_dpbf16ps (1, 2, 3);
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
/* { dg-do run { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-tile -mamx-bf16" } */
|
||||
#include <immintrin.h>
|
||||
|
||||
#define AMX_BF16
|
||||
#define DO_TEST test_amx_bf16_dpbf16ps
|
||||
void test_amx_bf16_dpbf16ps ();
|
||||
#include "amx-check.h"
|
||||
|
||||
/* Transformation functions between bf16/float */
|
||||
static uint16_t make_bf16 (float f)
|
||||
{
|
||||
uint32_t u = (uint32_t)f;
|
||||
u = (u >> 16) & 0xffff;
|
||||
return (uint16_t)u;
|
||||
}
|
||||
|
||||
static float make_f32 (uint16_t bf)
|
||||
{
|
||||
uint32_t u = (uint32_t)(bf << 16);
|
||||
return (float)u;
|
||||
}
|
||||
|
||||
/* Init tile buffer with bf16 pairs */
|
||||
void init_bf16_max_tile_buffer (uint8_t *buf)
|
||||
{
|
||||
int i, j;
|
||||
uint16_t *ptr = (uint16_t *)buf;
|
||||
|
||||
for(i = 0; i < 16; i++)
|
||||
for(j = 0; j < 32; j++)
|
||||
{
|
||||
float f = 16.1f * i + 3.4f * j;
|
||||
ptr[i * 32 + j] = make_bf16(f);
|
||||
}
|
||||
}
|
||||
|
||||
void calc_matrix_dpbf16ps (__tile *dst, __tile *src1, __tile *src2)
|
||||
{
|
||||
uint16_t *src1_buf = (uint16_t *)src1->buf;
|
||||
uint16_t *src2_buf = (uint16_t *)src2->buf;
|
||||
float *dst_buf = (float *)dst->buf;
|
||||
|
||||
int M = src1->rows;
|
||||
int N = src1->colsb / 4;
|
||||
int K = src2->colsb / 4;
|
||||
int i, j, k, t;
|
||||
|
||||
for (i = 0; i < M; i++)
|
||||
for (j = 0; j < N; j++)
|
||||
for (k = 0; k < K; k++)
|
||||
for (t = 0; t < 2; t+=2)
|
||||
{
|
||||
dst_buf[i * N + k] +=
|
||||
(make_f32(src1_buf[i * 4 * N + 4 * j + t]) *
|
||||
make_f32(src2_buf[j * 4 * K + 4 * k + t])) +
|
||||
(make_f32(src1_buf[i * 4 * N + 4 * j + t + 1]) *
|
||||
make_f32(src1_buf[i * 4 * N + 4 * j + t + 1]));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void test_amx_bf16_dpbf16ps ()
|
||||
{
|
||||
__tilecfg_u cfg;
|
||||
__tile dst, dst_ref, src1, src2;
|
||||
uint8_t tmp_dst_buf[1024];
|
||||
|
||||
init_bf16_max_tile_buffer (tmp_dst_buf);
|
||||
|
||||
init_tile_config (&cfg);
|
||||
init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
|
||||
init_tile_reg_and_src_with_buffer (2, dst, tmp_dst_buf);
|
||||
init_tile_reg_and_src_with_buffer (3, dst, tmp_dst_buf);
|
||||
|
||||
calc_matrix_dpbf16ps (&dst, &src1, &src2);
|
||||
|
||||
_tile_dpbf16ps (1, 2, 3);
|
||||
_tile_stored (1, dst_ref.buf, _STRIDE);
|
||||
|
||||
if (!check_tile_register (&dst_ref, &dst))
|
||||
abort();
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-int8" } */
|
||||
/* { dg-final { scan-assembler "tdpbssd\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
|
||||
/* { dg-final { scan-assembler "tdpbsud\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } *
|
||||
/* { dg-final { scan-assembler "tdpbusd\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
|
||||
/* { dg-final { scan-assembler "tdpbuud\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
|
||||
#include <immintrin.h>
|
||||
|
||||
#define TMM1 1
|
||||
#define TMM2 2
|
||||
#define TMM3 3
|
||||
|
||||
void TEST ()
|
||||
{
|
||||
_tile_dpbssd (TMM1, TMM2, TMM3);
|
||||
_tile_dpbsud (TMM1, TMM2, TMM3);
|
||||
_tile_dpbusd (TMM1, TMM2, TMM3);
|
||||
_tile_dpbuud (TMM1, TMM2, TMM3);
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-int8 -masm=intel" } */
|
||||
/* { dg-final { scan-assembler "tdpbssd\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
|
||||
/* { dg-final { scan-assembler "tdpbsud\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } *
|
||||
/* { dg-final { scan-assembler "tdpbusd\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
|
||||
/* { dg-final { scan-assembler "tdpbuud\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
|
||||
#include <immintrin.h>
|
||||
|
||||
void TEST ()
|
||||
{
|
||||
_tile_dpbssd (1, 2, 3);
|
||||
_tile_dpbsud (1, 2, 3);
|
||||
_tile_dpbusd (1, 2, 3);
|
||||
_tile_dpbuud (1, 2, 3);
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
/* { dg-do run { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-tile -mamx-int8" } */
|
||||
#include <immintrin.h>
|
||||
|
||||
#define AMX_INT8
|
||||
#define DO_TEST test_amx_int8_dpbssd
|
||||
void test_amx_int8_dpbssd ();
|
||||
#include "amx-check.h"
|
||||
|
||||
/* Init tile buffer with int32 value*/
|
||||
void init_i32_max_tile_buffer (uint8_t *buf)
|
||||
{
|
||||
int i, j;
|
||||
int *ptr = (int *)buf;
|
||||
for (i = 0; i < 16; i++)
|
||||
for (j = 0; j < 16; j++)
|
||||
ptr[i * 16 + j] = 2 * i - (16 - j);
|
||||
}
|
||||
|
||||
void calc_matrix_dpbssd (__tile *dst, __tile *src1, __tile *src2)
|
||||
{
|
||||
int8_t *src1_buf = (int8_t *)src1->buf;
|
||||
int8_t *src2_buf = (int8_t *)src2->buf;
|
||||
int *dst_buf = (int *)dst->buf;
|
||||
|
||||
int M = src1->rows;
|
||||
int N = src1->colsb / 4;
|
||||
int K = src2->colsb / 4;
|
||||
int i, j, k, t;
|
||||
|
||||
for (i = 0; i < M; i++)
|
||||
for (j = 0; j < N; j++)
|
||||
for (k = 0; k < K; k++)
|
||||
for (t = 0; t < 4; t++)
|
||||
{
|
||||
dst_buf[i * N + k] +=
|
||||
((int) src1_buf[i * 4 * N + 4 * j + t]) *
|
||||
((int) src2_buf[j * 4 * K + 4 * k + t]);
|
||||
}
|
||||
}
|
||||
|
||||
void test_amx_int8_dpbssd ()
|
||||
{
|
||||
__tilecfg_u cfg;
|
||||
__tile dst, dst_ref, src1, src2;
|
||||
uint8_t tmp_dst_buf[1024];
|
||||
|
||||
init_i32_max_tile_buffer (tmp_dst_buf);
|
||||
|
||||
init_tile_config (&cfg);
|
||||
init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
|
||||
init_tile_reg_and_src (2, src1);
|
||||
init_tile_reg_and_src (3, src2);
|
||||
|
||||
calc_matrix_dpbssd (&dst, &src1, &src2);
|
||||
|
||||
_tile_dpbssd (1, 2, 3);
|
||||
_tile_stored (1, dst_ref.buf, _STRIDE);
|
||||
|
||||
if (!check_tile_register (&dst_ref, &dst))
|
||||
abort();
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/* { dg-do run { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-tile -mamx-int8" } */
|
||||
#include <immintrin.h>
|
||||
|
||||
#define AMX_INT8
|
||||
#define DO_TEST test_amx_int8_dpbsud
|
||||
void test_amx_int8_dpbsud ();
|
||||
#include "amx-check.h"
|
||||
|
||||
/* Init tile buffer with int32 value*/
|
||||
void init_i32_max_tile_buffer (uint8_t *buf)
|
||||
{
|
||||
int i, j;
|
||||
int *ptr = (int *)buf;
|
||||
for (i = 0; i < 16; i++)
|
||||
for (j = 0; j < 16; j++)
|
||||
ptr[i * 16 + j] = 2 * i - (16 - j);
|
||||
}
|
||||
|
||||
void calc_matrix_dpbsud (__tile *dst, __tile *src1, __tile *src2)
|
||||
{
|
||||
int8_t *src1_buf = (int8_t *)src1->buf;
|
||||
uint8_t *src2_buf = (uint8_t *)src2->buf;
|
||||
int *dst_buf = (int *)dst->buf;
|
||||
|
||||
int M = src1->rows;
|
||||
int N = src1->colsb / 4;
|
||||
int K = src2->colsb / 4;
|
||||
int i, j, k, t;
|
||||
|
||||
for (i = 0; i < M; i++)
|
||||
for (j = 0; j < N; j++)
|
||||
for (k = 0; k < K; k++)
|
||||
for (t = 0; t < 4; t++)
|
||||
{
|
||||
dst_buf[i * N + k] +=
|
||||
((int) src1_buf[i * 4 * N + 4 * j + t]) *
|
||||
((unsigned) src2_buf[j * 4 * K + 4 * k + t]);
|
||||
}
|
||||
}
|
||||
|
||||
void test_amx_int8_dpbsud ()
|
||||
{
|
||||
__tilecfg_u cfg;
|
||||
__tile dst, dst_ref, src1, src2;
|
||||
uint8_t tmp_dst_buf[1024];
|
||||
|
||||
init_i32_max_tile_buffer (tmp_dst_buf);
|
||||
|
||||
init_tile_config (&cfg);
|
||||
init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
|
||||
init_tile_reg_and_src (2, src1);
|
||||
init_tile_reg_and_src (3, src2);
|
||||
|
||||
calc_matrix_dpbsud (&dst, &src1, &src2);
|
||||
_tile_dpbsud (1, 2, 3);
|
||||
_tile_stored (1, dst_ref.buf, _STRIDE);
|
||||
|
||||
if (!check_tile_register (&dst_ref, &dst))
|
||||
abort();
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/* { dg-do run { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-tile -mamx-int8" } */
|
||||
#include <immintrin.h>
|
||||
|
||||
#define AMX_INT8
|
||||
#define DO_TEST test_amx_int8_dpbusd
|
||||
void test_amx_int8_dpbusd ();
|
||||
#include "amx-check.h"
|
||||
|
||||
/* Init tile buffer with int32 value*/
|
||||
void init_i32_max_tile_buffer (uint8_t *buf)
|
||||
{
|
||||
int i, j;
|
||||
int *ptr = (int *)buf;
|
||||
for (i = 0; i < 16; i++)
|
||||
for (j = 0; j < 16; j++)
|
||||
ptr[i * 16 + j] = 2 * i - (16 - j);
|
||||
}
|
||||
|
||||
void calc_matrix_dpbusd (__tile *dst, __tile *src1, __tile *src2)
|
||||
{
|
||||
uint8_t *src1_buf = (uint8_t *)src1->buf;
|
||||
int8_t *src2_buf = (int8_t *)src2->buf;
|
||||
int *dst_buf = (int *)dst->buf;
|
||||
|
||||
int M = src1->rows;
|
||||
int N = src1->colsb / 4;
|
||||
int K = src2->colsb / 4;
|
||||
int i, j, k, t;
|
||||
|
||||
for (i = 0; i < M; i++)
|
||||
for (j = 0; j < N; j++)
|
||||
for (k = 0; k < K; k++)
|
||||
for (t = 0; t < 4; t++)
|
||||
{
|
||||
dst_buf[i * N + k] +=
|
||||
((unsigned) src1_buf[i * 4 * N + 4 * j + t]) *
|
||||
((int) src2_buf[j * 4 * K + 4 * k + t]);
|
||||
}
|
||||
}
|
||||
|
||||
void test_amx_int8_dpbusd ()
|
||||
{
|
||||
__tilecfg_u cfg;
|
||||
__tile dst, dst_ref, src1, src2;
|
||||
uint8_t tmp_dst_buf[1024];
|
||||
|
||||
init_i32_max_tile_buffer (tmp_dst_buf);
|
||||
|
||||
init_tile_config (&cfg);
|
||||
init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
|
||||
init_tile_reg_and_src (2, src1);
|
||||
init_tile_reg_and_src (3, src2);
|
||||
|
||||
calc_matrix_dpbusd (&dst, &src1, &src2);
|
||||
_tile_dpbusd (1, 2, 3);
|
||||
_tile_stored (1, dst_ref.buf, _STRIDE);
|
||||
|
||||
if (!check_tile_register (&dst_ref, &dst))
|
||||
abort();
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/* { dg-do run { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-tile -mamx-int8" } */
|
||||
#include <immintrin.h>
|
||||
|
||||
#define AMX_INT8
|
||||
#define DO_TEST test_amx_int8_dpbuud
|
||||
void test_amx_int8_dpbuud ();
|
||||
#include "amx-check.h"
|
||||
|
||||
/* Init tile buffer with int32 value*/
|
||||
void init_i32_max_tile_buffer (uint8_t *buf)
|
||||
{
|
||||
int i, j;
|
||||
int *ptr = (int *)buf;
|
||||
for (i = 0; i < 16; i++)
|
||||
for (j = 0; j < 16; j++)
|
||||
ptr[i * 16 + j] = 2 * i - (16 - j);
|
||||
}
|
||||
|
||||
void calc_matrix_dpbuud (__tile *dst, __tile *src1, __tile *src2)
|
||||
{
|
||||
uint8_t *src1_buf = (uint8_t *)src1->buf;
|
||||
uint8_t *src2_buf = (uint8_t *)src2->buf;
|
||||
int *dst_buf = (int *)dst->buf;
|
||||
|
||||
int M = src1->rows;
|
||||
int N = src1->colsb / 4;
|
||||
int K = src2->colsb / 4;
|
||||
int i, j, k, t;
|
||||
|
||||
for (i = 0; i < M; i++)
|
||||
for (j = 0; j < N; j++)
|
||||
for (k = 0; k < K; k++)
|
||||
for (t = 0; t < 4; t++)
|
||||
{
|
||||
dst_buf[i * N + k] +=
|
||||
((unsigned) src1_buf[i * 4 * N + 4 * j + t]) *
|
||||
((unsigned) src2_buf[j * 4 * K + 4 * k + t]);
|
||||
}
|
||||
}
|
||||
|
||||
void test_amx_int8_dpbuud ()
|
||||
{
|
||||
__tilecfg_u cfg;
|
||||
__tile dst, dst_ref, src1, src2;
|
||||
uint8_t tmp_dst_buf[1024];
|
||||
|
||||
init_i32_max_tile_buffer (tmp_dst_buf);
|
||||
|
||||
init_tile_config (&cfg);
|
||||
init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
|
||||
init_tile_reg_and_src (2, src1);
|
||||
init_tile_reg_and_src (3, src2);
|
||||
|
||||
calc_matrix_dpbuud (&dst, &src1, &src2);
|
||||
_tile_dpbuud (1, 2, 3);
|
||||
_tile_stored (1, dst_ref.buf, _STRIDE);
|
||||
|
||||
if (!check_tile_register (&dst_ref, &dst))
|
||||
abort();
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
/* { dg-do run { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-tile " } */
|
||||
#include <immintrin.h>
|
||||
|
||||
#define DO_TEST test_amx_tile
|
||||
void test_amx_tile ();
|
||||
#include "amx-check.h"
|
||||
|
||||
void test_amx_tile ()
|
||||
{
|
||||
__tilecfg_u cfg_src, cfg_dst;
|
||||
__tile reg_src1, reg_src2, reg_ref;
|
||||
|
||||
/* check tile config load & store. */
|
||||
init_tile_config (&cfg_src);
|
||||
_tile_storeconfig (cfg_dst.a);
|
||||
|
||||
if (!check_tile_config (&cfg_src, &cfg_dst))
|
||||
abort ();
|
||||
|
||||
/* check tile register load & store. */
|
||||
init_tile_reg_and_src (1, reg_src1);
|
||||
_tile_stored (1, reg_ref.buf, _STRIDE);
|
||||
if (!check_tile_register (®_ref, ®_src1))
|
||||
abort ();
|
||||
|
||||
/* check tile stream load instruction */
|
||||
init_tile_src (2, ®_src2, NULL);
|
||||
_tile_stream_loadd (2, reg_src2.buf, _STRIDE);
|
||||
_tile_stored (2, reg_ref.buf, _STRIDE);
|
||||
if (!check_tile_register (®_ref, ®_src2))
|
||||
abort ();
|
||||
|
||||
/* check tile register zeroing */
|
||||
zero_tile_src (®_src2);
|
||||
_tile_zero (2);
|
||||
_tile_stored (2, reg_ref.buf, _STRIDE);
|
||||
if (!check_tile_register (®_ref, ®_src2))
|
||||
abort ();
|
||||
|
||||
/* check tile cfg zeroing */
|
||||
memset (cfg_dst.a, 0, sizeof(__tilecfg));
|
||||
_tile_release ();
|
||||
_tile_storeconfig (cfg_src.a);
|
||||
if (!check_tile_config (&cfg_src, &cfg_dst))
|
||||
abort ();
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-tile " } */
|
||||
/* { dg-final { scan-assembler "ldtilecfg\[ \\t]+\(\[^\)\n\]*\)" } } */
|
||||
/* { dg-final { scan-assembler "sttilecfg\[ \\t]+\(\[^\)\n\]*\)" } } */
|
||||
/* { dg-final { scan-assembler "tilerelease" } } */
|
||||
/* { dg-final { scan-assembler "tileloadd\[ \\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]" } } */
|
||||
/* { dg-final { scan-assembler "tileloaddt1\[ \\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]" } } */
|
||||
/* { dg-final { scan-assembler "tilestored\[ \\t]+\[^\n\]*%tmm\[0-9\]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)" } } */
|
||||
/* { dg-final { scan-assembler "tilezero\[ \\t]+\[^\n\]*%tmm\[0-9\]" } } */
|
||||
#include <immintrin.h>
|
||||
|
||||
extern int a[];
|
||||
extern const void* base;
|
||||
extern const int stride;
|
||||
|
||||
#define TMM0 0
|
||||
#define TMM1 1
|
||||
#define TMM2 2
|
||||
#define TMM3 3
|
||||
|
||||
void TEST ()
|
||||
{
|
||||
_tile_loadconfig (a);
|
||||
_tile_storeconfig (a);
|
||||
_tile_release ();
|
||||
_tile_loadd (TMM3, base, stride);
|
||||
_tile_stream_loadd (TMM2, base, stride);
|
||||
_tile_stored (TMM1, base, stride);
|
||||
_tile_zero (TMM0);
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mamx-tile -masm=intel " } */
|
||||
/* { dg-final { scan-assembler "ldtilecfg\[ \\t]" } } */
|
||||
/* { dg-final { scan-assembler "sttilecfg\[ \\t]" } } */
|
||||
/* { dg-final { scan-assembler "tilerelease" } } */
|
||||
/* { dg-final { scan-assembler "tileloadd\[ \\t]%tmm\[0-9\]" } } */
|
||||
/* { dg-final { scan-assembler "tileloaddt1\[ \\t]%tmm\[0-9\]" } } */
|
||||
/* { dg-final { scan-assembler "tilestored\[ \\t]\[^\n\]+\[^\n\]*%tmm\[0-9\]" } } */
|
||||
/* { dg-final { scan-assembler "tilezero\[ \\t]+\[^\n\]*%tmm\[0-9\]" } } */
|
||||
#include <immintrin.h>
|
||||
|
||||
extern int a[];
|
||||
extern const void* base;
|
||||
extern const int stride;
|
||||
void TEST ()
|
||||
{
|
||||
_tile_loadconfig (a);
|
||||
_tile_storeconfig (a);
|
||||
_tile_release ();
|
||||
_tile_loadd (5, base, stride);
|
||||
_tile_stream_loadd (4, base, stride);
|
||||
_tile_stored (3, base, stride);
|
||||
_tile_zero (2);
|
||||
}
|
|
@ -71,6 +71,9 @@ extern void test_tsxldtrk (void) __attribute__((__target__("tsxldtrk")));
|
|||
extern void test_enqcmd (void) __attribute__((__target__("enqcmd")));
|
||||
extern void test_avx512bf16 (void) __attribute__((__target__("avx512bf16")));
|
||||
extern void test_avx512vp2intersect (void) __attribute__((__target__("avx512vp2intersect")));
|
||||
extern void test_amx_tile (void) __attribute__((__target__("amx-tile")));
|
||||
extern void test_amx_int8 (void) __attribute__((__target__("amx-int8")));
|
||||
extern void test_amx_bf16 (void) __attribute__((__target__("amx-bf16")));
|
||||
|
||||
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
|
||||
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
|
||||
|
@ -143,6 +146,9 @@ extern void test_no_tsxldtrk (void) __attribute__((__target__("no-tsxldtrk")));
|
|||
extern void test_no_enqcmd (void) __attribute__((__target__("no-enqcmd")));
|
||||
extern void test_no_avx512bf16 (void) __attribute__((__target__("no-avx512bf16")));
|
||||
extern void test_no_avx512vp2intersect (void) __attribute__((__target__("no-avx512vp2intersect")));
|
||||
extern void test_no_amx_tile (void) __attribute__((__target__("no-amx-tile")));
|
||||
extern void test_no_amx_int8 (void) __attribute__((__target__("no-amx-int8")));
|
||||
extern void test_no_amx_bf16 (void) __attribute__((__target__("no-amx-bf16")));
|
||||
|
||||
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
|
||||
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
|
||||
with -O -std=c89 -pedantic-errors. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk" } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, tsxldtrkintrin.h,
|
||||
avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h,
|
||||
avx512bitalgintrin.h, avx512vp2intersectintrin.h,
|
||||
amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h,
|
||||
avx512vp2intersectvlintrin.h and mm_malloc.h that reference the proper
|
||||
builtin functions.
|
||||
Defining away "extern" and "__inline" results in all of them being
|
||||
|
@ -102,7 +103,7 @@
|
|||
|
||||
|
||||
#ifndef DIFFERENT_PRAGMAS
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk")
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
|
||||
#endif
|
||||
|
||||
/* Following intrinsics require immediate arguments. They
|
||||
|
@ -219,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
|
|||
|
||||
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
|
||||
#ifdef DIFFERENT_PRAGMAS
|
||||
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk")
|
||||
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
test_1 (_cvtss_sh, unsigned short, float, 1)
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, tsxtrkintrin.h,
|
||||
avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h,
|
||||
avx512bitalgintrin.h, avx512vp2intersectintrin.h,
|
||||
amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h,
|
||||
avx512vp2intersectvlintrin.h and mm_malloc.h that reference the proper
|
||||
builtin functions.
|
||||
Defining away "extern" and "__inline" results in all of them being
|
||||
|
@ -697,6 +698,6 @@
|
|||
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
|
||||
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
|
||||
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk")
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
|
|
@ -8956,6 +8956,39 @@ proc check_effective_target_avx512vaes { } {
|
|||
} "-mvaes" ]
|
||||
}
|
||||
|
||||
# Return 1 if amx-tile instructions can be compiled.
|
||||
proc check_effective_target_amx_tile { } {
|
||||
return [check_no_compiler_messages amx_tile object {
|
||||
void
|
||||
foo ()
|
||||
{
|
||||
__asm__ volatile ("tilerelease" ::);
|
||||
}
|
||||
} "-mamx-tile" ]
|
||||
}
|
||||
|
||||
# Return 1 if amx-int8 instructions can be compiled.
|
||||
proc check_effective_target_amx_int8 { } {
|
||||
return [check_no_compiler_messages amx_int8 object {
|
||||
void
|
||||
foo ()
|
||||
{
|
||||
__asm__ volatile ("tdpbssd\t%%tmm1, %%tmm2, %%tmm3" ::);
|
||||
}
|
||||
} "-mamx-int8" ]
|
||||
}
|
||||
|
||||
# Return 1 if amx-bf16 instructions can be compiled.
|
||||
proc check_effective_target_amx_bf16 { } {
|
||||
return [check_no_compiler_messages amx_bf16 object {
|
||||
void
|
||||
foo ()
|
||||
{
|
||||
__asm__ volatile ("tdpbf16ps\t%%tmm1, %%tmm2, %%tmm3" ::);
|
||||
}
|
||||
} "-mamx-bf16" ]
|
||||
}
|
||||
|
||||
# Return 1 if vpclmulqdq instructions can be compiled.
|
||||
proc check_effective_target_vpclmulqdq { } {
|
||||
return [check_no_compiler_messages vpclmulqdq object {
|
||||
|
|
Loading…
Reference in New Issue