AVX512FP16: Add vreduceph/vreducesh/vrndscaleph/vrndscalesh.

gcc/ChangeLog:

	* config/i386/avx512fp16intrin.h (_mm512_reduce_ph):
	New intrinsic.
	(_mm512_mask_reduce_ph): Likewise.
	(_mm512_maskz_reduce_ph): Likewise.
	(_mm512_reduce_round_ph): Likewise.
	(_mm512_mask_reduce_round_ph): Likewise.
	(_mm512_maskz_reduce_round_ph): Likewise.
	(_mm_reduce_sh): Likewise.
	(_mm_mask_reduce_sh): Likewise.
	(_mm_maskz_reduce_sh): Likewise.
	(_mm_reduce_round_sh): Likewise.
	(_mm_mask_reduce_round_sh): Likewise.
	(_mm_maskz_reduce_round_sh): Likewise.
	(_mm512_roundscale_ph): Likewise.
	(_mm512_mask_roundscale_ph): Likewise.
	(_mm512_maskz_roundscale_ph): Likewise.
	(_mm512_roundscale_round_ph): Likewise.
	(_mm512_mask_roundscale_round_ph): Likewise.
	(_mm512_maskz_roundscale_round_ph): Likewise.
	(_mm_roundscale_sh): Likewise.
	(_mm_mask_roundscale_sh): Likewise.
	(_mm_maskz_roundscale_sh): Likewise.
	(_mm_roundscale_round_sh): Likewise.
	(_mm_mask_roundscale_round_sh): Likewise.
	(_mm_maskz_roundscale_round_sh): Likewise.
	* config/i386/avx512fp16vlintrin.h (_mm_reduce_ph):
	New intrinsic.
	(_mm_mask_reduce_ph): Likewise.
	(_mm_maskz_reduce_ph): Likewise.
	(_mm256_reduce_ph): Likewise.
	(_mm256_mask_reduce_ph): Likewise.
	(_mm256_maskz_reduce_ph): Likewise.
	(_mm_roundscale_ph): Likewise.
	(_mm_mask_roundscale_ph): Likewise.
	(_mm_maskz_roundscale_ph): Likewise.
	(_mm256_roundscale_ph): Likewise.
	(_mm256_mask_roundscale_ph): Likewise.
	(_mm256_maskz_roundscale_ph): Likewise.
	* config/i386/i386-builtin-types.def: Add corresponding builtin types.
	* config/i386/i386-builtin.def: Add corresponding new builtins.
	* config/i386/i386-expand.c
	(ix86_expand_args_builtin): Handle new builtin types.
	(ix86_expand_round_builtin): Ditto.
	* config/i386/sse.md (<mask_codefor>reducep<mode><mask_name>):
	Renamed to ...
	(<mask_codefor>reducep<mode><mask_name><round_saeonly_name>):
	... this, and adjust for round operands.
	(reduces<mode><mask_scalar_name>): Likewise, with ...
	(reduces<mode><mask_scalar_name><round_saeonly_scalar_name>):
	... this.
	(<avx512>_rndscale<mode><mask_name><round_saeonly_name>):
	Adjust for HF vector modes.
	(avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>):
	Ditto.
	(*avx512f_rndscale<mode><round_saeonly_name>): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-1.c: Add test for new builtins.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/sse-14.c: Add test for new intrinsics.
	* gcc.target/i386/sse-22.c: Ditto.
Author: liuhongt
Date:   2019-02-26 13:59:00 -08:00
Commit: 8bed761796
Parent: 03f0cbccb6
11 changed files with 646 additions and 22 deletions
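Not part of the patch: a minimal usage sketch of the new intrinsics,
assuming a compiler built from this tree and -mavx512fp16 -mavx512vl.
In the 8-bit immediate, bits 7:4 select the number of fraction bits M
kept by the rounding and bits 1:0 the rounding mode, so imm = 0 rounds
to the nearest integer; vreduceph then returns the difference between
the source and that rounded value.

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m128h x = _mm_set1_ph ((_Float16) 2.75f);
  __m128h r = _mm_roundscale_ph (x, 0);	/* round (x)     ->  3.0  */
  __m128h d = _mm_reduce_ph (x, 0);	/* x - round (x) -> -0.25 */
  printf ("%f %f\n", (double) ((_Float16 *) &r)[0],
	  (double) ((_Float16 *) &d)[0]);
  return 0;
}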

gcc/config/i386/avx512fp16intrin.h

@@ -1623,6 +1623,365 @@ _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
#endif /* __OPTIMIZE__ */
/* Intrinsics vreduceph. */
#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_ph (__m512h __A, int __B)
{
return __builtin_ia32_vreduceph_v32hf_mask_round (__A, __B,
_mm512_setzero_ph (),
(__mmask32) -1,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
{
return __builtin_ia32_vreduceph_v32hf_mask_round (__C, __D, __A, __B,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
{
return __builtin_ia32_vreduceph_v32hf_mask_round (__B, __C,
_mm512_setzero_ph (),
__A,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
{
return __builtin_ia32_vreduceph_v32hf_mask_round (__A, __B,
_mm512_setzero_ph (),
(__mmask32) -1, __C);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
int __D, const int __E)
{
return __builtin_ia32_vreduceph_v32hf_mask_round (__C, __D, __A, __B,
__E);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
const int __D)
{
return __builtin_ia32_vreduceph_v32hf_mask_round (__B, __C,
_mm512_setzero_ph (),
__A, __D);
}
#else
#define _mm512_reduce_ph(A, B) \
(__builtin_ia32_vreduceph_v32hf_mask_round ((A), (B), \
_mm512_setzero_ph (), \
(__mmask32)-1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_reduce_ph(A, B, C, D) \
(__builtin_ia32_vreduceph_v32hf_mask_round ((C), (D), (A), (B), \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_reduce_ph(A, B, C) \
(__builtin_ia32_vreduceph_v32hf_mask_round ((B), (C), \
_mm512_setzero_ph (), \
(A), _MM_FROUND_CUR_DIRECTION))
#define _mm512_reduce_round_ph(A, B, C) \
(__builtin_ia32_vreduceph_v32hf_mask_round ((A), (B), \
_mm512_setzero_ph (), \
(__mmask32)-1, (C)))
#define _mm512_mask_reduce_round_ph(A, B, C, D, E) \
(__builtin_ia32_vreduceph_v32hf_mask_round ((C), (D), (A), (B), (E)))
#define _mm512_maskz_reduce_round_ph(A, B, C, D) \
(__builtin_ia32_vreduceph_v32hf_mask_round ((B), (C), \
_mm512_setzero_ph (), \
(A), (D)))
#endif /* __OPTIMIZE__ */
/* Intrinsics vreducesh. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_sh (__m128h __A, __m128h __B, int __C)
{
return __builtin_ia32_vreducesh_v8hf_mask_round (__A, __B, __C,
_mm_setzero_ph (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
__m128h __D, int __E)
{
return __builtin_ia32_vreducesh_v8hf_mask_round (__C, __D, __E, __A, __B,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
{
return __builtin_ia32_vreducesh_v8hf_mask_round (__B, __C, __D,
_mm_setzero_ph (), __A,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
{
return __builtin_ia32_vreducesh_v8hf_mask_round (__A, __B, __C,
_mm_setzero_ph (),
(__mmask8) -1, __D);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
__m128h __D, int __E, const int __F)
{
return __builtin_ia32_vreducesh_v8hf_mask_round (__C, __D, __E, __A,
__B, __F);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
int __D, const int __E)
{
return __builtin_ia32_vreducesh_v8hf_mask_round (__B, __C, __D,
_mm_setzero_ph (),
__A, __E);
}
#else
#define _mm_reduce_sh(A, B, C) \
(__builtin_ia32_vreducesh_v8hf_mask_round ((A), (B), (C), \
_mm_setzero_ph (), \
(__mmask8)-1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm_mask_reduce_sh(A, B, C, D, E) \
(__builtin_ia32_vreducesh_v8hf_mask_round ((C), (D), (E), (A), (B), \
_MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_reduce_sh(A, B, C, D) \
(__builtin_ia32_vreducesh_v8hf_mask_round ((B), (C), (D), \
_mm_setzero_ph (), \
(A), _MM_FROUND_CUR_DIRECTION))
#define _mm_reduce_round_sh(A, B, C, D) \
(__builtin_ia32_vreducesh_v8hf_mask_round ((A), (B), (C), \
_mm_setzero_ph (), \
(__mmask8)-1, (D)))
#define _mm_mask_reduce_round_sh(A, B, C, D, E, F) \
(__builtin_ia32_vreducesh_v8hf_mask_round ((C), (D), (E), (A), (B), (F)))
#define _mm_maskz_reduce_round_sh(A, B, C, D, E) \
(__builtin_ia32_vreducesh_v8hf_mask_round ((B), (C), (D), \
_mm_setzero_ph (), \
(A), (E)))
#endif /* __OPTIMIZE__ */
/* Intrinsics vrndscaleph. */
#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_ph (__m512h __A, int __B)
{
return __builtin_ia32_vrndscaleph_v32hf_mask_round (__A, __B,
_mm512_setzero_ph (),
(__mmask32) -1,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
__m512h __C, int __D)
{
return __builtin_ia32_vrndscaleph_v32hf_mask_round (__C, __D, __A, __B,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
{
return __builtin_ia32_vrndscaleph_v32hf_mask_round (__B, __C,
_mm512_setzero_ph (),
__A,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
{
return __builtin_ia32_vrndscaleph_v32hf_mask_round (__A, __B,
_mm512_setzero_ph (),
(__mmask32) -1,
__C);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
__m512h __C, int __D, const int __E)
{
return __builtin_ia32_vrndscaleph_v32hf_mask_round (__C, __D, __A,
__B, __E);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
const int __D)
{
return __builtin_ia32_vrndscaleph_v32hf_mask_round (__B, __C,
_mm512_setzero_ph (),
__A, __D);
}
#else
#define _mm512_roundscale_ph(A, B) \
(__builtin_ia32_vrndscaleph_v32hf_mask_round ((A), (B), \
_mm512_setzero_ph (), \
(__mmask32)-1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ph(A, B, C, D) \
(__builtin_ia32_vrndscaleph_v32hf_mask_round ((C), (D), (A), (B), \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_ph(A, B, C) \
(__builtin_ia32_vrndscaleph_v32hf_mask_round ((B), (C), \
_mm512_setzero_ph (), \
(A), \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_roundscale_round_ph(A, B, C) \
(__builtin_ia32_vrndscaleph_v32hf_mask_round ((A), (B), \
_mm512_setzero_ph (), \
(__mmask32)-1, (C)))
#define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \
(__builtin_ia32_vrndscaleph_v32hf_mask_round ((C), (D), (A), (B), (E)))
#define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
(__builtin_ia32_vrndscaleph_v32hf_mask_round ((B), (C), \
_mm512_setzero_ph (), \
(A), (D)))
#endif /* __OPTIMIZE__ */
/* Intrinsics vrndscalesh. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
{
return __builtin_ia32_vrndscalesh_v8hf_mask_round (__A, __B, __C,
_mm_setzero_ph (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
__m128h __D, int __E)
{
return __builtin_ia32_vrndscalesh_v8hf_mask_round (__C, __D, __E, __A, __B,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
{
return __builtin_ia32_vrndscalesh_v8hf_mask_round (__B, __C, __D,
_mm_setzero_ph (), __A,
_MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
{
return __builtin_ia32_vrndscalesh_v8hf_mask_round (__A, __B, __C,
_mm_setzero_ph (),
(__mmask8) -1,
__D);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
__m128h __D, int __E, const int __F)
{
return __builtin_ia32_vrndscalesh_v8hf_mask_round (__C, __D, __E,
__A, __B, __F);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
int __D, const int __E)
{
return __builtin_ia32_vrndscalesh_v8hf_mask_round (__B, __C, __D,
_mm_setzero_ph (),
__A, __E);
}
#else
#define _mm_roundscale_sh(A, B, C) \
(__builtin_ia32_vrndscalesh_v8hf_mask_round ((A), (B), (C), \
_mm_setzero_ph (), \
(__mmask8)-1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm_mask_roundscale_sh(A, B, C, D, E) \
(__builtin_ia32_vrndscalesh_v8hf_mask_round ((C), (D), (E), (A), (B), \
_MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_roundscale_sh(A, B, C, D) \
(__builtin_ia32_vrndscalesh_v8hf_mask_round ((B), (C), (D), \
_mm_setzero_ph (), \
(A), _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_round_sh(A, B, C, D) \
(__builtin_ia32_vrndscalesh_v8hf_mask_round ((A), (B), (C), \
_mm_setzero_ph (), \
(__mmask8)-1, (D)))
#define _mm_mask_roundscale_round_sh(A, B, C, D, E, F) \
(__builtin_ia32_vrndscalesh_v8hf_mask_round ((C), (D), (E), (A), (B), (F)))
#define _mm_maskz_roundscale_round_sh(A, B, C, D, E) \
(__builtin_ia32_vrndscalesh_v8hf_mask_round ((B), (C), (D), \
_mm_setzero_ph (), \
(A), (E)))
#endif /* __OPTIMIZE__ */
#ifdef __DISABLE_AVX512FP16__
#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
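Not part of the patch: the scalar _sh forms compute element 0 only and
copy the remaining seven elements from the first vector operand,
matching the (vec_merge ... (const_int 1)) RTL in the sse.md changes
below.  A short sketch (immintrin.h included as above):

__m128h
merge_demo (void)
{
  __m128h a = _mm_set1_ph ((_Float16) 1.0f);
  __m128h b = _mm_set1_ph ((_Float16) 2.5f);
  /* r[0] = roundscale (b[0]) = 2.0 (round to nearest even);
     r[1..7] = a[1..7] = 1.0.  */
  return _mm_roundscale_sh (a, b, 0);
}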

gcc/config/i386/avx512fp16vlintrin.h

@@ -548,6 +548,159 @@ _mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
__A);
}
/* Intrinsics vreduceph. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ph (__m128h __A, int __B)
{
return __builtin_ia32_vreduceph_v8hf_mask (__A, __B,
_mm_setzero_ph (),
(__mmask8) -1);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
return __builtin_ia32_vreduceph_v8hf_mask (__C, __D, __A, __B);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C)
{
return __builtin_ia32_vreduceph_v8hf_mask (__B, __C,
_mm_setzero_ph (), __A);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ph (__m256h __A, int __B)
{
return __builtin_ia32_vreduceph_v16hf_mask (__A, __B,
_mm256_setzero_ph (),
(__mmask16) -1);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D)
{
return __builtin_ia32_vreduceph_v16hf_mask (__C, __D, __A, __B);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C)
{
return __builtin_ia32_vreduceph_v16hf_mask (__B, __C,
_mm256_setzero_ph (),
__A);
}
#else
#define _mm_reduce_ph(A, B) \
(__builtin_ia32_vreduceph_v8hf_mask ((A), (B),\
_mm_setzero_ph (), \
((__mmask8)-1)))
#define _mm_mask_reduce_ph(A, B, C, D) \
(__builtin_ia32_vreduceph_v8hf_mask ((C), (D), (A), (B)))
#define _mm_maskz_reduce_ph(A, B, C) \
(__builtin_ia32_vreduceph_v8hf_mask ((B), (C), _mm_setzero_ph (), (A)))
#define _mm256_reduce_ph(A, B) \
(__builtin_ia32_vreduceph_v16hf_mask ((A), (B),\
_mm256_setzero_ph (), \
((__mmask16)-1)))
#define _mm256_mask_reduce_ph(A, B, C, D) \
(__builtin_ia32_vreduceph_v16hf_mask ((C), (D), (A), (B)))
#define _mm256_maskz_reduce_ph(A, B, C) \
(__builtin_ia32_vreduceph_v16hf_mask ((B), (C), _mm256_setzero_ph (), (A)))
#endif /* __OPTIMIZE__ */
/* Intrinsics vrndscaleph. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_ph (__m128h __A, int __B)
{
return __builtin_ia32_vrndscaleph_v8hf_mask (__A, __B,
_mm_setzero_ph (),
(__mmask8) -1);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
return __builtin_ia32_vrndscaleph_v8hf_mask (__C, __D, __A, __B);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C)
{
return __builtin_ia32_vrndscaleph_v8hf_mask (__B, __C,
_mm_setzero_ph (), __A);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_ph (__m256h __A, int __B)
{
return __builtin_ia32_vrndscaleph_v16hf_mask (__A, __B,
_mm256_setzero_ph (),
(__mmask16) -1);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C,
int __D)
{
return __builtin_ia32_vrndscaleph_v16hf_mask (__C, __D, __A, __B);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
{
return __builtin_ia32_vrndscaleph_v16hf_mask (__B, __C,
_mm256_setzero_ph (),
__A);
}
#else
#define _mm_roundscale_ph(A, B) \
(__builtin_ia32_vrndscaleph_v8hf_mask ((A), (B), _mm_setzero_ph (), \
((__mmask8)-1)))
#define _mm_mask_roundscale_ph(A, B, C, D) \
(__builtin_ia32_vrndscaleph_v8hf_mask ((C), (D), (A), (B)))
#define _mm_maskz_roundscale_ph(A, B, C) \
(__builtin_ia32_vrndscaleph_v8hf_mask ((B), (C), _mm_setzero_ph (), (A)))
#define _mm256_roundscale_ph(A, B) \
(__builtin_ia32_vrndscaleph_v16hf_mask ((A), (B), \
_mm256_setzero_ph(), \
((__mmask16)-1)))
#define _mm256_mask_roundscale_ph(A, B, C, D) \
(__builtin_ia32_vrndscaleph_v16hf_mask ((C), (D), (A), (B)))
#define _mm256_maskz_roundscale_ph(A, B, C) \
(__builtin_ia32_vrndscaleph_v16hf_mask ((B), (C), \
_mm256_setzero_ph (), (A)))
#endif /* __OPTIMIZE__ */
#ifdef __DISABLE_AVX512FP16VL__
#undef __DISABLE_AVX512FP16VL__
#pragma GCC pop_options
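Not part of the patch: a sketch of zero-masking with the new 256-bit
variant; lanes whose mask bit is clear are zeroed rather than merged.

__m256h
maskz_demo (void)
{
  __m256h v = _mm256_set1_ph ((_Float16) 1.5f);
  /* Low 8 lanes rounded to 2.0 (nearest even); high 8 lanes zeroed.  */
  return _mm256_maskz_roundscale_ph ((__mmask16) 0x00ff, v, 0);
}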

gcc/config/i386/i386-builtin-types.def

@@ -1307,12 +1307,15 @@ DEF_FUNCTION_TYPE (V8HF, V8HI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, UQI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, INT, V8HF, UQI)
DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI)
DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI, INT)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, UHI)
DEF_FUNCTION_TYPE (V16HF, V16HF, INT, V16HF, UHI)
DEF_FUNCTION_TYPE (UHI, V16HF, V16HF, INT, UHI)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, V16HF, UHI)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI)
@@ -1322,3 +1325,4 @@ DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI, INT)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI)
DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI, INT)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT)
DEF_FUNCTION_TYPE (V32HF, V32HF, INT, V32HF, USI, INT)
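Each DEF_FUNCTION_TYPE line spells a builtin signature as
RET_FTYPE_ARGS.  As an illustration (not part of the patch), the new
V8HF_FTYPE_V8HF_INT_V8HF_UQI entry corresponds to the prototype of the
masked VL builtins used above:

/* (source vector, 8-bit immediate, merge source, write mask).  */
__m128h __builtin_ia32_vreduceph_v8hf_mask (__m128h, int, __m128h,
					    __mmask8);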

gcc/config/i386/i386-builtin.def

@@ -2814,6 +2814,10 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__bu
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_vrcpsh_v8hf_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_vscalefph_v8hf_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_vscalefph_v16hf_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv8hf_mask, "__builtin_ia32_vreduceph_v8hf_mask", IX86_BUILTIN_VREDUCEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv16hf_mask, "__builtin_ia32_vreduceph_v16hf_mask", IX86_BUILTIN_VREDUCEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf_mask, "__builtin_ia32_vrndscaleph_v8hf_mask", IX86_BUILTIN_VRNDSCALEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_vrndscaleph_v16hf_mask", IX86_BUILTIN_VRNDSCALEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
@@ -3033,6 +3037,10 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_vsqrtsh_v8hf_mask_round", IX86_BUILTIN_VSQRTSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_vscalefph_v32hf_mask_round", IX86_BUILTIN_VSCALEFPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_vscalefsh_v8hf_mask_round", IX86_BUILTIN_VSCALEFSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_vreduceph_v32hf_mask_round", IX86_BUILTIN_VREDUCEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_vreducesh_v8hf_mask_round", IX86_BUILTIN_VREDUCESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_vrndscaleph_v32hf_mask_round", IX86_BUILTIN_VRNDSCALEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_vrndscalesh_v8hf_mask_round", IX86_BUILTIN_VRNDSCALESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
BDESC_END (ROUND_ARGS, MULTI_ARG)
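Each BDESC entry ties an ISA requirement, an insn pattern (CODE_FOR_*),
a builtin name and one of the function types above.  Illustration (not
part of the patch) of the round-variant type
V32HF_FTYPE_V32HF_INT_V32HF_USI_INT:

/* (source, imm8, merge source, write mask, rounding/SAE control).  */
__m512h __builtin_ia32_vreduceph_v32hf_mask_round (__m512h, int, __m512h,
						   __mmask32, int);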

gcc/config/i386/i386-expand.c

@@ -10061,6 +10061,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
case V16HF_FTYPE_V16HF_INT_V16HF_UHI:
case V8HF_FTYPE_V8HF_INT_V8HF_UQI:
case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
@@ -10709,6 +10711,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT:
nargs = 5;
break;
case V32HF_FTYPE_V32HF_INT_V32HF_USI_INT:
case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
case V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT:
@@ -10731,6 +10734,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
case V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT:
nargs = 6;
nargs_constant = 4;
break;
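Both the imm8 and the rounding operand are matched as constants during
expansion, which is why callers must pass literals (and why the
intrinsics above fall back to macros when __OPTIMIZE__ is not defined).
Illustration, not part of the patch:

__m512h
roundscale_ok (__m512h v)
{
  return _mm512_roundscale_ph (v, 3);	/* fine: literal immediate */
}
/* With "int sel = 3;", _mm512_roundscale_ph (v, sel) would be
   rejected: the immediate operand must be a compile-time constant.  */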

gcc/config/i386/sse.md

@@ -3257,28 +3257,28 @@
})
(define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
(unspec:VF_AVX512VL
[(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
[(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
(unspec:VFH_AVX512VL
[(match_operand:VFH_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_REDUCE))]
"TARGET_AVX512DQ"
"TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (<MODE>mode))"
"vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
(define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
[(match_operand:VF_128 1 "register_operand" "v")
(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
[(set (match_operand:VFH_128 0 "register_operand" "=v")
(vec_merge:VFH_128
(unspec:VFH_128
[(match_operand:VFH_128 1 "register_operand" "v")
(match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_REDUCE)
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512DQ"
"TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (<MODE>mode))"
"vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
@@ -10794,9 +10794,9 @@
(set_attr "mode" "<ssescalarmode>")])
(define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
(unspec:VF_AVX512VL
[(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
[(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
(unspec:VFH_AVX512VL
[(match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_ROUND))]
"TARGET_AVX512F"
@@ -10806,13 +10806,13 @@
(set_attr "mode" "<MODE>")])
(define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
[(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
[(set (match_operand:VFH_128 0 "register_operand" "=v")
(vec_merge:VFH_128
(unspec:VFH_128
[(match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_ROUND)
(match_operand:VF_128 1 "register_operand" "v")
(match_operand:VFH_128 1 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512F"
"vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
@@ -10821,14 +10821,14 @@
(set_attr "mode" "<MODE>")])
(define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(vec_duplicate:VF_128
[(set (match_operand:VFH_128 0 "register_operand" "=v")
(vec_merge:VFH_128
(vec_duplicate:VFH_128
(unspec:<ssescalarmode>
[(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_ROUND))
(match_operand:VF_128 1 "register_operand" "v")
(match_operand:VFH_128 1 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512F"
"vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"

gcc/testsuite/gcc.target/i386/avx-1.c

@@ -705,6 +705,14 @@
#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
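avx-1.c compiles the intrinsic headers without optimization, so every
builtin taking an immediate is redefined with its immediate slots
hard-coded (123 for the imm8, 8 = _MM_FROUND_NO_EXC for the rounding
operand).  The self-reference works because a function-like macro is
not expanded recursively; a reduced sketch of the trick, with a
hypothetical name:

/* Any use of demo_builtin is compiled with the literal 123 in its
   immediate slot, whatever expression was written there.  */
#define demo_builtin(A, B, C, D) demo_builtin ((A), 123, (C), (D))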

gcc/testsuite/gcc.target/i386/sse-13.c

@@ -722,6 +722,14 @@
#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)

gcc/testsuite/gcc.target/i386/sse-14.c

@@ -671,6 +671,14 @@ test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 8)
/* avx512fp16intrin.h */
test_1 (_mm512_sqrt_round_ph, __m512h, __m512h, 8)
test_1 (_mm_reduce_ph, __m128h, __m128h, 123)
test_1 (_mm256_reduce_ph, __m256h, __m256h, 123)
test_1 (_mm512_reduce_ph, __m512h, __m512h, 123)
test_1 (_mm_roundscale_ph, __m128h, __m128h, 123)
test_1 (_mm256_roundscale_ph, __m256h, __m256h, 123)
test_1 (_mm512_roundscale_ph, __m512h, __m512h, 123)
test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8)
@@ -689,9 +697,21 @@ test_2 (_mm512_maskz_sqrt_round_ph, __m512h, __mmask32, __m512h, 8)
test_2 (_mm_sqrt_round_sh, __m128h, __m128h, __m128h, 8)
test_2 (_mm512_scalef_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm_scalef_round_sh, __m128h, __m128h, __m128h, 8)
test_2 (_mm_maskz_reduce_ph, __m128h, __mmask8, __m128h, 123)
test_2 (_mm256_maskz_reduce_ph, __m256h, __mmask16, __m256h, 123)
test_2 (_mm512_maskz_reduce_ph, __m512h, __mmask32, __m512h, 123)
test_2 (_mm_reduce_sh, __m128h, __m128h, __m128h, 123)
test_2 (_mm_maskz_roundscale_ph, __m128h, __mmask8, __m128h, 123)
test_2 (_mm256_maskz_roundscale_ph, __m256h, __mmask16, __m256h, 123)
test_2 (_mm512_maskz_roundscale_ph, __m512h, __mmask32, __m512h, 123)
test_2 (_mm_roundscale_sh, __m128h, __m128h, __m128h, 123)
test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
test_2x (_mm512_maskz_reduce_round_ph, __m512h, __mmask32, __m512h, 123, 8)
test_2x (_mm512_maskz_roundscale_round_ph, __m512h, __mmask32, __m512h, 123, 8)
test_2x (_mm_reduce_round_sh, __m128h, __m128h, __m128h, 123, 8)
test_2x (_mm_roundscale_round_sh, __m128h, __m128h, __m128h, 123, 8)
test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -709,8 +729,20 @@ test_3 (_mm512_mask_sqrt_round_ph, __m512h, __m512h, __mmask32, __m512h, 8)
test_3 (_mm_maskz_sqrt_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
test_3 (_mm512_maskz_scalef_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm_maskz_scalef_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
test_3 (_mm_mask_reduce_ph, __m128h, __m128h, __mmask8, __m128h, 123)
test_3 (_mm256_mask_reduce_ph, __m256h, __m256h, __mmask16, __m256h, 123)
test_3 (_mm512_mask_reduce_ph, __m512h, __m512h, __mmask32, __m512h, 123)
test_3 (_mm_maskz_reduce_sh, __m128h, __mmask8, __m128h, __m128h, 123)
test_3 (_mm_mask_roundscale_ph, __m128h, __m128h, __mmask8, __m128h, 123)
test_3 (_mm256_mask_roundscale_ph, __m256h, __m256h, __mmask16, __m256h, 123)
test_3 (_mm512_mask_roundscale_ph, __m512h, __m512h, __mmask32, __m512h, 123)
test_3 (_mm_maskz_roundscale_sh, __m128h, __mmask8, __m128h, __m128h, 123)
test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
test_3x (_mm512_mask_roundscale_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
test_3x (_mm_maskz_reduce_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_3x (_mm_maskz_roundscale_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -726,6 +758,10 @@ test_4 (_mm_mask_min_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_sqrt_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm_mask_scalef_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_reduce_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4 (_mm_mask_roundscale_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
/* avx512fp16vlintrin.h */
test_2 (_mm_cmp_ph_mask, __mmask8, __m128h, __m128h, 1)
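sse-14.c checks that each intrinsic accepts literal immediates by
generating one wrapper per entry; the test_N/test_Nx macros expand
along these lines (a rough reconstruction, not the exact testsuite
definition):

#define test_1(func, type, op1_type, imm) \
  type test_##func (op1_type a) { return func (a, imm); }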

gcc/testsuite/gcc.target/i386/sse-22.c

@@ -776,6 +776,14 @@ test_2 (_mm_rsqrt28_round_ss, __m128, __m128, __m128, 8)
/* avx512fp16intrin.h */
test_1 (_mm512_sqrt_round_ph, __m512h, __m512h, 8)
test_1 (_mm_reduce_ph, __m128h, __m128h, 123)
test_1 (_mm256_reduce_ph, __m256h, __m256h, 123)
test_1 (_mm512_reduce_ph, __m512h, __m512h, 123)
test_1 (_mm_roundscale_ph, __m128h, __m128h, 123)
test_1 (_mm256_roundscale_ph, __m256h, __m256h, 123)
test_1 (_mm512_roundscale_ph, __m512h, __m512h, 123)
test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8)
@@ -793,9 +801,21 @@ test_2 (_mm_comi_sh, int, __m128h, __m128h, 1)
test_2 (_mm512_maskz_sqrt_round_ph, __m512h, __mmask32, __m512h, 8)
test_2 (_mm_sqrt_round_sh, __m128h, __m128h, __m128h, 8)
test_2 (_mm512_scalef_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm_maskz_reduce_ph, __m128h, __mmask8, __m128h, 123)
test_2 (_mm256_maskz_reduce_ph, __m256h, __mmask16, __m256h, 123)
test_2 (_mm512_maskz_reduce_ph, __m512h, __mmask32, __m512h, 123)
test_2 (_mm_reduce_sh, __m128h, __m128h, __m128h, 123)
test_2 (_mm_maskz_roundscale_ph, __m128h, __mmask8, __m128h, 123)
test_2 (_mm256_maskz_roundscale_ph, __m256h, __mmask16, __m256h, 123)
test_2 (_mm512_maskz_roundscale_ph, __m512h, __mmask32, __m512h, 123)
test_2 (_mm_roundscale_sh, __m128h, __m128h, __m128h, 123)
test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
test_2x (_mm512_maskz_reduce_round_ph, __m512h, __mmask32, __m512h, 123, 8)
test_2x (_mm512_maskz_roundscale_round_ph, __m512h, __mmask32, __m512h, 123, 8)
test_2x (_mm_reduce_round_sh, __m128h, __m128h, __m128h, 123, 8)
test_2x (_mm_roundscale_round_sh, __m128h, __m128h, __m128h, 123, 8)
test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -812,8 +832,20 @@ test_3 (_mm512_mask_cmp_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1)
test_3 (_mm512_mask_sqrt_round_ph, __m512h, __m512h, __mmask32, __m512h, 8)
test_3 (_mm_maskz_sqrt_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
test_3 (_mm512_maskz_scalef_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm_mask_reduce_ph, __m128h, __m128h, __mmask8, __m128h, 123)
test_3 (_mm256_mask_reduce_ph, __m256h, __m256h, __mmask16, __m256h, 123)
test_3 (_mm512_mask_reduce_ph, __m512h, __m512h, __mmask32, __m512h, 123)
test_3 (_mm_maskz_reduce_sh, __m128h, __mmask8, __m128h, __m128h, 123)
test_3 (_mm_mask_roundscale_ph, __m128h, __m128h, __mmask8, __m128h, 123)
test_3 (_mm256_mask_roundscale_ph, __m256h, __m256h, __mmask16, __m256h, 123)
test_3 (_mm512_mask_roundscale_ph, __m512h, __m512h, __mmask32, __m512h, 123)
test_3 (_mm_maskz_roundscale_sh, __m128h, __mmask8, __m128h, __m128h, 123)
test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
test_3x (_mm512_mask_roundscale_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
test_3x (_mm_maskz_reduce_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_3x (_mm_maskz_roundscale_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -828,6 +860,10 @@ test_4 (_mm_mask_max_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_min_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_sqrt_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm_mask_reduce_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4 (_mm_mask_roundscale_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
/* avx512fp16vlintrin.h */
test_2 (_mm_cmp_ph_mask, __mmask8, __m128h, __m128h, 1)

gcc/testsuite/gcc.target/i386/sse-23.c

@@ -723,6 +723,14 @@
#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)