AVX512FP16: Add vreduceph/vreducesh/vrndscaleph/vrndscalesh.
gcc/ChangeLog:

	* config/i386/avx512fp16intrin.h (_mm512_reduce_ph):
	New intrinsic.
	(_mm512_mask_reduce_ph): Likewise.
	(_mm512_maskz_reduce_ph): Likewise.
	(_mm512_reduce_round_ph): Likewise.
	(_mm512_mask_reduce_round_ph): Likewise.
	(_mm512_maskz_reduce_round_ph): Likewise.
	(_mm_reduce_sh): Likewise.
	(_mm_mask_reduce_sh): Likewise.
	(_mm_maskz_reduce_sh): Likewise.
	(_mm_reduce_round_sh): Likewise.
	(_mm_mask_reduce_round_sh): Likewise.
	(_mm_maskz_reduce_round_sh): Likewise.
	(_mm512_roundscale_ph): Likewise.
	(_mm512_mask_roundscale_ph): Likewise.
	(_mm512_maskz_roundscale_ph): Likewise.
	(_mm512_roundscale_round_ph): Likewise.
	(_mm512_mask_roundscale_round_ph): Likewise.
	(_mm512_maskz_roundscale_round_ph): Likewise.
	(_mm_roundscale_sh): Likewise.
	(_mm_mask_roundscale_sh): Likewise.
	(_mm_maskz_roundscale_sh): Likewise.
	(_mm_roundscale_round_sh): Likewise.
	(_mm_mask_roundscale_round_sh): Likewise.
	(_mm_maskz_roundscale_round_sh): Likewise.
	* config/i386/avx512fp16vlintrin.h (_mm_reduce_ph):
	New intrinsic.
	(_mm_mask_reduce_ph): Likewise.
	(_mm_maskz_reduce_ph): Likewise.
	(_mm256_reduce_ph): Likewise.
	(_mm256_mask_reduce_ph): Likewise.
	(_mm256_maskz_reduce_ph): Likewise.
	(_mm_roundscale_ph): Likewise.
	(_mm_mask_roundscale_ph): Likewise.
	(_mm_maskz_roundscale_ph): Likewise.
	(_mm256_roundscale_ph): Likewise.
	(_mm256_mask_roundscale_ph): Likewise.
	(_mm256_maskz_roundscale_ph): Likewise.
	* config/i386/i386-builtin-types.def: Add corresponding builtin types.
	* config/i386/i386-builtin.def: Add corresponding new builtins.
	* config/i386/i386-expand.c (ix86_expand_args_builtin): Handle
	new builtin types.
	(ix86_expand_round_builtin): Ditto.
	* config/i386/sse.md (<mask_codefor>reducep<mode><mask_name>):
	Renamed to ...
	(<mask_codefor>reducep<mode><mask_name><round_saeonly_name>):
	... this, and adjust for round operands.
	(reduces<mode><mask_scalar_name>): Likewise, with ...
	(reduces<mode><mask_scalar_name><round_saeonly_scalar_name>):
	... this.
	(<avx512>_rndscale<mode><mask_name><round_saeonly_name>): Adjust
	for HF vector modes.
	(avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>):
	Ditto.
	(*avx512f_rndscale<mode><round_saeonly_name>): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-1.c: Add test for new builtins.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/sse-14.c: Add test for new intrinsics.
	* gcc.target/i386/sse-22.c: Ditto.
parent 03f0cbccb6
commit 8bed761796
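Editor's usage sketch (not part of the commit): the new intrinsics follow the
usual AVX-512 mask/maskz/round-operand pattern. A minimal caller, assuming a
compiler that carries this patch and the -mavx512fp16 option, could look like:

#include <immintrin.h>

/* Round each _Float16 element to 4 fraction bits (imm8[7:4] = 4,
   imm8[3:0] = 0 selects round-to-nearest), then form the reduced
   argument x - roundscale (x).  The mask/maskz and _round variants
   mirror the other AVX512FP16 intrinsics.  */
__m512h
demo (__m512h x, __mmask32 m)
{
  __m512h rs = _mm512_roundscale_ph (x, 0x40);
  __m512h rd = _mm512_maskz_reduce_ph (m, x, 0x40);  /* zeroed where !m */
  __m512h rr = _mm512_reduce_round_ph (x, 0x40, _MM_FROUND_NO_EXC);
  return _mm512_add_ph (rs, _mm512_add_ph (rd, rr));
}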
@@ -1623,6 +1623,365 @@ _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,

#endif /* __OPTIMIZE__ */

/* Intrinsics vreduceph. */
#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_ph (__m512h __A, int __B)
{
  return __builtin_ia32_vreduceph_v32hf_mask_round (__A, __B,
            _mm512_setzero_ph (),
            (__mmask32) -1,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
{
  return __builtin_ia32_vreduceph_v32hf_mask_round (__C, __D, __A, __B,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
{
  return __builtin_ia32_vreduceph_v32hf_mask_round (__B, __C,
            _mm512_setzero_ph (),
            __A,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
{
  return __builtin_ia32_vreduceph_v32hf_mask_round (__A, __B,
            _mm512_setzero_ph (),
            (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			     int __D, const int __E)
{
  return __builtin_ia32_vreduceph_v32hf_mask_round (__C, __D, __A, __B,
            __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
			      const int __D)
{
  return __builtin_ia32_vreduceph_v32hf_mask_round (__B, __C,
            _mm512_setzero_ph (),
            __A, __D);
}

#else
#define _mm512_reduce_ph(A, B) \
  (__builtin_ia32_vreduceph_v32hf_mask_round ((A), (B), \
      _mm512_setzero_ph (), \
      (__mmask32)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_reduce_ph(A, B, C, D) \
  (__builtin_ia32_vreduceph_v32hf_mask_round ((C), (D), (A), (B), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_reduce_ph(A, B, C) \
  (__builtin_ia32_vreduceph_v32hf_mask_round ((B), (C), \
      _mm512_setzero_ph (), \
      (A), _MM_FROUND_CUR_DIRECTION))

#define _mm512_reduce_round_ph(A, B, C) \
  (__builtin_ia32_vreduceph_v32hf_mask_round ((A), (B), \
      _mm512_setzero_ph (), \
      (__mmask32)-1, (C)))

#define _mm512_mask_reduce_round_ph(A, B, C, D, E) \
  (__builtin_ia32_vreduceph_v32hf_mask_round ((C), (D), (A), (B), (E)))

#define _mm512_maskz_reduce_round_ph(A, B, C, D) \
  (__builtin_ia32_vreduceph_v32hf_mask_round ((B), (C), \
      _mm512_setzero_ph (), \
      (A), (D)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vreducesh. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_sh (__m128h __A, __m128h __B, int __C)
{
  return __builtin_ia32_vreducesh_v8hf_mask_round (__A, __B, __C,
            _mm_setzero_ph (),
            (__mmask8) -1,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
		    __m128h __D, int __E)
{
  return __builtin_ia32_vreducesh_v8hf_mask_round (__C, __D, __E, __A, __B,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
{
  return __builtin_ia32_vreducesh_v8hf_mask_round (__B, __C, __D,
            _mm_setzero_ph (), __A,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
{
  return __builtin_ia32_vreducesh_v8hf_mask_round (__A, __B, __C,
            _mm_setzero_ph (),
            (__mmask8) -1, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
			  __m128h __D, int __E, const int __F)
{
  return __builtin_ia32_vreducesh_v8hf_mask_round (__C, __D, __E, __A,
            __B, __F);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			   int __D, const int __E)
{
  return __builtin_ia32_vreducesh_v8hf_mask_round (__B, __C, __D,
            _mm_setzero_ph (),
            __A, __E);
}

#else
#define _mm_reduce_sh(A, B, C) \
  (__builtin_ia32_vreducesh_v8hf_mask_round ((A), (B), (C), \
      _mm_setzero_ph (), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_reduce_sh(A, B, C, D, E) \
  (__builtin_ia32_vreducesh_v8hf_mask_round ((C), (D), (E), (A), (B), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_reduce_sh(A, B, C, D) \
  (__builtin_ia32_vreducesh_v8hf_mask_round ((B), (C), (D), \
      _mm_setzero_ph (), \
      (A), _MM_FROUND_CUR_DIRECTION))

#define _mm_reduce_round_sh(A, B, C, D) \
  (__builtin_ia32_vreducesh_v8hf_mask_round ((A), (B), (C), \
      _mm_setzero_ph (), \
      (__mmask8)-1, (D)))

#define _mm_mask_reduce_round_sh(A, B, C, D, E, F) \
  (__builtin_ia32_vreducesh_v8hf_mask_round ((C), (D), (E), (A), (B), (F)))

#define _mm_maskz_reduce_round_sh(A, B, C, D, E) \
  (__builtin_ia32_vreducesh_v8hf_mask_round ((B), (C), (D), \
      _mm_setzero_ph (), \
      (A), (E)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vrndscaleph. */
#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_ph (__m512h __A, int __B)
{
  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__A, __B,
            _mm512_setzero_ph (),
            (__mmask32) -1,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
			   __m512h __C, int __D)
{
  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__C, __D, __A, __B,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
{
  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__B, __C,
            _mm512_setzero_ph (),
            __A,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
{
  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__A, __B,
            _mm512_setzero_ph (),
            (__mmask32) -1,
            __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
				 __m512h __C, int __D, const int __E)
{
  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__C, __D, __A,
            __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
				  const int __D)
{
  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__B, __C,
            _mm512_setzero_ph (),
            __A, __D);
}

#else
#define _mm512_roundscale_ph(A, B) \
  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((A), (B), \
      _mm512_setzero_ph (), \
      (__mmask32)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ph(A, B, C, D) \
  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((C), (D), (A), (B), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ph(A, B, C) \
  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((B), (C), \
      _mm512_setzero_ph (), \
      (A), \
      _MM_FROUND_CUR_DIRECTION))
#define _mm512_roundscale_round_ph(A, B, C) \
  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((A), (B), \
      _mm512_setzero_ph (), \
      (__mmask32)-1, (C)))

#define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \
  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((C), (D), (A), (B), (E)))

#define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((B), (C), \
      _mm512_setzero_ph (), \
      (A), (D)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vrndscalesh. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
{
  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__A, __B, __C,
            _mm_setzero_ph (),
            (__mmask8) -1,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
			__m128h __D, int __E)
{
  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__C, __D, __E, __A, __B,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
{
  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__B, __C, __D,
            _mm_setzero_ph (), __A,
            _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
{
  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__A, __B, __C,
            _mm_setzero_ph (),
            (__mmask8) -1,
            __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
			      __m128h __D, int __E, const int __F)
{
  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__C, __D, __E,
            __A, __B, __F);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			       int __D, const int __E)
{
  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__B, __C, __D,
            _mm_setzero_ph (),
            __A, __E);
}

#else
#define _mm_roundscale_sh(A, B, C) \
  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((A), (B), (C), \
      _mm_setzero_ph (), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sh(A, B, C, D, E) \
  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((C), (D), (E), (A), (B), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_sh(A, B, C, D) \
  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((B), (C), (D), \
      _mm_setzero_ph (), \
      (A), _MM_FROUND_CUR_DIRECTION))

#define _mm_roundscale_round_sh(A, B, C, D) \
  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((A), (B), (C), \
      _mm_setzero_ph (), \
      (__mmask8)-1, (D)))

#define _mm_mask_roundscale_round_sh(A, B, C, D, E, F) \
  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((C), (D), (E), (A), (B), (F)))

#define _mm_maskz_roundscale_round_sh(A, B, C, D, E) \
  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((B), (C), (D), \
      _mm_setzero_ph (), \
      (A), (E)))

#endif /* __OPTIMIZE__ */

#ifdef __DISABLE_AVX512FP16__
#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
@@ -548,6 +548,159 @@ _mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
            __A);
}

/* Intrinsics vreduceph. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ph (__m128h __A, int __B)
{
  return __builtin_ia32_vreduceph_v8hf_mask (__A, __B,
            _mm_setzero_ph (),
            (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_vreduceph_v8hf_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_vreduceph_v8hf_mask (__B, __C,
            _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ph (__m256h __A, int __B)
{
  return __builtin_ia32_vreduceph_v16hf_mask (__A, __B,
            _mm256_setzero_ph (),
            (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D)
{
  return __builtin_ia32_vreduceph_v16hf_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_vreduceph_v16hf_mask (__B, __C,
            _mm256_setzero_ph (),
            __A);
}

#else
#define _mm_reduce_ph(A, B) \
  (__builtin_ia32_vreduceph_v8hf_mask ((A), (B), \
      _mm_setzero_ph (), \
      ((__mmask8)-1)))

#define _mm_mask_reduce_ph(A, B, C, D) \
  (__builtin_ia32_vreduceph_v8hf_mask ((C), (D), (A), (B)))

#define _mm_maskz_reduce_ph(A, B, C) \
  (__builtin_ia32_vreduceph_v8hf_mask ((B), (C), _mm_setzero_ph (), (A)))

#define _mm256_reduce_ph(A, B) \
  (__builtin_ia32_vreduceph_v16hf_mask ((A), (B), \
      _mm256_setzero_ph (), \
      ((__mmask16)-1)))

#define _mm256_mask_reduce_ph(A, B, C, D) \
  (__builtin_ia32_vreduceph_v16hf_mask ((C), (D), (A), (B)))

#define _mm256_maskz_reduce_ph(A, B, C) \
  (__builtin_ia32_vreduceph_v16hf_mask ((B), (C), _mm256_setzero_ph (), (A)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vrndscaleph. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_ph (__m128h __A, int __B)
{
  return __builtin_ia32_vrndscaleph_v8hf_mask (__A, __B,
            _mm_setzero_ph (),
            (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_vrndscaleph_v8hf_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_vrndscaleph_v8hf_mask (__B, __C,
            _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_ph (__m256h __A, int __B)
{
  return __builtin_ia32_vrndscaleph_v16hf_mask (__A, __B,
            _mm256_setzero_ph (),
            (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C,
			   int __D)
{
  return __builtin_ia32_vrndscaleph_v16hf_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_vrndscaleph_v16hf_mask (__B, __C,
            _mm256_setzero_ph (),
            __A);
}

#else
#define _mm_roundscale_ph(A, B) \
  (__builtin_ia32_vrndscaleph_v8hf_mask ((A), (B), _mm_setzero_ph (), \
      ((__mmask8)-1)))

#define _mm_mask_roundscale_ph(A, B, C, D) \
  (__builtin_ia32_vrndscaleph_v8hf_mask ((C), (D), (A), (B)))

#define _mm_maskz_roundscale_ph(A, B, C) \
  (__builtin_ia32_vrndscaleph_v8hf_mask ((B), (C), _mm_setzero_ph (), (A)))

#define _mm256_roundscale_ph(A, B) \
  (__builtin_ia32_vrndscaleph_v16hf_mask ((A), (B), \
      _mm256_setzero_ph (), \
      ((__mmask16)-1)))

#define _mm256_mask_roundscale_ph(A, B, C, D) \
  (__builtin_ia32_vrndscaleph_v16hf_mask ((C), (D), (A), (B)))

#define _mm256_maskz_roundscale_ph(A, B, C) \
  (__builtin_ia32_vrndscaleph_v16hf_mask ((B), (C), \
      _mm256_setzero_ph (), (A)))

#endif /* __OPTIMIZE__ */

#ifdef __DISABLE_AVX512FP16VL__
#undef __DISABLE_AVX512FP16VL__
#pragma GCC pop_options
@@ -1307,12 +1307,15 @@ DEF_FUNCTION_TYPE (V8HF, V8HI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, UQI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, INT, V8HF, UQI)
DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI)
DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI, INT)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, UHI)
DEF_FUNCTION_TYPE (V16HF, V16HF, INT, V16HF, UHI)
DEF_FUNCTION_TYPE (UHI, V16HF, V16HF, INT, UHI)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, V16HF, UHI)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI)
@@ -1322,3 +1325,4 @@ DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI, INT)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI)
DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI, INT)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT)
DEF_FUNCTION_TYPE (V32HF, V32HF, INT, V32HF, USI, INT)
@@ -2814,6 +2814,10 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__bu
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_vrcpsh_v8hf_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_vscalefph_v8hf_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_vscalefph_v16hf_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv8hf_mask, "__builtin_ia32_vreduceph_v8hf_mask", IX86_BUILTIN_VREDUCEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv16hf_mask, "__builtin_ia32_vreduceph_v16hf_mask", IX86_BUILTIN_VREDUCEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf_mask, "__builtin_ia32_vrndscaleph_v8hf_mask", IX86_BUILTIN_VRNDSCALEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_vrndscaleph_v16hf_mask", IX86_BUILTIN_VRNDSCALEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)

/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
@@ -3033,6 +3037,10 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_vsqrtsh_v8hf_mask_round", IX86_BUILTIN_VSQRTSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_vscalefph_v32hf_mask_round", IX86_BUILTIN_VSCALEFPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_vscalefsh_v8hf_mask_round", IX86_BUILTIN_VSCALEFSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_vreduceph_v32hf_mask_round", IX86_BUILTIN_VREDUCEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_vreducesh_v8hf_mask_round", IX86_BUILTIN_VREDUCESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_vrndscaleph_v32hf_mask_round", IX86_BUILTIN_VRNDSCALEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_vrndscalesh_v8hf_mask_round", IX86_BUILTIN_VRNDSCALESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)

BDESC_END (ROUND_ARGS, MULTI_ARG)
@@ -10061,6 +10061,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
    case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
    case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
    case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
    case V16HF_FTYPE_V16HF_INT_V16HF_UHI:
    case V8HF_FTYPE_V8HF_INT_V8HF_UQI:
    case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
    case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
    case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
@@ -10709,6 +10711,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
    case V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT:
      nargs = 5;
      break;
    case V32HF_FTYPE_V32HF_INT_V32HF_USI_INT:
    case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
    case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
    case V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT:
@@ -10731,6 +10734,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
    case V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT:
      nargs = 6;
      nargs_constant = 4;
      break;
@@ -3257,28 +3257,28 @@
})

(define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
  [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
	(unspec:VFH_AVX512VL
	  [(match_operand:VFH_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	   (match_operand:SI 2 "const_0_to_255_operand")]
	  UNSPEC_REDUCE))]
  "TARGET_AVX512DQ"
  "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (<MODE>mode))"
  "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

(define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
  [(set (match_operand:VFH_128 0 "register_operand" "=v")
	(vec_merge:VFH_128
	  (unspec:VFH_128
	    [(match_operand:VFH_128 1 "register_operand" "v")
	     (match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
	     (match_operand:SI 3 "const_0_to_255_operand")]
	    UNSPEC_REDUCE)
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512DQ"
  "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (<MODE>mode))"
  "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
@@ -10794,9 +10794,9 @@
   (set_attr "mode" "<ssescalarmode>")])

(define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
  [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
	(unspec:VFH_AVX512VL
	  [(match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
	   (match_operand:SI 2 "const_0_to_255_operand")]
	  UNSPEC_ROUND))]
  "TARGET_AVX512F"
@@ -10806,13 +10806,13 @@
   (set_attr "mode" "<MODE>")])

(define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
  [(set (match_operand:VFH_128 0 "register_operand" "=v")
	(vec_merge:VFH_128
	  (unspec:VFH_128
	    [(match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
	     (match_operand:SI 3 "const_0_to_255_operand")]
	    UNSPEC_ROUND)
	  (match_operand:VF_128 1 "register_operand" "v")
	  (match_operand:VFH_128 1 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
@@ -10821,14 +10821,14 @@
   (set_attr "mode" "<MODE>")])

(define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_duplicate:VF_128
  [(set (match_operand:VFH_128 0 "register_operand" "=v")
	(vec_merge:VFH_128
	  (vec_duplicate:VFH_128
	    (unspec:<ssescalarmode>
	      [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	       (match_operand:SI 3 "const_0_to_255_operand")]
	      UNSPEC_ROUND))
	  (match_operand:VF_128 1 "register_operand" "v")
	  (match_operand:VFH_128 1 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
@@ -705,6 +705,14 @@
#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)

/* avx512fp16vlintrin.h */
#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
@@ -722,6 +722,14 @@
#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)

/* avx512fp16vlintrin.h */
#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
@@ -671,6 +671,14 @@ test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 8)

/* avx512fp16intrin.h */
test_1 (_mm512_sqrt_round_ph, __m512h, __m512h, 8)
test_1 (_mm_reduce_ph, __m128h, __m128h, 123)
test_1 (_mm256_reduce_ph, __m256h, __m256h, 123)
test_1 (_mm512_reduce_ph, __m512h, __m512h, 123)
test_1 (_mm_roundscale_ph, __m128h, __m128h, 123)
test_1 (_mm256_roundscale_ph, __m256h, __m256h, 123)
test_1 (_mm512_roundscale_ph, __m512h, __m512h, 123)
test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8)
@@ -689,9 +697,21 @@ test_2 (_mm512_maskz_sqrt_round_ph, __m512h, __mmask32, __m512h, 8)
test_2 (_mm_sqrt_round_sh, __m128h, __m128h, __m128h, 8)
test_2 (_mm512_scalef_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm_scalef_round_sh, __m128h, __m128h, __m128h, 8)
test_2 (_mm_maskz_reduce_ph, __m128h, __mmask8, __m128h, 123)
test_2 (_mm256_maskz_reduce_ph, __m256h, __mmask16, __m256h, 123)
test_2 (_mm512_maskz_reduce_ph, __m512h, __mmask32, __m512h, 123)
test_2 (_mm_reduce_sh, __m128h, __m128h, __m128h, 123)
test_2 (_mm_maskz_roundscale_ph, __m128h, __mmask8, __m128h, 123)
test_2 (_mm256_maskz_roundscale_ph, __m256h, __mmask16, __m256h, 123)
test_2 (_mm512_maskz_roundscale_ph, __m512h, __mmask32, __m512h, 123)
test_2 (_mm_roundscale_sh, __m128h, __m128h, __m128h, 123)
test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
test_2x (_mm512_maskz_reduce_round_ph, __m512h, __mmask32, __m512h, 123, 8)
test_2x (_mm512_maskz_roundscale_round_ph, __m512h, __mmask32, __m512h, 123, 8)
test_2x (_mm_reduce_round_sh, __m128h, __m128h, __m128h, 123, 8)
test_2x (_mm_roundscale_round_sh, __m128h, __m128h, __m128h, 123, 8)
test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -709,8 +729,20 @@ test_3 (_mm512_mask_sqrt_round_ph, __m512h, __m512h, __mmask32, __m512h, 8)
test_3 (_mm_maskz_sqrt_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
test_3 (_mm512_maskz_scalef_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm_maskz_scalef_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
test_3 (_mm_mask_reduce_ph, __m128h, __m128h, __mmask8, __m128h, 123)
test_3 (_mm256_mask_reduce_ph, __m256h, __m256h, __mmask16, __m256h, 123)
test_3 (_mm512_mask_reduce_ph, __m512h, __m512h, __mmask32, __m512h, 123)
test_3 (_mm_maskz_reduce_sh, __m128h, __mmask8, __m128h, __m128h, 123)
test_3 (_mm_mask_roundscale_ph, __m128h, __m128h, __mmask8, __m128h, 123)
test_3 (_mm256_mask_roundscale_ph, __m256h, __m256h, __mmask16, __m256h, 123)
test_3 (_mm512_mask_roundscale_ph, __m512h, __m512h, __mmask32, __m512h, 123)
test_3 (_mm_maskz_roundscale_sh, __m128h, __mmask8, __m128h, __m128h, 123)
test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
test_3x (_mm512_mask_roundscale_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
test_3x (_mm_maskz_reduce_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_3x (_mm_maskz_roundscale_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -726,6 +758,10 @@ test_4 (_mm_mask_min_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_sqrt_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm_mask_scalef_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_reduce_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4 (_mm_mask_roundscale_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)

/* avx512fp16vlintrin.h */
test_2 (_mm_cmp_ph_mask, __mmask8, __m128h, __m128h, 1)
@@ -776,6 +776,14 @@ test_2 (_mm_rsqrt28_round_ss, __m128, __m128, __m128, 8)

/* avx512fp16intrin.h */
test_1 (_mm512_sqrt_round_ph, __m512h, __m512h, 8)
test_1 (_mm_reduce_ph, __m128h, __m128h, 123)
test_1 (_mm256_reduce_ph, __m256h, __m256h, 123)
test_1 (_mm512_reduce_ph, __m512h, __m512h, 123)
test_1 (_mm_roundscale_ph, __m128h, __m128h, 123)
test_1 (_mm256_roundscale_ph, __m256h, __m256h, 123)
test_1 (_mm512_roundscale_ph, __m512h, __m512h, 123)
test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8)
@@ -793,9 +801,21 @@ test_2 (_mm_comi_sh, int, __m128h, __m128h, 1)
test_2 (_mm512_maskz_sqrt_round_ph, __m512h, __mmask32, __m512h, 8)
test_2 (_mm_sqrt_round_sh, __m128h, __m128h, __m128h, 8)
test_2 (_mm512_scalef_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm_maskz_reduce_ph, __m128h, __mmask8, __m128h, 123)
test_2 (_mm256_maskz_reduce_ph, __m256h, __mmask16, __m256h, 123)
test_2 (_mm512_maskz_reduce_ph, __m512h, __mmask32, __m512h, 123)
test_2 (_mm_reduce_sh, __m128h, __m128h, __m128h, 123)
test_2 (_mm_maskz_roundscale_ph, __m128h, __mmask8, __m128h, 123)
test_2 (_mm256_maskz_roundscale_ph, __m256h, __mmask16, __m256h, 123)
test_2 (_mm512_maskz_roundscale_ph, __m512h, __mmask32, __m512h, 123)
test_2 (_mm_roundscale_sh, __m128h, __m128h, __m128h, 123)
test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
test_2x (_mm512_maskz_reduce_round_ph, __m512h, __mmask32, __m512h, 123, 8)
test_2x (_mm512_maskz_roundscale_round_ph, __m512h, __mmask32, __m512h, 123, 8)
test_2x (_mm_reduce_round_sh, __m128h, __m128h, __m128h, 123, 8)
test_2x (_mm_roundscale_round_sh, __m128h, __m128h, __m128h, 123, 8)
test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -812,8 +832,20 @@ test_3 (_mm512_mask_cmp_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1)
test_3 (_mm512_mask_sqrt_round_ph, __m512h, __m512h, __mmask32, __m512h, 8)
test_3 (_mm_maskz_sqrt_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
test_3 (_mm512_maskz_scalef_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm_mask_reduce_ph, __m128h, __m128h, __mmask8, __m128h, 123)
test_3 (_mm256_mask_reduce_ph, __m256h, __m256h, __mmask16, __m256h, 123)
test_3 (_mm512_mask_reduce_ph, __m512h, __m512h, __mmask32, __m512h, 123)
test_3 (_mm_maskz_reduce_sh, __m128h, __mmask8, __m128h, __m128h, 123)
test_3 (_mm_mask_roundscale_ph, __m128h, __m128h, __mmask8, __m128h, 123)
test_3 (_mm256_mask_roundscale_ph, __m256h, __m256h, __mmask16, __m256h, 123)
test_3 (_mm512_mask_roundscale_ph, __m512h, __m512h, __mmask32, __m512h, 123)
test_3 (_mm_maskz_roundscale_sh, __m128h, __mmask8, __m128h, __m128h, 123)
test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
test_3x (_mm512_mask_roundscale_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
test_3x (_mm_maskz_reduce_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_3x (_mm_maskz_roundscale_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -828,6 +860,10 @@ test_4 (_mm_mask_max_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_min_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_sqrt_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm_mask_reduce_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4 (_mm_mask_roundscale_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)

/* avx512fp16vlintrin.h */
test_2 (_mm_cmp_ph_mask, __mmask8, __m128h, __m128h, 1)
@@ -723,6 +723,14 @@
#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)

/* avx512fp16vlintrin.h */
#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
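Editor's note on semantics (summarizing the Intel SDM description of
VRNDSCALE*/VREDUCE*, not text from this commit): with M = imm8[7:4],
vrndscaleph computes 2^-M * round(2^M * x) per element, and vreduceph
computes the remainder x - rndscale(x, imm8); the sh forms operate on
element 0 and copy the remaining elements from the first source. A
hypothetical scalar reference model, assuming round-to-nearest
(imm8[3:0] = 0):

#include <math.h>

/* 2^-m * round (2^m * x); nearbyintf honors the current (default
   round-to-nearest) rounding mode.  */
static float
rndscale_ref (float x, int m)
{
  return ldexpf (nearbyintf (ldexpf (x, m)), -m);
}

/* Reduced argument: what vreduceph leaves behind.  */
static float
reduce_ref (float x, int m)
{
  return x - rndscale_ref (x, m);
}

For example, rndscale_ref (2.7f, 0) gives 3.0f and reduce_ref (2.7f, 0)
gives roughly -0.3f.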