AVX512FP16: Add vaddph/vsubph/vdivph/vmulph.
gcc/ChangeLog:

	* config.gcc: Add avx512fp16vlintrin.h.
	* config/i386/avx512fp16intrin.h (_mm512_add_ph): New intrinsic.
	(_mm512_mask_add_ph): Likewise.
	(_mm512_maskz_add_ph): Likewise.
	(_mm512_sub_ph): Likewise.
	(_mm512_mask_sub_ph): Likewise.
	(_mm512_maskz_sub_ph): Likewise.
	(_mm512_mul_ph): Likewise.
	(_mm512_mask_mul_ph): Likewise.
	(_mm512_maskz_mul_ph): Likewise.
	(_mm512_div_ph): Likewise.
	(_mm512_mask_div_ph): Likewise.
	(_mm512_maskz_div_ph): Likewise.
	(_mm512_add_round_ph): Likewise.
	(_mm512_mask_add_round_ph): Likewise.
	(_mm512_maskz_add_round_ph): Likewise.
	(_mm512_sub_round_ph): Likewise.
	(_mm512_mask_sub_round_ph): Likewise.
	(_mm512_maskz_sub_round_ph): Likewise.
	(_mm512_mul_round_ph): Likewise.
	(_mm512_mask_mul_round_ph): Likewise.
	(_mm512_maskz_mul_round_ph): Likewise.
	(_mm512_div_round_ph): Likewise.
	(_mm512_mask_div_round_ph): Likewise.
	(_mm512_maskz_div_round_ph): Likewise.
	* config/i386/avx512fp16vlintrin.h: New header.
	* config/i386/i386-builtin-types.def (V16HF, V8HF, V32HF): Add new
	builtin types.
	* config/i386/i386-builtin.def: Add corresponding builtins.
	* config/i386/i386-expand.c (ix86_expand_args_builtin): Handle new
	builtin types.
	(ix86_expand_round_builtin): Likewise.
	* config/i386/immintrin.h: Include avx512fp16vlintrin.h.
	* config/i386/sse.md (VFH): New mode_iterator.
	(VF2H): Likewise.
	(avx512fmaskmode): Add HF vector modes.
	(avx512fmaskhalfmode): Likewise.
	(<plusminus_insn><mode>3<mask_name><round_name>): Adjust to support
	HF vector modes.
	(*<plusminus_insn><mode>3<mask_name><round_name>): Likewise.
	(mul<mode>3<mask_name><round_name>): Likewise.
	(*mul<mode>3<mask_name><round_name>): Likewise.
	(div<mode>3): Likewise.
	(<sse>_div<mode>3<mask_name><round_name>): Likewise.
	* config/i386/subst.md (SUBST_V): Add HF vector modes.
	(SUBST_A): Likewise.
	(round_mode512bit_condition): Adjust for V32HFmode.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-1.c: Add -mavx512vl and test for new
	intrinsics.
	* gcc.target/i386/avx-2.c: Add -mavx512vl.
	* gcc.target/i386/avx512fp16-11a.c: New test.
	* gcc.target/i386/avx512fp16-11b.c: Ditto.
	* gcc.target/i386/avx512vlfp16-11a.c: Ditto.
	* gcc.target/i386/avx512vlfp16-11b.c: Ditto.
	* gcc.target/i386/sse-13.c: Add test for new builtins.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/sse-14.c: Add test for new intrinsics.
	* gcc.target/i386/sse-22.c: Ditto.
parent 8f323c712e
commit bd7a34ef55
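For orientation before the diff itself, here is a minimal usage sketch of the new 512-bit intrinsics (editorial illustration, not part of the patch; the function name and mask are hypothetical). Merge-masking keeps a lane's value from the first argument when its mask bit is clear; zero-masking clears it. Compile with something like gcc -O2 -mavx512fp16.

#include <immintrin.h>

/* Hypothetical demo of the new vaddph/vsubph/vmulph/vdivph intrinsics.  */
__m512h
demo (__m512h a, __m512h b, __mmask32 m)
{
  __m512h sum  = _mm512_add_ph (a, b);            /* plain vaddph */
  __m512h diff = _mm512_mask_sub_ph (a, m, a, b); /* merge-masked vsubph:
                                                     clear mask bits keep A */
  __m512h prod = _mm512_maskz_mul_ph (m, a, b);   /* zero-masked vmulph:
                                                     clear mask bits give 0.0 */
  return _mm512_div_ph (_mm512_add_ph (sum, diff),
                        _mm512_add_ph (prod, b)); /* plain vdivph */
}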
gcc/config.gcc
@@ -416,7 +416,7 @@ i[34567]86-*-* | x86_64-*-*)
 		       tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
-		       mwaitintrin.h avx512fp16intrin.h"
+		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h"
 	;;
 ia64-*-*)
 	extra_headers=ia64intrin.h
gcc/config/i386/avx512fp16intrin.h
@@ -217,6 +217,257 @@ _mm_store_sh (void *__P, __m128h __A)
   *(_Float16 *) __P = ((__v8hf)__A)[0];
 }
 
+/* Intrinsics v[add,sub,mul,div]ph.  */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_ph (__m512h __A, __m512h __B)
+{
+  return (__m512h) ((__v32hf) __A + (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_vaddph_v32hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_vaddph_v32hf_mask (__B, __C,
+					   _mm512_setzero_ph (), __A);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_ph (__m512h __A, __m512h __B)
+{
+  return (__m512h) ((__v32hf) __A - (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_vsubph_v32hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_vsubph_v32hf_mask (__B, __C,
+					   _mm512_setzero_ph (), __A);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_ph (__m512h __A, __m512h __B)
+{
+  return (__m512h) ((__v32hf) __A * (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_vmulph_v32hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_vmulph_v32hf_mask (__B, __C,
+					   _mm512_setzero_ph (), __A);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_ph (__m512h __A, __m512h __B)
+{
+  return (__m512h) ((__v32hf) __A / (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_vdivph_v32hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_vdivph_v32hf_mask (__B, __C,
+					   _mm512_setzero_ph (), __A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_vaddph_v32hf_mask_round (__A, __B,
+						 _mm512_setzero_ph (),
+						 (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+			  __m512h __D, const int __E)
+{
+  return __builtin_ia32_vaddph_v32hf_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+			   const int __D)
+{
+  return __builtin_ia32_vaddph_v32hf_mask_round (__B, __C,
+						 _mm512_setzero_ph (),
+						 __A, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_vsubph_v32hf_mask_round (__A, __B,
+						 _mm512_setzero_ph (),
+						 (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+			  __m512h __D, const int __E)
+{
+  return __builtin_ia32_vsubph_v32hf_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+			   const int __D)
+{
+  return __builtin_ia32_vsubph_v32hf_mask_round (__B, __C,
+						 _mm512_setzero_ph (),
+						 __A, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_vmulph_v32hf_mask_round (__A, __B,
+						 _mm512_setzero_ph (),
+						 (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+			  __m512h __D, const int __E)
+{
+  return __builtin_ia32_vmulph_v32hf_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+			   const int __D)
+{
+  return __builtin_ia32_vmulph_v32hf_mask_round (__B, __C,
+						 _mm512_setzero_ph (),
+						 __A, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_vdivph_v32hf_mask_round (__A, __B,
+						 _mm512_setzero_ph (),
+						 (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+			  __m512h __D, const int __E)
+{
+  return __builtin_ia32_vdivph_v32hf_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+			   const int __D)
+{
+  return __builtin_ia32_vdivph_v32hf_mask_round (__B, __C,
+						 _mm512_setzero_ph (),
+						 __A, __D);
+}
+#else
+#define _mm512_add_round_ph(A, B, C)					\
+  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((A), (B),		\
+						   _mm512_setzero_ph (),\
+						   (__mmask32)-1, (C)))
+
+#define _mm512_mask_add_round_ph(A, B, C, D, E)				\
+  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_add_round_ph(A, B, C, D)				\
+  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((B), (C),		\
+						   _mm512_setzero_ph (),\
+						   (A), (D)))
+
+#define _mm512_sub_round_ph(A, B, C)					\
+  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((A), (B),		\
+						   _mm512_setzero_ph (),\
+						   (__mmask32)-1, (C)))
+
+#define _mm512_mask_sub_round_ph(A, B, C, D, E)				\
+  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_sub_round_ph(A, B, C, D)				\
+  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((B), (C),		\
+						   _mm512_setzero_ph (),\
+						   (A), (D)))
+
+#define _mm512_mul_round_ph(A, B, C)					\
+  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((A), (B),		\
+						   _mm512_setzero_ph (),\
+						   (__mmask32)-1, (C)))
+
+#define _mm512_mask_mul_round_ph(A, B, C, D, E)				\
+  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_mul_round_ph(A, B, C, D)				\
+  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((B), (C),		\
+						   _mm512_setzero_ph (),\
+						   (A), (D)))
+
+#define _mm512_div_round_ph(A, B, C)					\
+  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((A), (B),		\
+						   _mm512_setzero_ph (),\
+						   (__mmask32)-1, (C)))
+
+#define _mm512_mask_div_round_ph(A, B, C, D, E)				\
+  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_div_round_ph(A, B, C, D)				\
+  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((B), (C),		\
+						   _mm512_setzero_ph (),\
+						   (A), (D)))
+#endif /* __OPTIMIZE__ */
+
 #ifdef __DISABLE_AVX512FP16__
 #undef __DISABLE_AVX512FP16__
 #pragma GCC pop_options
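The _round variants above take an extra immediate selecting the rounding behaviour; the constant 8 used throughout the testsuite hunks below is _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC. A short usage sketch (editorial illustration, not from the patch):

#include <immintrin.h>

/* Hypothetical example: half-precision add with explicit round-toward-zero
   and suppressed exceptions.  The rounding argument must be a compile-time
   constant, which is why the non-__OPTIMIZE__ path uses macros.  */
__m512h
add_toward_zero (__m512h a, __m512h b)
{
  return _mm512_add_round_ph (a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}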
gcc/config/i386/avx512fp16vlintrin.h (new file, 219 lines)
@@ -0,0 +1,219 @@
+/* Copyright (C) 2019 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512fp16vlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512FP16VLINTRIN_H_INCLUDED
+#define __AVX512FP16VLINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VL__) || !defined(__AVX512FP16__)
+#pragma GCC push_options
+#pragma GCC target("avx512fp16,avx512vl")
+#define __DISABLE_AVX512FP16VL__
+#endif /* __AVX512FP16VL__ */
+
+/* Intrinsics v[add,sub,mul,div]ph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ph (__m128h __A, __m128h __B)
+{
+  return (__m128h) ((__v8hf) __A + (__v8hf) __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_ph (__m256h __A, __m256h __B)
+{
+  return (__m256h) ((__v16hf) __A + (__v16hf) __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_vaddph_v8hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+  return __builtin_ia32_vaddph_v16hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_vaddph_v8hf_mask (__B, __C, _mm_setzero_ph (),
+					  __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_vaddph_v16hf_mask (__B, __C,
+					   _mm256_setzero_ph (), __A);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ph (__m128h __A, __m128h __B)
+{
+  return (__m128h) ((__v8hf) __A - (__v8hf) __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_ph (__m256h __A, __m256h __B)
+{
+  return (__m256h) ((__v16hf) __A - (__v16hf) __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_vsubph_v8hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+  return __builtin_ia32_vsubph_v16hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_vsubph_v8hf_mask (__B, __C, _mm_setzero_ph (),
+					  __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_vsubph_v16hf_mask (__B, __C,
+					   _mm256_setzero_ph (), __A);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ph (__m128h __A, __m128h __B)
+{
+  return (__m128h) ((__v8hf) __A * (__v8hf) __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_ph (__m256h __A, __m256h __B)
+{
+  return (__m256h) ((__v16hf) __A * (__v16hf) __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_vmulph_v8hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+  return __builtin_ia32_vmulph_v16hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_vmulph_v8hf_mask (__B, __C, _mm_setzero_ph (),
+					  __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_vmulph_v16hf_mask (__B, __C,
+					   _mm256_setzero_ph (), __A);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ph (__m128h __A, __m128h __B)
+{
+  return (__m128h) ((__v8hf) __A / (__v8hf) __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_ph (__m256h __A, __m256h __B)
+{
+  return (__m256h) ((__v16hf) __A / (__v16hf) __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_vdivph_v8hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+  return __builtin_ia32_vdivph_v16hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_vdivph_v8hf_mask (__B, __C, _mm_setzero_ph (),
+					  __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_vdivph_v16hf_mask (__B, __C,
+					   _mm256_setzero_ph (), __A);
+}
+
+#ifdef __DISABLE_AVX512FP16VL__
+#undef __DISABLE_AVX512FP16VL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512FP16VL__ */
+
+#endif /* __AVX512FP16VLINTRIN_H_INCLUDED */
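The 128/256-bit variants follow the same pattern but additionally require AVX512VL. A usage sketch (editorial illustration, not part of the patch; the mask constant is arbitrary):

#include <immintrin.h>

/* Hypothetical demo; build with -mavx512fp16 -mavx512vl.  */
__m256h
scale_even_lanes (__m256h x, __m256h s)
{
  /* 0x5555 sets the mask bit for the even of the 16 half-precision lanes;
     odd lanes pass through unchanged from X (merge-masking).  */
  return _mm256_mask_mul_ph (x, (__mmask16) 0x5555, x, s);
}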
gcc/config/i386/i386-builtin-types.def
@@ -98,6 +98,7 @@ DEF_VECTOR_TYPE (V16UQI, UQI, V16QI)
 # AVX vectors
 DEF_VECTOR_TYPE (V4DF, DOUBLE)
 DEF_VECTOR_TYPE (V8SF, FLOAT)
+DEF_VECTOR_TYPE (V16HF, FLOAT16)
 DEF_VECTOR_TYPE (V4DI, DI)
 DEF_VECTOR_TYPE (V8SI, SI)
 DEF_VECTOR_TYPE (V16HI, HI)
@@ -108,6 +109,7 @@ DEF_VECTOR_TYPE (V16UHI, UHI, V16HI)
 
 # AVX512F vectors
 DEF_VECTOR_TYPE (V32SF, FLOAT)
+DEF_VECTOR_TYPE (V32HF, FLOAT16)
 DEF_VECTOR_TYPE (V16SF, FLOAT)
 DEF_VECTOR_TYPE (V8DF, DOUBLE)
 DEF_VECTOR_TYPE (V8DI, DI)
@@ -1302,3 +1304,8 @@ DEF_FUNCTION_TYPE (UINT8, PV2DI, PCV2DI, PCVOID)
 
 # FP16 builtins
 DEF_FUNCTION_TYPE (V8HF, V8HI)
+DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI)
+DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, V16HF, UHI)
+DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT)
+DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI)
+DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT)
gcc/config/i386/i386-builtin.def
@@ -2774,6 +2774,20 @@ BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf, "__builti
 BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__builtin_ia32_dpbf16ps_v4sf_mask", IX86_BUILTIN_DPHI16PS_V4SF_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
 BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
 
+/* AVX512FP16.  */
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_vaddph_v8hf_mask", IX86_BUILTIN_VADDPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_vaddph_v16hf_mask", IX86_BUILTIN_VADDPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_vaddph_v32hf_mask", IX86_BUILTIN_VADDPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_vsubph_v8hf_mask", IX86_BUILTIN_VSUBPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_vsubph_v16hf_mask", IX86_BUILTIN_VSUBPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_vsubph_v32hf_mask", IX86_BUILTIN_VSUBPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_vmulph_v8hf_mask", IX86_BUILTIN_VMULPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_vmulph_v16hf_mask", IX86_BUILTIN_VMULPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_vmulph_v32hf_mask", IX86_BUILTIN_VMULPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_vdivph_v8hf_mask", IX86_BUILTIN_VDIVPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_vdivph_v16hf_mask", IX86_BUILTIN_VDIVPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_vdivph_v32hf_mask", IX86_BUILTIN_VDIVPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+
 /* Builtins with rounding support.  */
 BDESC_END (ARGS, ROUND_ARGS)
 
@@ -2973,6 +2987,12 @@ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fixuns_truncv8dfv8di2_mask_round, "
 BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
 
+/* AVX512FP16.  */
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_vaddph_v32hf_mask_round", IX86_BUILTIN_VADDPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_vsubph_v32hf_mask_round", IX86_BUILTIN_VSUBPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_vmulph_v32hf_mask_round", IX86_BUILTIN_VMULPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_vdivph_v32hf_mask_round", IX86_BUILTIN_VDIVPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+
 BDESC_END (ROUND_ARGS, MULTI_ARG)
 
 /* FMA4 and XOP.  */
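Note the operand order: these *_mask builtins take (src1, src2, merge source, mask), which is why the intrinsic wrappers above pass (__C, __D, __A, __B). A scalar model of what the 512-bit add builtin computes per lane (editorial sketch of the semantics, not code from the patch):

/* Hypothetical scalar model of __builtin_ia32_vaddph_v32hf_mask.  */
static void
vaddph_v32hf_mask_model (const _Float16 *src1, const _Float16 *src2,
			 const _Float16 *merge, unsigned int mask,
			 _Float16 *dst)
{
  for (int i = 0; i < 32; i++)
    dst[i] = ((mask >> i) & 1) ? src1[i] + src2[i] : merge[i];
}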
gcc/config/i386/i386-expand.c
@@ -10038,6 +10038,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
     case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
     case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
+    case V32HF_FTYPE_V32HF_V32HF_V32HF_USI:
     case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
     case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
     case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
@@ -10055,6 +10056,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
     case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
     case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
+    case V16HF_FTYPE_V16HF_V16HF_V16HF_UHI:
     case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
     case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
     case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
@@ -10062,6 +10064,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
     case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
     case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
+    case V8HF_FTYPE_V8HF_V8HF_V8HF_UQI:
     case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
     case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
     case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
@@ -10738,6 +10741,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
     case INT_FTYPE_V4SF_INT:
       nargs = 2;
       break;
+    case V32HF_FTYPE_V32HF_V32HF_INT:
     case V4SF_FTYPE_V4SF_UINT_INT:
     case V4SF_FTYPE_V4SF_UINT64_INT:
     case V2DF_FTYPE_V2DF_UINT64_INT:
@@ -10778,6 +10782,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
     case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
     case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
     case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
+    case V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT:
     case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
     case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
     case V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT:
gcc/config/i386/immintrin.h
@@ -96,6 +96,8 @@
 
 #ifdef __SSE2__
 #include <avx512fp16intrin.h>
+
+#include <avx512fp16vlintrin.h>
 #endif
 
 #include <shaintrin.h>
gcc/config/i386/sse.md
@@ -298,6 +298,13 @@
   [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
 
+(define_mode_iterator VFH
+  [(V32HF "TARGET_AVX512FP16")
+   (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+
 ;; 128- and 256-bit float vector modes
 (define_mode_iterator VF_128_256
   [(V8SF "TARGET_AVX") V4SF
@@ -321,6 +328,13 @@
 (define_mode_iterator VF2
   [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
 
+;; All DFmode & HFmode vector float modes
+(define_mode_iterator VF2H
+  [(V32HF "TARGET_AVX512FP16")
+   (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
+
 ;; 128- and 256-bit DF vector modes
 (define_mode_iterator VF2_128_256
   [(V4DF "TARGET_AVX") V2DF])
@@ -885,6 +899,7 @@
    (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
    (V16SI "QI") (V8SI "QI") (V4SI "QI")
    (V8DI "QI") (V4DI "QI") (V2DI "QI")
+   (V32HF "HI") (V16HF "QI") (V8HF "QI")
    (V16SF "QI") (V8SF "QI") (V4SF "QI")
    (V8DF "QI") (V4DF "QI") (V2DF "QI")])
 
@@ -2032,18 +2047,18 @@
 })
 
 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
-  [(set (match_operand:VF 0 "register_operand")
-	(plusminus:VF
-	  (match_operand:VF 1 "<round_nimm_predicate>")
-	  (match_operand:VF 2 "<round_nimm_predicate>")))]
+  [(set (match_operand:VFH 0 "register_operand")
+	(plusminus:VFH
+	  (match_operand:VFH 1 "<round_nimm_predicate>")
+	  (match_operand:VFH 2 "<round_nimm_predicate>")))]
   "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
-  [(set (match_operand:VF 0 "register_operand" "=x,v")
-	(plusminus:VF
-	  (match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
-	  (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
+  [(set (match_operand:VFH 0 "register_operand" "=x,v")
+	(plusminus:VFH
+	  (match_operand:VFH 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
+	  (match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
   "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
    && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
@@ -2121,18 +2136,18 @@
 })
 
 (define_expand "mul<mode>3<mask_name><round_name>"
-  [(set (match_operand:VF 0 "register_operand")
-	(mult:VF
-	  (match_operand:VF 1 "<round_nimm_predicate>")
-	  (match_operand:VF 2 "<round_nimm_predicate>")))]
+  [(set (match_operand:VFH 0 "register_operand")
+	(mult:VFH
+	  (match_operand:VFH 1 "<round_nimm_predicate>")
+	  (match_operand:VFH 2 "<round_nimm_predicate>")))]
   "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
 
 (define_insn "*mul<mode>3<mask_name><round_name>"
-  [(set (match_operand:VF 0 "register_operand" "=x,v")
-	(mult:VF
-	  (match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v")
-	  (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
+  [(set (match_operand:VFH 0 "register_operand" "=x,v")
+	(mult:VFH
+	  (match_operand:VFH 1 "<bcst_round_nimm_predicate>" "%0,v")
+	  (match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
   "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
    && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
@@ -2195,9 +2210,9 @@
    (set_attr "mode" "<ssescalarmode>")])
 
 (define_expand "div<mode>3"
-  [(set (match_operand:VF2 0 "register_operand")
-	(div:VF2 (match_operand:VF2 1 "register_operand")
-		 (match_operand:VF2 2 "vector_operand")))]
+  [(set (match_operand:VF2H 0 "register_operand")
+	(div:VF2H (match_operand:VF2H 1 "register_operand")
+		  (match_operand:VF2H 2 "vector_operand")))]
   "TARGET_SSE2")
 
 (define_expand "div<mode>3"
@@ -2236,10 +2251,10 @@
 })
 
 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
-  [(set (match_operand:VF 0 "register_operand" "=x,v")
-	(div:VF
-	  (match_operand:VF 1 "register_operand" "0,v")
-	  (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
+  [(set (match_operand:VFH 0 "register_operand" "=x,v")
+	(div:VFH
+	  (match_operand:VFH 1 "register_operand" "0,v")
+	  (match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
   "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
   div<ssemodesuffix>\t{%2, %0|%0, %2}
gcc/config/i386/subst.md
@@ -24,6 +24,7 @@
    V32HI V16HI V8HI
    V16SI V8SI V4SI
    V8DI V4DI V2DI
+   V32HF V16HF V8HF
    V16SF V8SF V4SF
    V8DF V4DF V2DF])
 
@@ -35,6 +36,7 @@
    V32HI V16HI V8HI
    V16SI V8SI V4SI
    V8DI V4DI V2DI
+   V32HF V16HF V8HF
    V16SF V8SF V4SF
    V8DF V4DF V2DF
    QI HI SI DI SF DF])
@@ -161,7 +163,9 @@
 (define_subst_attr "round_mode512bit_condition" "round" "1" "(<MODE>mode == V16SFmode
 							      || <MODE>mode == V8DFmode
 							      || <MODE>mode == V8DImode
-							      || <MODE>mode == V16SImode)")
+							      || <MODE>mode == V16SImode
+							      || <MODE>mode == V32HFmode)")
 
 (define_subst_attr "round_modev8sf_condition" "round" "1" "(<MODE>mode == V8SFmode)")
 (define_subst_attr "round_modev4sf_condition" "round" "1" "(<MODE>mode == V4SFmode)")
 (define_subst_attr "round_codefor" "round" "*" "")
gcc/testsuite/gcc.target/i386/avx-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16 -mavx512vl" } */
 /* { dg-add-options bind_pic_locally } */
 
 #include <mm_malloc.h>
@@ -685,6 +685,12 @@
 #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1)
 #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
 
+/* avx512fp16intrin.h */
+#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
+
 /* vpclmulqdqintrin.h */
 #define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1)
 #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
gcc/testsuite/gcc.target/i386/avx-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16 -mavx512vl" } */
 /* { dg-add-options bind_pic_locally } */
 
 #include <mm_malloc.h>
gcc/testsuite/gcc.target/i386/avx512fp16-11a.c (new file, 36 lines)
@@ -0,0 +1,36 @@
+/* { dg-do compile} */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+#include <immintrin.h>
+__m512h
+__attribute__ ((noinline, noclone))
+vadd512 (__m512h a, __m512h b)
+{
+  return a + b;
+}
+
+__m512h
+__attribute__ ((noinline, noclone))
+vsub512 (__m512h a, __m512h b)
+{
+  return a - b;
+}
+
+__m512h
+__attribute__ ((noinline, noclone))
+vmul512 (__m512h a, __m512h b)
+{
+  return a * b;
+}
+
+__m512h
+__attribute__ ((noinline, noclone))
+vdiv512 (__m512h a, __m512h b)
+{
+  return a / b;
+}
+
+/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
gcc/testsuite/gcc.target/i386/avx512fp16-11b.c (new file, 75 lines)
@@ -0,0 +1,75 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mfpmath=sse" } */
+
+#include <string.h>
+#include <stdlib.h>
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512-check.h"
+#include "avx512fp16-11a.c"
+
+/* Get random float16 between -50.x to 50.x. */
+_Float16
+get_float16_noround()
+{
+  return ((int) (100.0 * rand ()/ (RAND_MAX + 1.0)) - 50)
+    + 0.1f * (int) (10 * rand() / (RAND_MAX + 1.0));
+}
+
+static void
+do_test (void)
+{
+  _Float16 x[32];
+  _Float16 y[32];
+  _Float16 res_add[32];
+  _Float16 res_sub[32];
+  _Float16 res_mul[32];
+  _Float16 res_div[32];
+  for (int i = 0 ; i != 32; i++)
+    {
+      x[i] = get_float16_noround ();
+      y[i] = get_float16_noround ();
+      if (y[i] == 0)
+	y[i] = 1.0f;
+      res_add[i] = x[i] + y[i];
+      res_sub[i] = x[i] - y[i];
+      res_mul[i] = x[i] * y[i];
+      res_div[i] = x[i] / y[i];
+
+    }
+
+  union512h u512 = { x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
+		     x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15],
+		     x[16], x[17], x[18], x[19], x[20], x[21], x[22], x[23],
+		     x[24], x[25], x[26], x[27], x[28], x[29], x[30], x[31] };
+  union512h u512_1 = {y[0], y[1], y[2], y[3], y[4], y[5], y[6], y[7],
+		      y[8], y[9], y[10], y[11], y[12], y[13], y[14], y[15],
+		      y[16], y[17], y[18], y[19], y[20], y[21], y[22], y[23],
+		      y[24], y[25], y[26], y[27], y[28], y[29], y[30], y[31] };
+
+  __m512h v512;
+  union512h a512;
+
+  memset (&v512, -1, sizeof (v512));
+  v512 = vadd512 (u512.x, u512_1.x);
+  a512.x = v512;
+  if (check_union512h (a512, res_add))
+    abort ();
+  memset (&v512, -1, sizeof (v512));
+  v512 = vsub512 (u512.x, u512_1.x);
+  a512.x = v512;
+  if (check_union512h (a512, res_sub))
+    abort ();
+  memset (&v512, -1, sizeof (v512));
+  v512 = vmul512 (u512.x, u512_1.x);
+  a512.x = v512;
+  if (check_union512h (a512, res_mul))
+    abort ();
+  memset (&v512, -1, sizeof (v512));
+  v512 = vdiv512 (u512.x, u512_1.x);
+  a512.x = v512;
+  if (check_union512h (a512, res_div))
+    abort ();
+}
gcc/testsuite/gcc.target/i386/avx512vlfp16-11a.c (new file, 68 lines)
@@ -0,0 +1,68 @@
+/* { dg-do compile} */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+#include <immintrin.h>
+__m128h
+__attribute__ ((noinline, noclone))
+vadd128 (__m128h a, __m128h b)
+{
+  return a + b;
+}
+
+__m256h
+__attribute__ ((noinline, noclone))
+vadd256 (__m256h a, __m256h b)
+{
+  return a + b;
+}
+
+__m128h
+__attribute__ ((noinline, noclone))
+vsub128 (__m128h a, __m128h b)
+{
+  return a - b;
+}
+
+__m256h
+__attribute__ ((noinline, noclone))
+vsub256 (__m256h a, __m256h b)
+{
+  return a - b;
+}
+
+__m128h
+__attribute__ ((noinline, noclone))
+vmul128 (__m128h a, __m128h b)
+{
+  return a * b;
+}
+
+__m256h
+__attribute__ ((noinline, noclone))
+vmul256 (__m256h a, __m256h b)
+{
+  return a * b;
+}
+
+__m128h
+__attribute__ ((noinline, noclone))
+vdiv128 (__m128h a, __m128h b)
+{
+  return a / b;
+}
+
+__m256h
+__attribute__ ((noinline, noclone))
+vdiv256 (__m256h a, __m256h b)
+{
+  return a / b;
+}
+
+/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
gcc/testsuite/gcc.target/i386/avx512vlfp16-11b.c (new file, 96 lines)
@@ -0,0 +1,96 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+#include <string.h>
+#include <stdlib.h>
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512-check.h"
+#include "avx512vlfp16-11a.c"
+
+/* Get random float16 between -50.x to 50.x. */
+_Float16
+get_float16_noround()
+{
+  return ((int) (100.0 * rand ()/ (RAND_MAX + 1.0)) - 50)
+    + 0.1f * (int) (10 * rand() / (RAND_MAX + 1.0));
+}
+
+static void
+do_test (void)
+{
+  _Float16 x[16];
+  _Float16 y[16];
+  _Float16 res_add[16];
+  _Float16 res_sub[16];
+  _Float16 res_mul[16];
+  _Float16 res_div[16];
+  for (int i = 0 ; i != 16; i++)
+    {
+      x[i] = get_float16_noround ();
+      y[i] = get_float16_noround ();
+      if (y[i] == 0)
+	y[i] = 1.0f;
+      res_add[i] = x[i] + y[i];
+      res_sub[i] = x[i] - y[i];
+      res_mul[i] = x[i] * y[i];
+      res_div[i] = x[i] / y[i];
+
+    }
+
+  union128h u128 = { x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7] };
+  union128h u128_1 = { y[0], y[1], y[2], y[3], y[4], y[5], y[6], y[7] };
+  union256h u256 = { x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
+		     x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15] };
+  union256h u256_1 = { y[0], y[1], y[2], y[3], y[4], y[5], y[6], y[7],
+		       y[8], y[9], y[10], y[11], y[12], y[13], y[14], y[15]};
+
+  __m128h v128;
+  __m256h v256;
+  union128h a128;
+  union256h a256;
+
+  memset (&v128, -1, sizeof (v128));
+  v128 = vadd128 (u128.x, u128_1.x);
+  a128.x = v128;
+  if (check_union128h (a128, res_add))
+    abort ();
+  memset (&v128, -1, sizeof (v128));
+  v128 = vsub128 (u128.x, u128_1.x);
+  a128.x = v128;
+  if (check_union128h (a128, res_sub))
+    abort ();
+  memset (&v128, -1, sizeof (v128));
+  v128 = vmul128 (u128.x, u128_1.x);
+  a128.x = v128;
+  if (check_union128h (a128, res_mul))
+    abort ();
+  memset (&v128, -1, sizeof (v128));
+  v128 = vdiv128 (u128.x, u128_1.x);
+  a128.x = v128;
+  if (check_union128h (a128, res_div))
+    abort ();
+
+  memset (&v256, -1, sizeof (v256));
+  v256 = vadd256 (u256.x, u256_1.x);
+  a256.x = v256;
+  if (check_union256h (a256, res_add))
+    abort ();
+  memset (&v256, -1, sizeof (v256));
+  v256 = vsub256 (u256.x, u256_1.x);
+  a256.x = v256;
+  if (check_union256h (a256, res_sub))
+    abort ();
+  memset (&v256, -1, sizeof (v256));
+  v256 = vmul256 (u256.x, u256_1.x);
+  a256.x = v256;
+  if (check_union256h (a256, res_mul))
+    abort ();
+  memset (&v256, -1, sizeof (v256));
+  v256 = vdiv256 (u256.x, u256_1.x);
+  a256.x = v256;
+  if (check_union256h (a256, res_div))
+    abort ();
+}
gcc/testsuite/gcc.target/i386/sse-13.c
@@ -702,6 +702,12 @@
 #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1)
 #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
 
+/* avx512fp16intrin.h */
+#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
+
 /* vpclmulqdqintrin.h */
 #define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1)
 #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
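As I read it, these #define wrappers exist because the test compiles every intrinsic in a context where the rounding argument would otherwise not be a usable immediate; pinning it to the literal 8 satisfies the builtin's immediate-operand check. A reduced illustration (hypothetical):

/* The builtin's last operand must fold to a compile-time constant;
   the literal 8 (round-to-nearest, no exceptions) is always valid.  */
__m512h
f (__m512h a, __m512h b, __m512h c, __mmask32 m)
{
  return __builtin_ia32_vaddph_v32hf_mask_round (a, b, c, m, 8);
}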
gcc/testsuite/gcc.target/i386/sse-14.c
@@ -667,6 +667,20 @@ test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 8)
 test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 8)
 test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 8)
 
+/* avx512fp16intrin.h */
+test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_div_round_ph, __m512h, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_div_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_div_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+
 /* shaintrin.h */
 test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)
gcc/testsuite/gcc.target/i386/sse-22.c
@@ -772,6 +772,20 @@ test_2 (_mm_rcp28_round_ss, __m128, __m128, __m128, 8)
 test_2 (_mm_rsqrt28_round_sd, __m128d, __m128d, __m128d, 8)
 test_2 (_mm_rsqrt28_round_ss, __m128, __m128, __m128, 8)
 
+/* avx512fp16intrin.h */
+test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_div_round_ph, __m512h, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_div_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_div_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+
 /* shaintrin.h */
 test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)
gcc/testsuite/gcc.target/i386/sse-23.c
@@ -703,6 +703,12 @@
 #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1)
 #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
 
+/* avx512fp16intrin.h */
+#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
+
 /* vpclmulqdqintrin.h */
 #define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1)
 #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)