AVX512FP16: Add vaddph/vsubph/vdivph/vmulph.

gcc/ChangeLog:

	* config.gcc: Add avx512fp16vlintrin.h.
	* config/i386/avx512fp16intrin.h: (_mm512_add_ph): New intrinsic.
	(_mm512_mask_add_ph): Likewise.
	(_mm512_maskz_add_ph): Likewise.
	(_mm512_sub_ph): Likewise.
	(_mm512_mask_sub_ph): Likewise.
	(_mm512_maskz_sub_ph): Likewise.
	(_mm512_mul_ph): Likewise.
	(_mm512_mask_mul_ph): Likewise.
	(_mm512_maskz_mul_ph): Likewise.
	(_mm512_div_ph): Likewise.
	(_mm512_mask_div_ph): Likewise.
	(_mm512_maskz_div_ph): Likewise.
	(_mm512_add_round_ph): Likewise.
	(_mm512_mask_add_round_ph): Likewise.
	(_mm512_maskz_add_round_ph): Likewise.
	(_mm512_sub_round_ph): Likewise.
	(_mm512_mask_sub_round_ph): Likewise.
	(_mm512_maskz_sub_round_ph): Likewise.
	(_mm512_mul_round_ph): Likewise.
	(_mm512_mask_mul_round_ph): Likewise.
	(_mm512_maskz_mul_round_ph): Likewise.
	(_mm512_div_round_ph): Likewise.
	(_mm512_mask_div_round_ph): Likewise.
	(_mm512_maskz_div_round_ph): Likewise.
	* config/i386/avx512fp16vlintrin.h: New header.
	* config/i386/i386-builtin-types.def (V16HF, V8HF, V32HF):
	Add new builtin types.
	* config/i386/i386-builtin.def: Add corresponding builtins.
	* config/i386/i386-expand.c
	(ix86_expand_args_builtin): Handle new builtin types.
	(ix86_expand_round_builtin): Likewise.
	* config/i386/immintrin.h: Include avx512fp16vlintrin.h.
	* config/i386/sse.md (VFH): New mode_iterator.
	(VF2H): Likewise.
	(avx512fmaskmode): Add HF vector modes.
	(avx512fmaskhalfmode): Likewise.
	(<plusminus_insn><mode>3<mask_name><round_name>): Adjust to support
	HF vector modes.
	(*<plusminus_insn><mode>3<mask_name><round_name>): Likewise.
	(mul<mode>3<mask_name><round_name>): Likewise.
	(*mul<mode>3<mask_name><round_name>): Likewise.
	(div<mode>3): Likewise.
	(<sse>_div<mode>3<mask_name><round_name>): Likewise.
	* config/i386/subst.md (SUBST_V): Add HF vector modes.
	(SUBST_A): Likewise.
	(round_mode512bit_condition): Adjust for V32HFmode.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-1.c: Add -mavx512vl and test for new intrinsics.
	* gcc.target/i386/avx-2.c: Add -mavx512vl.
	* gcc.target/i386/avx512fp16-11a.c: New test.
	* gcc.target/i386/avx512fp16-11b.c: Ditto.
	* gcc.target/i386/avx512vlfp16-11a.c: Ditto.
	* gcc.target/i386/avx512vlfp16-11b.c: Ditto.
	* gcc.target/i386/sse-13.c: Add test for new builtins.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/sse-14.c: Add test for new intrinsics.
	* gcc.target/i386/sse-22.c: Ditto.
This commit is contained in:
liuhongt 2019-01-18 14:09:24 -08:00
parent 8f323c712e
commit bd7a34ef55
19 changed files with 871 additions and 27 deletions

View File

@ -416,7 +416,7 @@ i[34567]86-*-* | x86_64-*-*)
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
amxbf16intrin.h x86gprintrin.h uintrintrin.h
hresetintrin.h keylockerintrin.h avxvnniintrin.h
mwaitintrin.h avx512fp16intrin.h"
mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h

View File

@ -217,6 +217,257 @@ _mm_store_sh (void *__P, __m128h __A)
*(_Float16 *) __P = ((__v8hf)__A)[0];
}
/* Intrinsics v[add,sub,mul,div]ph. */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_ph (__m512h __A, __m512h __B)
{
return (__m512h) ((__v32hf) __A + (__v32hf) __B);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
return __builtin_ia32_vaddph_v32hf_mask (__C, __D, __A, __B);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
return __builtin_ia32_vaddph_v32hf_mask (__B, __C,
_mm512_setzero_ph (), __A);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_ph (__m512h __A, __m512h __B)
{
return (__m512h) ((__v32hf) __A - (__v32hf) __B);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
return __builtin_ia32_vsubph_v32hf_mask (__C, __D, __A, __B);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
return __builtin_ia32_vsubph_v32hf_mask (__B, __C,
_mm512_setzero_ph (), __A);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_ph (__m512h __A, __m512h __B)
{
return (__m512h) ((__v32hf) __A * (__v32hf) __B);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
return __builtin_ia32_vmulph_v32hf_mask (__C, __D, __A, __B);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
return __builtin_ia32_vmulph_v32hf_mask (__B, __C,
_mm512_setzero_ph (), __A);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_ph (__m512h __A, __m512h __B)
{
return (__m512h) ((__v32hf) __A / (__v32hf) __B);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
return __builtin_ia32_vdivph_v32hf_mask (__C, __D, __A, __B);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
return __builtin_ia32_vdivph_v32hf_mask (__B, __C,
_mm512_setzero_ph (), __A);
}
#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
{
return __builtin_ia32_vaddph_v32hf_mask_round (__A, __B,
_mm512_setzero_ph (),
(__mmask32) -1, __C);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
__m512h __D, const int __E)
{
return __builtin_ia32_vaddph_v32hf_mask_round (__C, __D, __A, __B, __E);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
const int __D)
{
return __builtin_ia32_vaddph_v32hf_mask_round (__B, __C,
_mm512_setzero_ph (),
__A, __D);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
{
return __builtin_ia32_vsubph_v32hf_mask_round (__A, __B,
_mm512_setzero_ph (),
(__mmask32) -1, __C);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
__m512h __D, const int __E)
{
return __builtin_ia32_vsubph_v32hf_mask_round (__C, __D, __A, __B, __E);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
const int __D)
{
return __builtin_ia32_vsubph_v32hf_mask_round (__B, __C,
_mm512_setzero_ph (),
__A, __D);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
{
return __builtin_ia32_vmulph_v32hf_mask_round (__A, __B,
_mm512_setzero_ph (),
(__mmask32) -1, __C);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
__m512h __D, const int __E)
{
return __builtin_ia32_vmulph_v32hf_mask_round (__C, __D, __A, __B, __E);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
const int __D)
{
return __builtin_ia32_vmulph_v32hf_mask_round (__B, __C,
_mm512_setzero_ph (),
__A, __D);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
{
return __builtin_ia32_vdivph_v32hf_mask_round (__A, __B,
_mm512_setzero_ph (),
(__mmask32) -1, __C);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
__m512h __D, const int __E)
{
return __builtin_ia32_vdivph_v32hf_mask_round (__C, __D, __A, __B, __E);
}
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
const int __D)
{
return __builtin_ia32_vdivph_v32hf_mask_round (__B, __C,
_mm512_setzero_ph (),
__A, __D);
}
#else
#define _mm512_add_round_ph(A, B, C) \
((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((A), (B), \
_mm512_setzero_ph (),\
(__mmask32)-1, (C)))
#define _mm512_mask_add_round_ph(A, B, C, D, E) \
((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((C), (D), (A), (B), (E)))
#define _mm512_maskz_add_round_ph(A, B, C, D) \
((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((B), (C), \
_mm512_setzero_ph (),\
(A), (D)))
#define _mm512_sub_round_ph(A, B, C) \
((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((A), (B), \
_mm512_setzero_ph (),\
(__mmask32)-1, (C)))
#define _mm512_mask_sub_round_ph(A, B, C, D, E) \
((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((C), (D), (A), (B), (E)))
#define _mm512_maskz_sub_round_ph(A, B, C, D) \
((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((B), (C), \
_mm512_setzero_ph (),\
(A), (D)))
#define _mm512_mul_round_ph(A, B, C) \
((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((A), (B), \
_mm512_setzero_ph (),\
(__mmask32)-1, (C)))
#define _mm512_mask_mul_round_ph(A, B, C, D, E) \
((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((C), (D), (A), (B), (E)))
#define _mm512_maskz_mul_round_ph(A, B, C, D) \
((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((B), (C), \
_mm512_setzero_ph (),\
(A), (D)))
#define _mm512_div_round_ph(A, B, C) \
((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((A), (B), \
_mm512_setzero_ph (),\
(__mmask32)-1, (C)))
#define _mm512_mask_div_round_ph(A, B, C, D, E) \
((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((C), (D), (A), (B), (E)))
#define _mm512_maskz_div_round_ph(A, B, C, D) \
((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((B), (C), \
_mm512_setzero_ph (),\
(A), (D)))
#endif /* __OPTIMIZE__ */
#ifdef __DISABLE_AVX512FP16__
#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options

View File

@ -0,0 +1,219 @@
/* Copyright (C) 2019 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512fp16vlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512FP16VLINTRIN_H_INCLUDED
#define __AVX512FP16VLINTRIN_H_INCLUDED
#if !defined(__AVX512VL__) || !defined(__AVX512FP16__)
#pragma GCC push_options
#pragma GCC target("avx512fp16,avx512vl")
#define __DISABLE_AVX512FP16VL__
#endif /* __AVX512FP16VL__ */
/* Intrinsics v[add,sub,mul,div]ph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ph (__m128h __A, __m128h __B)
{
return (__m128h) ((__v8hf) __A + (__v8hf) __B);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_ph (__m256h __A, __m256h __B)
{
return (__m256h) ((__v16hf) __A + (__v16hf) __B);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
return __builtin_ia32_vaddph_v8hf_mask (__C, __D, __A, __B);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
return __builtin_ia32_vaddph_v16hf_mask (__C, __D, __A, __B);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
return __builtin_ia32_vaddph_v8hf_mask (__B, __C, _mm_setzero_ph (),
__A);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
return __builtin_ia32_vaddph_v16hf_mask (__B, __C,
_mm256_setzero_ph (), __A);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ph (__m128h __A, __m128h __B)
{
return (__m128h) ((__v8hf) __A - (__v8hf) __B);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_ph (__m256h __A, __m256h __B)
{
return (__m256h) ((__v16hf) __A - (__v16hf) __B);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
return __builtin_ia32_vsubph_v8hf_mask (__C, __D, __A, __B);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
return __builtin_ia32_vsubph_v16hf_mask (__C, __D, __A, __B);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
return __builtin_ia32_vsubph_v8hf_mask (__B, __C, _mm_setzero_ph (),
__A);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
return __builtin_ia32_vsubph_v16hf_mask (__B, __C,
_mm256_setzero_ph (), __A);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ph (__m128h __A, __m128h __B)
{
return (__m128h) ((__v8hf) __A * (__v8hf) __B);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_ph (__m256h __A, __m256h __B)
{
return (__m256h) ((__v16hf) __A * (__v16hf) __B);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
return __builtin_ia32_vmulph_v8hf_mask (__C, __D, __A, __B);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
return __builtin_ia32_vmulph_v16hf_mask (__C, __D, __A, __B);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
return __builtin_ia32_vmulph_v8hf_mask (__B, __C, _mm_setzero_ph (),
__A);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
return __builtin_ia32_vmulph_v16hf_mask (__B, __C,
_mm256_setzero_ph (), __A);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ph (__m128h __A, __m128h __B)
{
return (__m128h) ((__v8hf) __A / (__v8hf) __B);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_ph (__m256h __A, __m256h __B)
{
return (__m256h) ((__v16hf) __A / (__v16hf) __B);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
return __builtin_ia32_vdivph_v8hf_mask (__C, __D, __A, __B);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
return __builtin_ia32_vdivph_v16hf_mask (__C, __D, __A, __B);
}
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
return __builtin_ia32_vdivph_v8hf_mask (__B, __C, _mm_setzero_ph (),
__A);
}
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
return __builtin_ia32_vdivph_v16hf_mask (__B, __C,
_mm256_setzero_ph (), __A);
}
#ifdef __DISABLE_AVX512FP16VL__
#undef __DISABLE_AVX512FP16VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512FP16VL__ */
#endif /* __AVX512FP16VLINTRIN_H_INCLUDED */

View File

@ -98,6 +98,7 @@ DEF_VECTOR_TYPE (V16UQI, UQI, V16QI)
# AVX vectors
DEF_VECTOR_TYPE (V4DF, DOUBLE)
DEF_VECTOR_TYPE (V8SF, FLOAT)
DEF_VECTOR_TYPE (V16HF, FLOAT16)
DEF_VECTOR_TYPE (V4DI, DI)
DEF_VECTOR_TYPE (V8SI, SI)
DEF_VECTOR_TYPE (V16HI, HI)
@ -108,6 +109,7 @@ DEF_VECTOR_TYPE (V16UHI, UHI, V16HI)
# AVX512F vectors
DEF_VECTOR_TYPE (V32SF, FLOAT)
DEF_VECTOR_TYPE (V32HF, FLOAT16)
DEF_VECTOR_TYPE (V16SF, FLOAT)
DEF_VECTOR_TYPE (V8DF, DOUBLE)
DEF_VECTOR_TYPE (V8DI, DI)
@ -1302,3 +1304,8 @@ DEF_FUNCTION_TYPE (UINT8, PV2DI, PCV2DI, PCVOID)
# FP16 builtins
DEF_FUNCTION_TYPE (V8HF, V8HI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, V16HF, UHI)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT)

View File

@ -2774,6 +2774,20 @@ BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf, "__builti
BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__builtin_ia32_dpbf16ps_v4sf_mask", IX86_BUILTIN_DPHI16PS_V4SF_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
/* AVX512FP16. */
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_vaddph_v8hf_mask", IX86_BUILTIN_VADDPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_vaddph_v16hf_mask", IX86_BUILTIN_VADDPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_vaddph_v32hf_mask", IX86_BUILTIN_VADDPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_vsubph_v8hf_mask", IX86_BUILTIN_VSUBPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_vsubph_v16hf_mask", IX86_BUILTIN_VSUBPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_vsubph_v32hf_mask", IX86_BUILTIN_VSUBPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_vmulph_v8hf_mask", IX86_BUILTIN_VMULPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_vmulph_v16hf_mask", IX86_BUILTIN_VMULPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_vmulph_v32hf_mask", IX86_BUILTIN_VMULPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_vdivph_v8hf_mask", IX86_BUILTIN_VDIVPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_vdivph_v16hf_mask", IX86_BUILTIN_VDIVPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_vdivph_v32hf_mask", IX86_BUILTIN_VDIVPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
@ -2973,6 +2987,12 @@ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fixuns_truncv8dfv8di2_mask_round, "
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
/* AVX512FP16. */
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_vaddph_v32hf_mask_round", IX86_BUILTIN_VADDPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_vsubph_v32hf_mask_round", IX86_BUILTIN_VSUBPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_vmulph_v32hf_mask_round", IX86_BUILTIN_VMULPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_vdivph_v32hf_mask_round", IX86_BUILTIN_VDIVPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC_END (ROUND_ARGS, MULTI_ARG)
/* FMA4 and XOP. */

View File

@ -10038,6 +10038,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
case V32HF_FTYPE_V32HF_V32HF_V32HF_USI:
case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
@ -10055,6 +10056,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
case V16HF_FTYPE_V16HF_V16HF_V16HF_UHI:
case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
@ -10062,6 +10064,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
case V8HF_FTYPE_V8HF_V8HF_V8HF_UQI:
case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
@ -10738,6 +10741,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case INT_FTYPE_V4SF_INT:
nargs = 2;
break;
case V32HF_FTYPE_V32HF_V32HF_INT:
case V4SF_FTYPE_V4SF_UINT_INT:
case V4SF_FTYPE_V4SF_UINT64_INT:
case V2DF_FTYPE_V2DF_UINT64_INT:
@ -10778,6 +10782,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
case V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT:
case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
case V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT:

View File

@ -96,6 +96,8 @@
#ifdef __SSE2__
#include <avx512fp16intrin.h>
#include <avx512fp16vlintrin.h>
#endif
#include <shaintrin.h>

View File

@ -298,6 +298,13 @@
[(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
(define_mode_iterator VFH
[(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; 128- and 256-bit float vector modes
(define_mode_iterator VF_128_256
[(V8SF "TARGET_AVX") V4SF
@ -321,6 +328,13 @@
(define_mode_iterator VF2
[(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; All DFmode & HFmode vector float modes
(define_mode_iterator VF2H
[(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; 128- and 256-bit DF vector modes
(define_mode_iterator VF2_128_256
[(V4DF "TARGET_AVX") V2DF])
@ -885,6 +899,7 @@
(V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
(V16SI "QI") (V8SI "QI") (V4SI "QI")
(V8DI "QI") (V4DI "QI") (V2DI "QI")
(V32HF "HI") (V16HF "QI") (V8HF "QI")
(V16SF "QI") (V8SF "QI") (V4SF "QI")
(V8DF "QI") (V4DF "QI") (V2DF "QI")])
@ -2032,18 +2047,18 @@
})
(define_expand "<insn><mode>3<mask_name><round_name>"
[(set (match_operand:VF 0 "register_operand")
(plusminus:VF
(match_operand:VF 1 "<round_nimm_predicate>")
(match_operand:VF 2 "<round_nimm_predicate>")))]
[(set (match_operand:VFH 0 "register_operand")
(plusminus:VFH
(match_operand:VFH 1 "<round_nimm_predicate>")
(match_operand:VFH 2 "<round_nimm_predicate>")))]
"TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<insn><mode>3<mask_name><round_name>"
[(set (match_operand:VF 0 "register_operand" "=x,v")
(plusminus:VF
(match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
(match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
[(set (match_operand:VFH 0 "register_operand" "=x,v")
(plusminus:VFH
(match_operand:VFH 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
(match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
"TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@ -2121,18 +2136,18 @@
})
(define_expand "mul<mode>3<mask_name><round_name>"
[(set (match_operand:VF 0 "register_operand")
(mult:VF
(match_operand:VF 1 "<round_nimm_predicate>")
(match_operand:VF 2 "<round_nimm_predicate>")))]
[(set (match_operand:VFH 0 "register_operand")
(mult:VFH
(match_operand:VFH 1 "<round_nimm_predicate>")
(match_operand:VFH 2 "<round_nimm_predicate>")))]
"TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
(define_insn "*mul<mode>3<mask_name><round_name>"
[(set (match_operand:VF 0 "register_operand" "=x,v")
(mult:VF
(match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v")
(match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
[(set (match_operand:VFH 0 "register_operand" "=x,v")
(mult:VFH
(match_operand:VFH 1 "<bcst_round_nimm_predicate>" "%0,v")
(match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
"TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
&& <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@ -2195,9 +2210,9 @@
(set_attr "mode" "<ssescalarmode>")])
(define_expand "div<mode>3"
[(set (match_operand:VF2 0 "register_operand")
(div:VF2 (match_operand:VF2 1 "register_operand")
(match_operand:VF2 2 "vector_operand")))]
[(set (match_operand:VF2H 0 "register_operand")
(div:VF2H (match_operand:VF2H 1 "register_operand")
(match_operand:VF2H 2 "vector_operand")))]
"TARGET_SSE2")
(define_expand "div<mode>3"
@ -2236,10 +2251,10 @@
})
(define_insn "<sse>_div<mode>3<mask_name><round_name>"
[(set (match_operand:VF 0 "register_operand" "=x,v")
(div:VF
(match_operand:VF 1 "register_operand" "0,v")
(match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
[(set (match_operand:VFH 0 "register_operand" "=x,v")
(div:VFH
(match_operand:VFH 1 "register_operand" "0,v")
(match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
"TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
div<ssemodesuffix>\t{%2, %0|%0, %2}

View File

@ -24,6 +24,7 @@
V32HI V16HI V8HI
V16SI V8SI V4SI
V8DI V4DI V2DI
V32HF V16HF V8HF
V16SF V8SF V4SF
V8DF V4DF V2DF])
@ -35,6 +36,7 @@
V32HI V16HI V8HI
V16SI V8SI V4SI
V8DI V4DI V2DI
V32HF V16HF V8HF
V16SF V8SF V4SF
V8DF V4DF V2DF
QI HI SI DI SF DF])
@ -161,7 +163,9 @@
(define_subst_attr "round_mode512bit_condition" "round" "1" "(<MODE>mode == V16SFmode
|| <MODE>mode == V8DFmode
|| <MODE>mode == V8DImode
|| <MODE>mode == V16SImode)")
|| <MODE>mode == V16SImode
|| <MODE>mode == V32HFmode)")
(define_subst_attr "round_modev8sf_condition" "round" "1" "(<MODE>mode == V8SFmode)")
(define_subst_attr "round_modev4sf_condition" "round" "1" "(<MODE>mode == V4SFmode)")
(define_subst_attr "round_codefor" "round" "*" "")

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16" } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16 -mavx512vl" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@ -685,6 +685,12 @@
#define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1)
#define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
/* avx512fp16intrin.h */
#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
/* vpclmulqdqintrin.h */
#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16" } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16 -mavx512vl" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>

View File

@ -0,0 +1,36 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512fp16" } */
#include <immintrin.h>
__m512h
__attribute__ ((noinline, noclone))
vadd512 (__m512h a, __m512h b)
{
return a + b;
}
__m512h
__attribute__ ((noinline, noclone))
vsub512 (__m512h a, __m512h b)
{
return a - b;
}
__m512h
__attribute__ ((noinline, noclone))
vmul512 (__m512h a, __m512h b)
{
return a * b;
}
__m512h
__attribute__ ((noinline, noclone))
vdiv512 (__m512h a, __m512h b)
{
return a / b;
}
/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */

View File

@ -0,0 +1,75 @@
/* { dg-do run { target avx512fp16 } } */
/* { dg-options "-O2 -mavx512fp16 -mfpmath=sse" } */
#include <string.h>
#include <stdlib.h>
static void do_test (void);
#define DO_TEST do_test
#define AVX512FP16
#include "avx512-check.h"
#include "avx512fp16-11a.c"
/* Get a random float16 between -50.x and 50.x: an integer part in
   [-50, 49] plus one random decimal digit scaled by 0.1f.  Relies on the
   default rand() seed, so every run sees the same sequence.  */
_Float16
get_float16_noround (void)
{
  return ((int) (100.0 * rand () / (RAND_MAX + 1.0)) - 50)
	 + 0.1f * (int) (10 * rand () / (RAND_MAX + 1.0));
}
/* Compute 32 half-precision add/sub/mul/div results in scalar code, then
   compare them against the vectorized vadd512/vsub512/vmul512/vdiv512
   (from avx512fp16-11a.c) via the avx512-check.h harness.  */
static void
do_test (void)
{
_Float16 x[32];
_Float16 y[32];
_Float16 res_add[32];
_Float16 res_sub[32];
_Float16 res_mul[32];
_Float16 res_div[32];
for (int i = 0 ; i != 32; i++)
{
x[i] = get_float16_noround ();
y[i] = get_float16_noround ();
/* Avoid division by zero in the reference computation below.  */
if (y[i] == 0)
y[i] = 1.0f;
res_add[i] = x[i] + y[i];
res_sub[i] = x[i] - y[i];
res_mul[i] = x[i] * y[i];
res_div[i] = x[i] / y[i];
}
/* union512h is the check-harness union (presumably a _Float16[32]
   overlaid with __m512h; see avx512-check.h) — the brace initializers
   fill the scalar member.  */
union512h u512 = { x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15],
x[16], x[17], x[18], x[19], x[20], x[21], x[22], x[23],
x[24], x[25], x[26], x[27], x[28], x[29], x[30], x[31] };
union512h u512_1 = {y[0], y[1], y[2], y[3], y[4], y[5], y[6], y[7],
y[8], y[9], y[10], y[11], y[12], y[13], y[14], y[15],
y[16], y[17], y[18], y[19], y[20], y[21], y[22], y[23],
y[24], y[25], y[26], y[27], y[28], y[29], y[30], y[31] };
__m512h v512;
union512h a512;
/* Poison the destination before each call so a stale value cannot
   accidentally match the expected result.  */
memset (&v512, -1, sizeof (v512));
v512 = vadd512 (u512.x, u512_1.x);
a512.x = v512;
if (check_union512h (a512, res_add))
abort ();
memset (&v512, -1, sizeof (v512));
v512 = vsub512 (u512.x, u512_1.x);
a512.x = v512;
if (check_union512h (a512, res_sub))
abort ();
memset (&v512, -1, sizeof (v512));
v512 = vmul512 (u512.x, u512_1.x);
a512.x = v512;
if (check_union512h (a512, res_mul))
abort ();
memset (&v512, -1, sizeof (v512));
v512 = vdiv512 (u512.x, u512_1.x);
a512.x = v512;
if (check_union512h (a512, res_div))
abort ();
}

View File

@ -0,0 +1,68 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
#include <immintrin.h>
/* Elementwise half-precision add of two 128-bit vectors; noinline/noclone
   keep the vaddph in a standalone function for the assembler scan.  */
__m128h
__attribute__ ((noinline, noclone))
vadd128 (__m128h a, __m128h b)
{
  __m128h sum = a + b;
  return sum;
}
/* Elementwise half-precision add of two 256-bit vectors; kept out of line
   for the assembler scan.  */
__m256h
__attribute__ ((noinline, noclone))
vadd256 (__m256h a, __m256h b)
{
  __m256h sum = a + b;
  return sum;
}
/* Elementwise half-precision subtract of two 128-bit vectors; kept out of
   line for the assembler scan.  */
__m128h
__attribute__ ((noinline, noclone))
vsub128 (__m128h a, __m128h b)
{
  __m128h difference = a - b;
  return difference;
}
/* Elementwise half-precision subtract of two 256-bit vectors; kept out of
   line for the assembler scan.  */
__m256h
__attribute__ ((noinline, noclone))
vsub256 (__m256h a, __m256h b)
{
  __m256h difference = a - b;
  return difference;
}
/* Elementwise half-precision multiply of two 128-bit vectors; kept out of
   line for the assembler scan.  */
__m128h
__attribute__ ((noinline, noclone))
vmul128 (__m128h a, __m128h b)
{
  __m128h product = a * b;
  return product;
}
/* Elementwise half-precision multiply of two 256-bit vectors; kept out of
   line for the assembler scan.  */
__m256h
__attribute__ ((noinline, noclone))
vmul256 (__m256h a, __m256h b)
{
  __m256h product = a * b;
  return product;
}
/* Elementwise half-precision divide of two 128-bit vectors; kept out of
   line for the assembler scan.  */
__m128h
__attribute__ ((noinline, noclone))
vdiv128 (__m128h a, __m128h b)
{
  __m128h quotient = a / b;
  return quotient;
}
/* Elementwise half-precision divide of two 256-bit vectors; kept out of
   line for the assembler scan.  */
__m256h
__attribute__ ((noinline, noclone))
vdiv256 (__m256h a, __m256h b)
{
  __m256h quotient = a / b;
  return quotient;
}
/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */

View File

@ -0,0 +1,96 @@
/* { dg-do run { target avx512fp16 } } */
/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
#include <string.h>
#include <stdlib.h>
static void do_test (void);
#define DO_TEST do_test
#define AVX512FP16
#include "avx512-check.h"
#include "avx512vlfp16-11a.c"
/* Get a random float16 between -50.x and 50.x: an integer part in
   [-50, 49] plus one random decimal digit scaled by 0.1f.  Relies on the
   default rand() seed, so every run sees the same sequence.  */
_Float16
get_float16_noround (void)
{
  return ((int) (100.0 * rand () / (RAND_MAX + 1.0)) - 50)
	 + 0.1f * (int) (10 * rand () / (RAND_MAX + 1.0));
}
/* Compute 16 half-precision add/sub/mul/div results in scalar code, then
   compare them against the 128-bit and 256-bit vector versions
   (from avx512vlfp16-11a.c) via the avx512-check.h harness.  */
static void
do_test (void)
{
_Float16 x[16];
_Float16 y[16];
_Float16 res_add[16];
_Float16 res_sub[16];
_Float16 res_mul[16];
_Float16 res_div[16];
for (int i = 0 ; i != 16; i++)
{
x[i] = get_float16_noround ();
y[i] = get_float16_noround ();
/* Avoid division by zero in the reference computation below.  */
if (y[i] == 0)
y[i] = 1.0f;
res_add[i] = x[i] + y[i];
res_sub[i] = x[i] - y[i];
res_mul[i] = x[i] * y[i];
res_div[i] = x[i] / y[i];
}
/* union128h/union256h are the check-harness unions (presumably _Float16
   arrays overlaid with __m128h/__m256h; see avx512-check.h).  The 128-bit
   tests use only the first 8 elements.  */
union128h u128 = { x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7] };
union128h u128_1 = { y[0], y[1], y[2], y[3], y[4], y[5], y[6], y[7] };
union256h u256 = { x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15] };
union256h u256_1 = { y[0], y[1], y[2], y[3], y[4], y[5], y[6], y[7],
y[8], y[9], y[10], y[11], y[12], y[13], y[14], y[15]};
__m128h v128;
__m256h v256;
union128h a128;
union256h a256;
/* Poison the destination before each call so a stale value cannot
   accidentally match the expected result.  */
memset (&v128, -1, sizeof (v128));
v128 = vadd128 (u128.x, u128_1.x);
a128.x = v128;
if (check_union128h (a128, res_add))
abort ();
memset (&v128, -1, sizeof (v128));
v128 = vsub128 (u128.x, u128_1.x);
a128.x = v128;
if (check_union128h (a128, res_sub))
abort ();
memset (&v128, -1, sizeof (v128));
v128 = vmul128 (u128.x, u128_1.x);
a128.x = v128;
if (check_union128h (a128, res_mul))
abort ();
memset (&v128, -1, sizeof (v128));
v128 = vdiv128 (u128.x, u128_1.x);
a128.x = v128;
if (check_union128h (a128, res_div))
abort ();
/* Same four operations again with the 256-bit variants.  */
memset (&v256, -1, sizeof (v256));
v256 = vadd256 (u256.x, u256_1.x);
a256.x = v256;
if (check_union256h (a256, res_add))
abort ();
memset (&v256, -1, sizeof (v256));
v256 = vsub256 (u256.x, u256_1.x);
a256.x = v256;
if (check_union256h (a256, res_sub))
abort ();
memset (&v256, -1, sizeof (v256));
v256 = vmul256 (u256.x, u256_1.x);
a256.x = v256;
if (check_union256h (a256, res_mul))
abort ();
memset (&v256, -1, sizeof (v256));
v256 = vdiv256 (u256.x, u256_1.x);
a256.x = v256;
if (check_union256h (a256, res_div))
abort ();
}

View File

@ -702,6 +702,12 @@
#define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1)
#define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
/* avx512fp16intrin.h */
/* Force the rounding operand of each FP16 round builtin to the literal 8
   (_MM_FROUND_NO_EXC) so the immediate-operand requirement is met.  */
#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
/* vpclmulqdqintrin.h */
#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)

View File

@ -667,6 +667,20 @@ test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 8)
test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 8)
test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 8)
/* avx512fp16intrin.h */
/* Prototype checks for the new FP16 add/sub/mul/div round intrinsics;
   the trailing 8 is the rounding immediate (_MM_FROUND_NO_EXC).  */
test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_div_round_ph, __m512h, __m512h, __m512h, 8)
test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_div_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_div_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
/* shaintrin.h */
test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)

View File

@ -772,6 +772,20 @@ test_2 (_mm_rcp28_round_ss, __m128, __m128, __m128, 8)
test_2 (_mm_rsqrt28_round_sd, __m128d, __m128d, __m128d, 8)
test_2 (_mm_rsqrt28_round_ss, __m128, __m128, __m128, 8)
/* avx512fp16intrin.h */
/* Prototype checks for the new FP16 add/sub/mul/div round intrinsics;
   the trailing 8 is the rounding immediate (_MM_FROUND_NO_EXC).  */
test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8)
test_2 (_mm512_div_round_ph, __m512h, __m512h, __m512h, 8)
test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3 (_mm512_maskz_div_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
test_4 (_mm512_mask_div_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
/* shaintrin.h */
test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)

View File

@ -703,6 +703,12 @@
#define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1)
#define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
/* avx512fp16intrin.h */
/* Force the rounding operand of each FP16 round builtin to the literal 8
   (_MM_FROUND_NO_EXC) so the immediate-operand requirement is met.  */
#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
/* vpclmulqdqintrin.h */
#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)