arm: Add vstN_lane_bf16 + vstNq_lane_bf16 intrisics
gcc/ChangeLog 2020-10-29 Andrea Corallo <andrea.corallo@arm.com> * config/arm/arm_neon.h (vst2_lane_bf16, vst2q_lane_bf16) (vst3_lane_bf16, vst3q_lane_bf16, vst4_lane_bf16) (vst4q_lane_bf16): New intrinsics. * config/arm/arm_neon_builtins.def: Touch it for: __builtin_neon_vst2_lanev4bf, __builtin_neon_vst2_lanev8bf, __builtin_neon_vst3_lanev4bf, __builtin_neon_vst3_lanev8bf, __builtin_neon_vst4_lanev4bf,__builtin_neon_vst4_lanev8bf. gcc/testsuite/ChangeLog 2020-10-29 Andrea Corallo <andrea.corallo@arm.com> * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_bf16_indices_1.c: Run it also for arm-*-*. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_bf16_indices_1.c: Likewise. * gcc.target/arm/simd/vstn_lane_bf16_1.c: New test.
This commit is contained in:
parent
1528f34341
commit
ed62f3668b
@ -19783,6 +19783,54 @@ vld4q_lane_bf16 (const bfloat16_t * __a, bfloat16x8x4_t __b, const int __c)
|
||||
return __rv.__i;
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst2_lane_bf16 (bfloat16_t * __a, bfloat16x4x2_t __b, const int __c)
|
||||
{
|
||||
union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
|
||||
__builtin_neon_vst2_lanev4bf (__a, __bu.__o, __c);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst2q_lane_bf16 (bfloat16_t * __a, bfloat16x8x2_t __b, const int __c)
|
||||
{
|
||||
union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
|
||||
__builtin_neon_vst2_lanev8bf (__a, __bu.__o, __c);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst3_lane_bf16 (bfloat16_t * __a, bfloat16x4x3_t __b, const int __c)
|
||||
{
|
||||
union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
|
||||
__builtin_neon_vst3_lanev4bf (__a, __bu.__o, __c);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst3q_lane_bf16 (bfloat16_t * __a, bfloat16x8x3_t __b, const int __c)
|
||||
{
|
||||
union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
|
||||
__builtin_neon_vst3_lanev8bf (__a, __bu.__o, __c);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst4_lane_bf16 (bfloat16_t * __a, bfloat16x4x4_t __b, const int __c)
|
||||
{
|
||||
union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
|
||||
__builtin_neon_vst4_lanev4bf (__a, __bu.__o, __c);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst4q_lane_bf16 (bfloat16_t * __a, bfloat16x8x4_t __b, const int __c)
|
||||
{
|
||||
union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
|
||||
__builtin_neon_vst4_lanev8bf (__a, __bu.__o, __c);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -329,8 +329,8 @@ VAR11 (LOAD1LANE, vld2_lane,
|
||||
VAR8 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
|
||||
VAR13 (STORE1, vst2,
|
||||
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
|
||||
VAR9 (STORE1LANE, vst2_lane,
|
||||
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
|
||||
VAR11 (STORE1LANE, vst2_lane,
|
||||
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
|
||||
VAR13 (LOAD1, vld3,
|
||||
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
|
||||
VAR11 (LOAD1LANE, vld3_lane,
|
||||
@ -338,8 +338,8 @@ VAR11 (LOAD1LANE, vld3_lane,
|
||||
VAR8 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
|
||||
VAR13 (STORE1, vst3,
|
||||
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
|
||||
VAR9 (STORE1LANE, vst3_lane,
|
||||
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
|
||||
VAR11 (STORE1LANE, vst3_lane,
|
||||
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
|
||||
VAR13 (LOAD1, vld4,
|
||||
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
|
||||
VAR11 (LOAD1LANE, vld4_lane,
|
||||
@ -347,8 +347,8 @@ VAR11 (LOAD1LANE, vld4_lane,
|
||||
VAR8 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
|
||||
VAR13 (STORE1, vst4,
|
||||
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
|
||||
VAR9 (STORE1LANE, vst4_lane,
|
||||
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
|
||||
VAR11 (STORE1LANE, vst4_lane,
|
||||
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
|
||||
VAR2 (TERNOP, sdot, v8qi, v16qi)
|
||||
VAR2 (UTERNOP, udot, v8qi, v16qi)
|
||||
VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi)
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
73
gcc/testsuite/gcc.target/arm/simd/vstn_lane_bf16_1.c
Normal file
73
gcc/testsuite/gcc.target/arm/simd/vstn_lane_bf16_1.c
Normal file
@ -0,0 +1,73 @@
|
||||
/* { dg-do assemble } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include "arm_neon.h"
|
||||
|
||||
/*
|
||||
**test_vst2_lane_bf16:
|
||||
** vst2.16 {d0\[2\], d1\[2\]}, \[r0\]
|
||||
** bx lr
|
||||
*/
|
||||
void
|
||||
test_vst2_lane_bf16 (bfloat16_t *a, bfloat16x4x2_t b)
|
||||
{
|
||||
return vst2_lane_bf16 (a, b, 2);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vst2q_lane_bf16:
|
||||
** vst2.16 {d0\[2\], d2\[2\]}, \[r0\]
|
||||
** bx lr
|
||||
*/
|
||||
void
|
||||
test_vst2q_lane_bf16 (bfloat16_t *a, bfloat16x8x2_t b)
|
||||
{
|
||||
return vst2q_lane_bf16 (a, b, 2);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vst3_lane_bf16:
|
||||
** vst3.16 {d0\[2\], d1\[2\], d2\[2\]}, \[r0\]
|
||||
** bx lr
|
||||
*/
|
||||
void
|
||||
test_vst3_lane_bf16 (bfloat16_t *a, bfloat16x4x3_t b)
|
||||
{
|
||||
return vst3_lane_bf16 (a, b, 2);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vst3q_lane_bf16:
|
||||
** vst3.16 {d0\[2\], d2\[2\], d4\[2\]}, \[r0\]
|
||||
** bx lr
|
||||
*/
|
||||
void
|
||||
test_vst3q_lane_bf16 (bfloat16_t *a, bfloat16x8x3_t b)
|
||||
{
|
||||
return vst3q_lane_bf16 (a, b, 2);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vst4_lane_bf16:
|
||||
** vst4.16 {d0\[2\], d1\[2\], d2\[2\], d3\[2\]}, \[r0\]
|
||||
** bx lr
|
||||
*/
|
||||
void
|
||||
test_vst4_lane_bf16 (bfloat16_t *a, bfloat16x4x4_t b)
|
||||
{
|
||||
return vst4_lane_bf16 (a, b, 2);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vst4q_lane_bf16:
|
||||
** vst4.16 {d0\[2\], d2\[2\], d4\[2\], d6\[2\]}, \[r0\]
|
||||
** bx lr
|
||||
*/
|
||||
void
|
||||
test_vst4q_lane_bf16 (bfloat16_t *a, bfloat16x8x4_t b)
|
||||
{
|
||||
return vst4q_lane_bf16 (a, b, 2);
|
||||
}
|
Loading…
Reference in New Issue
Block a user