aarch64: Remove macros for vld3[q]_lane Neon intrinsics
Remove macros for vld3[q]_lane Neon intrinsics. This is a preparatory step before adding new modes for structures of Advanced SIMD vectors. gcc/ChangeLog: 2021-08-16 Jonathan Wright <jonathan.wright@arm.com> * config/aarch64/arm_neon.h (__LD3_LANE_FUNC): Delete. (__LD3Q_LANE_FUNC): Delete. (vld3_lane_u8): Define without macro. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise.
This commit is contained in:
parent
5ed35a9874
commit
08f83812e5
@ -20334,100 +20334,525 @@ vld2q_lane_p64 (const poly64_t * __ptr, poly64x2x2_t __b, const int __c)
|
|||||||
|
|
||||||
/* vld3_lane */
|
/* vld3_lane */
|
||||||
|
|
||||||
#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \
|
__extension__ extern __inline uint8x8x3_t
|
||||||
qmode, ptrmode, funcsuffix, signedtype) \
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
__extension__ extern __inline intype \
|
vld3_lane_u8 (const uint8_t * __ptr, uint8x8x3_t __b, const int __c)
|
||||||
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
|
{
|
||||||
vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
|
__builtin_aarch64_simd_ci __o;
|
||||||
{ \
|
uint8x16x3_t __temp;
|
||||||
__builtin_aarch64_simd_ci __o; \
|
__temp.val[0] = vcombine_u8 (__b.val[0], vcreate_u8 (0));
|
||||||
largetype __temp; \
|
__temp.val[1] = vcombine_u8 (__b.val[1], vcreate_u8 (0));
|
||||||
__temp.val[0] = \
|
__temp.val[2] = vcombine_u8 (__b.val[2], vcreate_u8 (0));
|
||||||
vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \
|
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
|
||||||
__temp.val[1] = \
|
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
|
||||||
vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \
|
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
|
||||||
__temp.val[2] = \
|
__o = __builtin_aarch64_ld3_lanev8qi (
|
||||||
vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \
|
(__builtin_aarch64_simd_qi *) __ptr, __o, __c);
|
||||||
__o = __builtin_aarch64_set_qregci##qmode (__o, \
|
__b.val[0] = (uint8x8_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
(signedtype) __temp.val[0], \
|
__b.val[1] = (uint8x8_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
0); \
|
__b.val[2] = (uint8x8_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
__o = __builtin_aarch64_set_qregci##qmode (__o, \
|
return __b;
|
||||||
(signedtype) __temp.val[1], \
|
|
||||||
1); \
|
|
||||||
__o = __builtin_aarch64_set_qregci##qmode (__o, \
|
|
||||||
(signedtype) __temp.val[2], \
|
|
||||||
2); \
|
|
||||||
__o = __builtin_aarch64_ld3_lane##mode ( \
|
|
||||||
(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
|
|
||||||
__b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); \
|
|
||||||
__b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1); \
|
|
||||||
__b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2); \
|
|
||||||
return __b; \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf,
|
__extension__ extern __inline uint16x4x3_t
|
||||||
v8hf, hf, f16, float16x8_t)
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf,
|
vld3_lane_u16 (const uint16_t * __ptr, uint16x4x3_t __b, const int __c)
|
||||||
sf, f32, float32x4_t)
|
{
|
||||||
__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df,
|
__builtin_aarch64_simd_ci __o;
|
||||||
df, f64, float64x2_t)
|
uint16x8x3_t __temp;
|
||||||
__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
|
__temp.val[0] = vcombine_u16 (__b.val[0], vcreate_u16 (0));
|
||||||
int8x16_t)
|
__temp.val[1] = vcombine_u16 (__b.val[1], vcreate_u16 (0));
|
||||||
__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi,
|
__temp.val[2] = vcombine_u16 (__b.val[2], vcreate_u16 (0));
|
||||||
p16, int16x8_t)
|
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
|
||||||
__LD3_LANE_FUNC (poly64x1x3_t, poly64x1_t, poly64x2x3_t, poly64_t, di,
|
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
|
||||||
v2di_ssps, di, p64, poly64x2_t)
|
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
|
||||||
__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
|
__o = __builtin_aarch64_ld3_lanev4hi (
|
||||||
int8x16_t)
|
(__builtin_aarch64_simd_hi *) __ptr, __o, __c);
|
||||||
__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
|
__b.val[0] = (uint16x4_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
int16x8_t)
|
__b.val[1] = (uint16x4_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32,
|
__b.val[2] = (uint16x4_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
int32x4_t)
|
return __b;
|
||||||
__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64,
|
}
|
||||||
int64x2_t)
|
|
||||||
__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8,
|
__extension__ extern __inline uint32x2x3_t
|
||||||
int8x16_t)
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi,
|
vld3_lane_u32 (const uint32_t * __ptr, uint32x2x3_t __b, const int __c)
|
||||||
u16, int16x8_t)
|
{
|
||||||
__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si,
|
__builtin_aarch64_simd_ci __o;
|
||||||
u32, int32x4_t)
|
uint32x4x3_t __temp;
|
||||||
__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di,
|
__temp.val[0] = vcombine_u32 (__b.val[0], vcreate_u32 (0));
|
||||||
u64, int64x2_t)
|
__temp.val[1] = vcombine_u32 (__b.val[1], vcreate_u32 (0));
|
||||||
|
__temp.val[2] = vcombine_u32 (__b.val[2], vcreate_u32 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev2si (
|
||||||
|
(__builtin_aarch64_simd_si *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (uint32x2_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (uint32x2_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (uint32x2_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline uint64x1x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_u64 (const uint64_t * __ptr, uint64x1x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
uint64x2x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_u64 (__b.val[0], vcreate_u64 (0));
|
||||||
|
__temp.val[1] = vcombine_u64 (__b.val[1], vcreate_u64 (0));
|
||||||
|
__temp.val[2] = vcombine_u64 (__b.val[2], vcreate_u64 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanedi (
|
||||||
|
(__builtin_aarch64_simd_di *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline int8x8x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_s8 (const int8_t * __ptr, int8x8x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
int8x16x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_s8 (__b.val[0], vcreate_s8 (0));
|
||||||
|
__temp.val[1] = vcombine_s8 (__b.val[1], vcreate_s8 (0));
|
||||||
|
__temp.val[2] = vcombine_s8 (__b.val[2], vcreate_s8 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev8qi (
|
||||||
|
(__builtin_aarch64_simd_qi *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (int8x8_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (int8x8_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (int8x8_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline int16x4x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_s16 (const int16_t * __ptr, int16x4x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
int16x8x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_s16 (__b.val[0], vcreate_s16 (0));
|
||||||
|
__temp.val[1] = vcombine_s16 (__b.val[1], vcreate_s16 (0));
|
||||||
|
__temp.val[2] = vcombine_s16 (__b.val[2], vcreate_s16 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev4hi (
|
||||||
|
(__builtin_aarch64_simd_hi *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (int16x4_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (int16x4_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (int16x4_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline int32x2x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_s32 (const int32_t * __ptr, int32x2x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
int32x4x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_s32 (__b.val[0], vcreate_s32 (0));
|
||||||
|
__temp.val[1] = vcombine_s32 (__b.val[1], vcreate_s32 (0));
|
||||||
|
__temp.val[2] = vcombine_s32 (__b.val[2], vcreate_s32 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev2si (
|
||||||
|
(__builtin_aarch64_simd_si *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (int32x2_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (int32x2_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (int32x2_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline int64x1x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_s64 (const int64_t * __ptr, int64x1x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
int64x2x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_s64 (__b.val[0], vcreate_s64 (0));
|
||||||
|
__temp.val[1] = vcombine_s64 (__b.val[1], vcreate_s64 (0));
|
||||||
|
__temp.val[2] = vcombine_s64 (__b.val[2], vcreate_s64 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanedi (
|
||||||
|
(__builtin_aarch64_simd_di *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline float16x4x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_f16 (const float16_t * __ptr, float16x4x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
float16x8x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_f16 (__b.val[0], vcreate_f16 (0));
|
||||||
|
__temp.val[1] = vcombine_f16 (__b.val[1], vcreate_f16 (0));
|
||||||
|
__temp.val[2] = vcombine_f16 (__b.val[2], vcreate_f16 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev4hf (
|
||||||
|
(__builtin_aarch64_simd_hf *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (float16x4_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (float16x4_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (float16x4_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline float32x2x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_f32 (const float32_t * __ptr, float32x2x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
float32x4x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_f32 (__b.val[0], vcreate_f32 (0));
|
||||||
|
__temp.val[1] = vcombine_f32 (__b.val[1], vcreate_f32 (0));
|
||||||
|
__temp.val[2] = vcombine_f32 (__b.val[2], vcreate_f32 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev2sf (
|
||||||
|
(__builtin_aarch64_simd_sf *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (float32x2_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (float32x2_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (float32x2_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline float64x1x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_f64 (const float64_t * __ptr, float64x1x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
float64x2x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_f64 (__b.val[0], vcreate_f64 (0));
|
||||||
|
__temp.val[1] = vcombine_f64 (__b.val[1], vcreate_f64 (0));
|
||||||
|
__temp.val[2] = vcombine_f64 (__b.val[2], vcreate_f64 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanedi (
|
||||||
|
(__builtin_aarch64_simd_di *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline poly8x8x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_p8 (const poly8_t * __ptr, poly8x8x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
poly8x16x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_p8 (__b.val[0], vcreate_p8 (0));
|
||||||
|
__temp.val[1] = vcombine_p8 (__b.val[1], vcreate_p8 (0));
|
||||||
|
__temp.val[2] = vcombine_p8 (__b.val[2], vcreate_p8 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev8qi (
|
||||||
|
(__builtin_aarch64_simd_qi *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (poly8x8_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (poly8x8_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (poly8x8_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline poly16x4x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_p16 (const poly16_t * __ptr, poly16x4x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
poly16x8x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_p16 (__b.val[0], vcreate_p16 (0));
|
||||||
|
__temp.val[1] = vcombine_p16 (__b.val[1], vcreate_p16 (0));
|
||||||
|
__temp.val[2] = vcombine_p16 (__b.val[2], vcreate_p16 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev4hi (
|
||||||
|
(__builtin_aarch64_simd_hi *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (poly16x4_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (poly16x4_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (poly16x4_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline poly64x1x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3_lane_p64 (const poly64_t * __ptr, poly64x1x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
poly64x2x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_p64 (__b.val[0], vcreate_p64 (0));
|
||||||
|
__temp.val[1] = vcombine_p64 (__b.val[1], vcreate_p64 (0));
|
||||||
|
__temp.val[2] = vcombine_p64 (__b.val[2], vcreate_p64 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanedi (
|
||||||
|
(__builtin_aarch64_simd_di *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
/* vld3q_lane */
|
/* vld3q_lane */
|
||||||
|
|
||||||
#define __LD3Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
|
__extension__ extern __inline uint8x16x3_t
|
||||||
__extension__ extern __inline intype \
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
|
vld3q_lane_u8 (const uint8_t * __ptr, uint8x16x3_t __b, const int __c)
|
||||||
vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
|
{
|
||||||
{ \
|
__builtin_aarch64_simd_ci __o;
|
||||||
__builtin_aarch64_simd_ci __o; \
|
uint8x16x3_t ret;
|
||||||
intype ret; \
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \
|
__o = __builtin_aarch64_ld3_lanev16qi (
|
||||||
__o = __builtin_aarch64_ld3_lane##mode ( \
|
(__builtin_aarch64_simd_qi *) __ptr, __o, __c);
|
||||||
(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
|
ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0); \
|
ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1); \
|
ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2); \
|
return ret;
|
||||||
return ret; \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__LD3Q_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
|
__extension__ extern __inline uint16x8x3_t
|
||||||
__LD3Q_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
__LD3Q_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
|
vld3q_lane_u16 (const uint16_t * __ptr, uint16x8x3_t __b, const int __c)
|
||||||
__LD3Q_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
|
{
|
||||||
__LD3Q_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
|
__builtin_aarch64_simd_ci __o;
|
||||||
__LD3Q_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64)
|
uint16x8x3_t ret;
|
||||||
__LD3Q_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
__LD3Q_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
__LD3Q_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
__LD3Q_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
|
__o = __builtin_aarch64_ld3_lanev8hi (
|
||||||
__LD3Q_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
|
(__builtin_aarch64_simd_hi *) __ptr, __o, __c);
|
||||||
__LD3Q_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
|
ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
__LD3Q_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
|
ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
__LD3Q_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
|
ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline uint32x4x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_u32 (const uint32_t * __ptr, uint32x4x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
uint32x4x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev4si (
|
||||||
|
(__builtin_aarch64_simd_si *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline uint64x2x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_u64 (const uint64_t * __ptr, uint64x2x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
uint64x2x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev2di (
|
||||||
|
(__builtin_aarch64_simd_di *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline int8x16x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_s8 (const int8_t * __ptr, int8x16x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
int8x16x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev16qi (
|
||||||
|
(__builtin_aarch64_simd_qi *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline int16x8x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_s16 (const int16_t * __ptr, int16x8x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
int16x8x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev8hi (
|
||||||
|
(__builtin_aarch64_simd_hi *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline int32x4x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_s32 (const int32_t * __ptr, int32x4x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
int32x4x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev4si (
|
||||||
|
(__builtin_aarch64_simd_si *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline int64x2x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_s64 (const int64_t * __ptr, int64x2x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
int64x2x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev2di (
|
||||||
|
(__builtin_aarch64_simd_di *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline float16x8x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_f16 (const float16_t * __ptr, float16x8x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
float16x8x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev8hf (
|
||||||
|
(__builtin_aarch64_simd_hf *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline float32x4x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_f32 (const float32_t * __ptr, float32x4x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
float32x4x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev4sf (
|
||||||
|
(__builtin_aarch64_simd_sf *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline float64x2x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_f64 (const float64_t * __ptr, float64x2x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
float64x2x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev2df (
|
||||||
|
(__builtin_aarch64_simd_df *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline poly8x16x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_p8 (const poly8_t * __ptr, poly8x16x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
poly8x16x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev16qi (
|
||||||
|
(__builtin_aarch64_simd_qi *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline poly16x8x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_p16 (const poly16_t * __ptr, poly16x8x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
poly16x8x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev8hi (
|
||||||
|
(__builtin_aarch64_simd_hi *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline poly64x2x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_p64 (const poly64_t * __ptr, poly64x2x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
poly64x2x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev2di (
|
||||||
|
(__builtin_aarch64_simd_di *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/* vld4_lane */
|
/* vld4_lane */
|
||||||
|
|
||||||
@ -34979,9 +35404,43 @@ vld2q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x2_t __b, const int __c)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
__LD3_LANE_FUNC (bfloat16x4x3_t, bfloat16x4_t, bfloat16x8x3_t, bfloat16_t, v4bf,
|
__extension__ extern __inline bfloat16x4x3_t
|
||||||
v8bf, bf, bf16, bfloat16x8_t)
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
__LD3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
|
vld3_lane_bf16 (const bfloat16_t * __ptr, bfloat16x4x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
bfloat16x8x3_t __temp;
|
||||||
|
__temp.val[0] = vcombine_bf16 (__b.val[0], vcreate_bf16 (0));
|
||||||
|
__temp.val[1] = vcombine_bf16 (__b.val[1], vcreate_bf16 (0));
|
||||||
|
__temp.val[2] = vcombine_bf16 (__b.val[2], vcreate_bf16 (0));
|
||||||
|
__o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev4bf (
|
||||||
|
(__builtin_aarch64_simd_bf *) __ptr, __o, __c);
|
||||||
|
__b.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregcidi (__o, 0);
|
||||||
|
__b.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregcidi (__o, 1);
|
||||||
|
__b.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregcidi (__o, 2);
|
||||||
|
return __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ extern __inline bfloat16x8x3_t
|
||||||
|
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
|
||||||
|
vld3q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x3_t __b, const int __c)
|
||||||
|
{
|
||||||
|
__builtin_aarch64_simd_ci __o;
|
||||||
|
bfloat16x8x3_t ret;
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1);
|
||||||
|
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2);
|
||||||
|
__o = __builtin_aarch64_ld3_lanev8bf (
|
||||||
|
(__builtin_aarch64_simd_bf *) __ptr, __o, __c);
|
||||||
|
ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv4si (__o, 0);
|
||||||
|
ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv4si (__o, 1);
|
||||||
|
ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv4si (__o, 2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf,
|
__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf,
|
||||||
v8bf, bf, bf16, bfloat16x8_t)
|
v8bf, bf, bf16, bfloat16x8_t)
|
||||||
__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
|
__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
|
||||||
@ -35280,8 +35739,6 @@ vaddq_p128 (poly128_t __a, poly128_t __b)
|
|||||||
#undef __aarch64_vdupq_laneq_u32
|
#undef __aarch64_vdupq_laneq_u32
|
||||||
#undef __aarch64_vdupq_laneq_u64
|
#undef __aarch64_vdupq_laneq_u64
|
||||||
|
|
||||||
#undef __LD3_LANE_FUNC
|
|
||||||
#undef __LD3Q_LANE_FUNC
|
|
||||||
#undef __LD4_LANE_FUNC
|
#undef __LD4_LANE_FUNC
|
||||||
#undef __LD4Q_LANE_FUNC
|
#undef __LD4Q_LANE_FUNC
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user