aarch64: Use memcpy to copy vector tables in vst1[q]_x3 intrinsics
Use __builtin_memcpy to copy vector structures instead of building a new
opaque structure one vector at a time in each of the vst1[q]_x3 Neon
intrinsics in arm_neon.h. This simplifies the header file and also improves
code generation: superfluous move instructions were emitted for every
register extraction/set in this additional structure.

Add new code generation tests to verify that superfluous move instructions
are not generated for the vst1q_x3 intrinsics.

gcc/ChangeLog:

2021-07-23  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/arm_neon.h (vst1_s64_x3): Use __builtin_memcpy
	instead of constructing __builtin_aarch64_simd_ci one vector
	at a time.
	(vst1_u64_x3): Likewise.
	(vst1_f64_x3): Likewise.
	(vst1_s8_x3): Likewise.
	(vst1_p8_x3): Likewise.
	(vst1_s16_x3): Likewise.
	(vst1_p16_x3): Likewise.
	(vst1_s32_x3): Likewise.
	(vst1_u8_x3): Likewise.
	(vst1_u16_x3): Likewise.
	(vst1_u32_x3): Likewise.
	(vst1_f16_x3): Likewise.
	(vst1_f32_x3): Likewise.
	(vst1_p64_x3): Likewise.
	(vst1q_s8_x3): Likewise.
	(vst1q_p8_x3): Likewise.
	(vst1q_s16_x3): Likewise.
	(vst1q_p16_x3): Likewise.
	(vst1q_s32_x3): Likewise.
	(vst1q_s64_x3): Likewise.
	(vst1q_u8_x3): Likewise.
	(vst1q_u16_x3): Likewise.
	(vst1q_u32_x3): Likewise.
	(vst1q_u64_x3): Likewise.
	(vst1q_f16_x3): Likewise.
	(vst1q_f32_x3): Likewise.
	(vst1q_f64_x3): Likewise.
	(vst1q_p64_x3): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/vector_structure_intrinsics.c: Add new tests.
commit ccf6e2c21b (parent 085666673d)
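
All 28 intrinsics change in the same way, so here is a condensed before/after
sketch for one representative intrinsic (vst1q_s32_x3), paraphrased from the
hunks below; the comments are editorial, not part of the patch:

  /* Before: the register tuple is assembled one vector at a time through the
     opaque __builtin_aarch64_simd_ci type, which is what produced the
     superfluous move instructions.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2);
  __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o);

  /* After: the whole vector structure is copied into the opaque type with a
     single __builtin_memcpy, so no per-vector extraction/set is needed.  */
  __builtin_aarch64_simd_ci __o;
  __builtin_memcpy (&__o, &__val, sizeof (__val));
  __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o);
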
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -26619,9 +26619,7 @@ vst1_s64_x3 (int64_t * __a, int64x1x3_t __val)
   __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
   __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
   __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
@@ -26634,9 +26632,7 @@ vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __val)
   __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
   __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
@@ -26649,9 +26645,7 @@ vst1_f64_x3 (float64_t * __a, float64x1x3_t __val)
   __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
   __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3df ((__builtin_aarch64_simd_df *) __a, __o);
 }
 
@@ -26664,9 +26658,7 @@ vst1_s8_x3 (int8_t * __a, int8x8x3_t __val)
   __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
   __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
   __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -26679,9 +26671,7 @@ vst1_p8_x3 (poly8_t * __a, poly8x8x3_t __val)
   __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
   __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -26694,9 +26684,7 @@ vst1_s16_x3 (int16_t * __a, int16x4x3_t __val)
   __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
   __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
   __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -26709,9 +26697,7 @@ vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __val)
   __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
   __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -26724,9 +26710,7 @@ vst1_s32_x3 (int32_t * __a, int32x2x3_t __val)
   __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
   __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
   __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o);
 }
 
@@ -26739,9 +26723,7 @@ vst1_u8_x3 (uint8_t * __a, uint8x8x3_t __val)
   __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
   __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -26754,9 +26736,7 @@ vst1_u16_x3 (uint16_t * __a, uint16x4x3_t __val)
   __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
   __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -26769,9 +26749,7 @@ vst1_u32_x3 (uint32_t * __a, uint32x2x3_t __val)
   __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
   __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o);
 }
 
@@ -26784,9 +26762,7 @@ vst1_f16_x3 (float16_t * __a, float16x4x3_t __val)
   __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
   __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
 }
 
@@ -26799,9 +26775,7 @@ vst1_f32_x3 (float32_t * __a, float32x2x3_t __val)
   __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
   __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
 }
 
@@ -26814,12 +26788,7 @@ vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __val)
   __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
   __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-                                               (poly64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-                                               (poly64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-                                               (poly64x2_t) __temp.val[2], 2);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
@@ -26828,9 +26797,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s8_x3 (int8_t * __a, int8x16x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -26839,9 +26806,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -26850,9 +26815,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s16_x3 (int16_t * __a, int16x8x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -26861,9 +26824,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -26872,9 +26833,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s32_x3 (int32_t * __a, int32x4x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o);
 }
 
@@ -26883,9 +26842,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s64_x3 (int64_t * __a, int64x2x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
@@ -26894,9 +26851,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -26905,9 +26860,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -26916,9 +26869,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o);
 }
 
@@ -26927,9 +26878,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
@@ -26938,9 +26887,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_f16_x3 (float16_t * __a, float16x8x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
 }
 
@@ -26949,9 +26896,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_f32_x3 (float32_t * __a, float32x4x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
 }
 
@@ -26960,9 +26905,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_f64_x3 (float64_t * __a, float64x2x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v2df ((__builtin_aarch64_simd_df *) __a, __o);
 }
 
@@ -26971,12 +26914,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __val)
 {
   __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-                                               (poly64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-                                               (poly64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-                                               (poly64x2_t) __val.val[2], 2);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
--- a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c
@@ -160,6 +160,28 @@ TEST_ST1xN (vst1q, uint64x2x4_t, uint64_t*, u64, x4);
 TEST_ST1xN (vst1q, poly64x2x4_t, poly64_t*, p64, x4);
 TEST_ST1xN (vst1q, float64x2x4_t, float64_t*, f64, x4);
 
+#define TEST_ST1x3(name, tbltype, ptrtype, ts, xn) \
+  void test_ ## name ## _ ## ts ## _ ## xn (ptrtype a, int8x8_t dummy, \
+                                            tbltype b) \
+  { \
+    name ## _ ## ts ## _ ## xn (a, b); \
+  }
+
+TEST_ST1x3 (vst1q, int8x16x3_t, int8_t*, s8, x3);
+TEST_ST1x3 (vst1q, uint8x16x3_t, uint8_t*, u8, x3);
+TEST_ST1x3 (vst1q, poly8x16x3_t, poly8_t*, p8, x3);
+TEST_ST1x3 (vst1q, int16x8x3_t, int16_t*, s16, x3);
+TEST_ST1x3 (vst1q, uint16x8x3_t, uint16_t*, u16, x3);
+TEST_ST1x3 (vst1q, poly16x8x3_t, poly16_t*, p16, x3);
+TEST_ST1x3 (vst1q, float16x8x3_t, float16_t*, f16, x3);
+TEST_ST1x3 (vst1q, int32x4x3_t, int32_t*, s32, x3);
+TEST_ST1x3 (vst1q, uint32x4x3_t, uint32_t*, u32, x3);
+TEST_ST1x3 (vst1q, float32x4x3_t, float32_t*, f32, x3);
+TEST_ST1x3 (vst1q, int64x2x3_t, int64_t*, s64, x3);
+TEST_ST1x3 (vst1q, uint64x2x3_t, uint64_t*, u64, x3);
+TEST_ST1x3 (vst1q, poly64x2x3_t, poly64_t*, p64, x3);
+TEST_ST1x3 (vst1q, float64x2x3_t, float64_t*, f64, x3);
+
 /* { dg-final { scan-assembler-not "mov\\t" } } */
 
 /* { dg-final { scan-assembler-times "tbl\\t" 18} } */
@@ -167,4 +189,4 @@ TEST_ST1xN (vst1q, float64x2x4_t, float64_t*, f64, x4);
 /* { dg-final { scan-assembler-times "st4\\t" 14} } */
 /* { dg-final { scan-assembler-times "st3\\t" 14} } */
 /* { dg-final { scan-assembler-times "st2\\t" 14} } */
-/* { dg-final { scan-assembler-times "st1\\t" 14} } */
+/* { dg-final { scan-assembler-times "st1\\t" 28} } */
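
For reference, each TEST_ST1x3 instantiation expands to a trivial wrapper that
forwards to the corresponding intrinsic; for example, TEST_ST1x3 (vst1q,
int32x4x3_t, int32_t*, s32, x3) expands (modulo whitespace) to:

  void test_vst1q_s32_x3 (int32_t* a, int8x8_t dummy, int32x4x3_t b)
  {
    /* Call the intrinsic under test; the dummy argument only fixes the
       register allocation of the inputs.  */
    vst1q_s32_x3 (a, b);
  }

The scan-assembler-not "mov\t" directive then checks that none of the wrappers
needs a superfluous register move, and the expected st1 count rises from 14 to
28, consistent with each of the 14 new x3 tests compiling to a single st1.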