aarch64: Use memcpy to copy vector tables in vtbx4 intrinsics
Use __builtin_memcpy to copy vector structures instead of building a new opaque structure one vector at a time in each of the vtbx4 Neon intrinsics in arm_neon.h. This simplifies the header file and also improves code generation: superfluous move instructions were previously emitted for every register extraction/set in this additional structure.

gcc/ChangeLog:

2021-07-19  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/arm_neon.h (vtbx4_s8): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_oi one vector at a time.
	(vtbx4_u8): Likewise.
	(vtbx4_p8): Likewise.
This commit is contained in:
parent
f2f04d8b9d
commit
4848e283cc
@ -28417,10 +28417,7 @@ vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx)
|
||||
__builtin_aarch64_simd_oi __o;
|
||||
__temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
|
||||
__temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]);
|
||||
__o = __builtin_aarch64_set_qregoiv16qi (__o,
|
||||
(int8x16_t) __temp.val[0], 0);
|
||||
__o = __builtin_aarch64_set_qregoiv16qi (__o,
|
||||
(int8x16_t) __temp.val[1], 1);
|
||||
__builtin_memcpy (&__o, &__temp, sizeof (__temp));
|
||||
return __builtin_aarch64_qtbx2v8qi (__r, __o, __idx);
|
||||
}
|
||||
|
||||
@ -28432,10 +28429,7 @@ vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx)
|
||||
__builtin_aarch64_simd_oi __o;
|
||||
__temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
|
||||
__temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]);
|
||||
__o = __builtin_aarch64_set_qregoiv16qi (__o,
|
||||
(int8x16_t) __temp.val[0], 0);
|
||||
__o = __builtin_aarch64_set_qregoiv16qi (__o,
|
||||
(int8x16_t) __temp.val[1], 1);
|
||||
__builtin_memcpy (&__o, &__temp, sizeof (__temp));
|
||||
return (uint8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o,
|
||||
(int8x8_t)__idx);
|
||||
}
|
||||
@ -28448,10 +28442,7 @@ vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx)
|
||||
__builtin_aarch64_simd_oi __o;
|
||||
__temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
|
||||
__temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]);
|
||||
__o = __builtin_aarch64_set_qregoiv16qi (__o,
|
||||
(int8x16_t) __temp.val[0], 0);
|
||||
__o = __builtin_aarch64_set_qregoiv16qi (__o,
|
||||
(int8x16_t) __temp.val[1], 1);
|
||||
__builtin_memcpy (&__o, &__temp, sizeof (__temp));
|
||||
return (poly8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o,
|
||||
(int8x8_t)__idx);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user