[PATCH 6/17][ARM] Add data processing intrinsics for float16_t.
gcc/ 2016-09-23 Matthew Wahab <matthew.wahab@arm.com> * config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and V4HF modes. (arm_evpc_neon_vtrn): Likewise. (arm_evpc_neon_vrev): Likewise. (arm_evpc_neon_vext): Likewise. * config/arm/arm_neon.h (vbsl_f16): New. (vbslq_f16): New. (vdup_n_f16): New. (vdupq_n_f16): New. (vdup_lane_f16): New. (vdupq_lane_f16): New. (vext_f16): New. (vextq_f16): New. (vmov_n_f16): New. (vmovq_n_f16): New. (vrev64_f16): New. (vrev64q_f16): New. (vtrn_f16): New. (vtrnq_f16): New. (vuzp_f16): New. (vuzpq_f16): New. (vzip_f16): New. (vzipq_f16): New. * config/arm/arm_neon_buillins.def (vdup_n): New (v8hf, v4hf variants). (vdup_lane): New (v8hf, v4hf variants). (vext): New (v8hf, v4hf variants). (vbsl): New (v8hf, v4hf variants). * config/arm/iterators.md (VDQWH): New. (VH): New. (V_double_vector_mode): Add V8HF and V4HF. Fix white-space. (Scalar_mul_8_16): Fix white-space. (Is_d_reg): Add V4HF and V8HF. * config/arm/neon.md (neon_vdup_lane<mode>_internal): New. (neon_vdup_lane<mode>): New. (neon_vtrn<mode>_internal): Replace VDQW with VDQWH. (*neon_vtrn<mode>_insn): Likewise. (neon_vzip<mode>_internal): Likewise. Also fix white-space. (*neon_vzip<mode>_insn): Likewise (neon_vuzp<mode>_internal): Likewise. (*neon_vuzp<mode>_insn): Likewise * config/arm/vec-common.md (vec_perm_const<mode>): New. testsuite/ 2016-09-23 Matthew Wahab <matthew.wahab@arm.com> * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (FP16_SUPPORTED): New (expected-hfloat-16x4): Make conditional on __fp16 support. (expected-hfloat-16x8): Likewise. (vdup_n_f16): Disable for non-AArch64 targets. * gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests, conditional on FP16_SUPPORTED. * gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support for testing __fp16. * gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests, conditional on FP16_SUPPORTED. * gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise. From-SVN: r240404
This commit is contained in:
parent
50df9464b8
commit
b1a970a5cc
@ -1,3 +1,47 @@
|
||||
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
|
||||
|
||||
* config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and
|
||||
V4HF modes.
|
||||
(arm_evpc_neon_vtrn): Likewise.
|
||||
(arm_evpc_neon_vrev): Likewise.
|
||||
(arm_evpc_neon_vext): Likewise.
|
||||
* config/arm/arm_neon.h (vbsl_f16): New.
|
||||
(vbslq_f16): New.
|
||||
(vdup_n_f16): New.
|
||||
(vdupq_n_f16): New.
|
||||
(vdup_lane_f16): New.
|
||||
(vdupq_lane_f16): New.
|
||||
(vext_f16): New.
|
||||
(vextq_f16): New.
|
||||
(vmov_n_f16): New.
|
||||
(vmovq_n_f16): New.
|
||||
(vrev64_f16): New.
|
||||
(vrev64q_f16): New.
|
||||
(vtrn_f16): New.
|
||||
(vtrnq_f16): New.
|
||||
(vuzp_f16): New.
|
||||
(vuzpq_f16): New.
|
||||
(vzip_f16): New.
|
||||
(vzipq_f16): New.
|
||||
* config/arm/arm_neon_buillins.def (vdup_n): New (v8hf, v4hf variants).
|
||||
(vdup_lane): New (v8hf, v4hf variants).
|
||||
(vext): New (v8hf, v4hf variants).
|
||||
(vbsl): New (v8hf, v4hf variants).
|
||||
* config/arm/iterators.md (VDQWH): New.
|
||||
(VH): New.
|
||||
(V_double_vector_mode): Add V8HF and V4HF. Fix white-space.
|
||||
(Scalar_mul_8_16): Fix white-space.
|
||||
(Is_d_reg): Add V4HF and V8HF.
|
||||
* config/arm/neon.md (neon_vdup_lane<mode>_internal): New.
|
||||
(neon_vdup_lane<mode>): New.
|
||||
(neon_vtrn<mode>_internal): Replace VDQW with VDQWH.
|
||||
(*neon_vtrn<mode>_insn): Likewise.
|
||||
(neon_vzip<mode>_internal): Likewise. Also fix white-space.
|
||||
(*neon_vzip<mode>_insn): Likewise
|
||||
(neon_vuzp<mode>_internal): Likewise.
|
||||
(*neon_vuzp<mode>_insn): Likewise
|
||||
* config/arm/vec-common.md (vec_perm_const<mode>): New.
|
||||
|
||||
2016-09-23 Jiong Wang <jiong.wang@arm.com>
|
||||
Matthew Wahab <matthew.wahab@arm.com>
|
||||
|
||||
|
@ -28576,6 +28576,8 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
|
||||
case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
|
||||
case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
|
||||
case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
|
||||
case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
|
||||
case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
|
||||
case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
|
||||
case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
|
||||
case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
|
||||
@ -28649,6 +28651,8 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
|
||||
case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
|
||||
case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
|
||||
case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
|
||||
case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
|
||||
case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
|
||||
case V4SImode: gen = gen_neon_vzipv4si_internal; break;
|
||||
case V2SImode: gen = gen_neon_vzipv2si_internal; break;
|
||||
case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
|
||||
@ -28701,6 +28705,8 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
|
||||
case V8QImode: gen = gen_neon_vrev32v8qi; break;
|
||||
case V8HImode: gen = gen_neon_vrev64v8hi; break;
|
||||
case V4HImode: gen = gen_neon_vrev64v4hi; break;
|
||||
case V8HFmode: gen = gen_neon_vrev64v8hf; break;
|
||||
case V4HFmode: gen = gen_neon_vrev64v4hf; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@ -28784,6 +28790,8 @@ arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
|
||||
case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
|
||||
case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
|
||||
case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
|
||||
case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
|
||||
case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
|
||||
case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
|
||||
case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
|
||||
case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
|
||||
@ -28859,6 +28867,8 @@ arm_evpc_neon_vext (struct expand_vec_perm_d *d)
|
||||
case V8HImode: gen = gen_neon_vextv8hi; break;
|
||||
case V2SImode: gen = gen_neon_vextv2si; break;
|
||||
case V4SImode: gen = gen_neon_vextv4si; break;
|
||||
case V4HFmode: gen = gen_neon_vextv4hf; break;
|
||||
case V8HFmode: gen = gen_neon_vextv8hf; break;
|
||||
case V2SFmode: gen = gen_neon_vextv2sf; break;
|
||||
case V4SFmode: gen = gen_neon_vextv4sf; break;
|
||||
case V2DImode: gen = gen_neon_vextv2di; break;
|
||||
|
@ -14842,6 +14842,181 @@ vmull_high_p64 (poly64x2_t __a, poly64x2_t __b)
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
/* Half-precision data processing intrinsics. */
|
||||
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
|
||||
|
||||
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||
vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c)
|
||||
{
|
||||
return __builtin_neon_vbslv4hf ((int16x4_t)__a, __b, __c);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||
vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c)
|
||||
{
|
||||
return __builtin_neon_vbslv8hf ((int16x8_t)__a, __b, __c);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||
vdup_n_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_neon_vdup_nv4hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||
vdupq_n_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_neon_vdup_nv8hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||
vdup_lane_f16 (float16x4_t __a, const int __b)
|
||||
{
|
||||
return __builtin_neon_vdup_lanev4hf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||
vdupq_lane_f16 (float16x4_t __a, const int __b)
|
||||
{
|
||||
return __builtin_neon_vdup_lanev8hf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||
vext_f16 (float16x4_t __a, float16x4_t __b, const int __c)
|
||||
{
|
||||
return __builtin_neon_vextv4hf (__a, __b, __c);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||
vextq_f16 (float16x8_t __a, float16x8_t __b, const int __c)
|
||||
{
|
||||
return __builtin_neon_vextv8hf (__a, __b, __c);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||
vmov_n_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_neon_vdup_nv4hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||
vmovq_n_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_neon_vdup_nv8hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||
vrev64_f16 (float16x4_t __a)
|
||||
{
|
||||
return (float16x4_t)__builtin_shuffle (__a, (uint16x4_t){ 3, 2, 1, 0 });
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||
vrev64q_f16 (float16x8_t __a)
|
||||
{
|
||||
return
|
||||
(float16x8_t)__builtin_shuffle (__a,
|
||||
(uint16x8_t){ 3, 2, 1, 0, 7, 6, 5, 4 });
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
|
||||
vtrn_f16 (float16x4_t __a, float16x4_t __b)
|
||||
{
|
||||
float16x4x2_t __rv;
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 1, 7, 3 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 6, 2 });
|
||||
#else
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 2, 6 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 5, 3, 7 });
|
||||
#endif
|
||||
return __rv;
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
|
||||
vtrnq_f16 (float16x8_t __a, float16x8_t __b)
|
||||
{
|
||||
float16x8x2_t __rv;
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b,
|
||||
(uint16x8_t){ 9, 1, 11, 3, 13, 5, 15, 7 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b,
|
||||
(uint16x8_t){ 8, 0, 10, 2, 12, 4, 14, 6 });
|
||||
#else
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b,
|
||||
(uint16x8_t){ 0, 8, 2, 10, 4, 12, 6, 14 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b,
|
||||
(uint16x8_t){ 1, 9, 3, 11, 5, 13, 7, 15 });
|
||||
#endif
|
||||
return __rv;
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
|
||||
vuzp_f16 (float16x4_t __a, float16x4_t __b)
|
||||
{
|
||||
float16x4x2_t __rv;
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 7, 1, 3 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 6, 0, 2 });
|
||||
#else
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 2, 4, 6 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 3, 5, 7 });
|
||||
#endif
|
||||
return __rv;
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
|
||||
vuzpq_f16 (float16x8_t __a, float16x8_t __b)
|
||||
{
|
||||
float16x8x2_t __rv;
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||
{ 5, 7, 1, 3, 13, 15, 9, 11 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||
{ 4, 6, 0, 2, 12, 14, 8, 10 });
|
||||
#else
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b,
|
||||
(uint16x8_t){ 0, 2, 4, 6, 8, 10, 12, 14 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b,
|
||||
(uint16x8_t){ 1, 3, 5, 7, 9, 11, 13, 15 });
|
||||
#endif
|
||||
return __rv;
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
|
||||
vzip_f16 (float16x4_t __a, float16x4_t __b)
|
||||
{
|
||||
float16x4x2_t __rv;
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 6, 2, 7, 3 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 5, 1 });
|
||||
#else
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 1, 5 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 2, 6, 3, 7 });
|
||||
#endif
|
||||
return __rv;
|
||||
}
|
||||
|
||||
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
|
||||
vzipq_f16 (float16x8_t __a, float16x8_t __b)
|
||||
{
|
||||
float16x8x2_t __rv;
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||
{ 10, 2, 11, 3, 8, 0, 9, 1 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||
{ 14, 6, 15, 7, 12, 4, 13, 5 });
|
||||
#else
|
||||
__rv.val[0] = __builtin_shuffle (__a, __b,
|
||||
(uint16x8_t){ 0, 8, 1, 9, 2, 10, 3, 11 });
|
||||
__rv.val[1] = __builtin_shuffle (__a, __b,
|
||||
(uint16x8_t){ 4, 12, 5, 13, 6, 14, 7, 15 });
|
||||
#endif
|
||||
return __rv;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -166,8 +166,10 @@ VAR10 (SETLANE, vset_lane,
|
||||
VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di)
|
||||
VAR10 (UNOP, vdup_n,
|
||||
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
|
||||
VAR2 (UNOP, vdup_n, v8hf, v4hf)
|
||||
VAR10 (GETLANE, vdup_lane,
|
||||
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
|
||||
VAR2 (GETLANE, vdup_lane, v8hf, v4hf)
|
||||
VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di)
|
||||
VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
|
||||
VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
|
||||
@ -197,6 +199,7 @@ VAR2 (MAC_N, vmlslu_n, v4hi, v2si)
|
||||
VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si)
|
||||
VAR10 (SETLANE, vext,
|
||||
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
|
||||
VAR2 (SETLANE, vext, v8hf, v4hf)
|
||||
VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
|
||||
VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi)
|
||||
VAR2 (UNOP, vrev16, v8qi, v16qi)
|
||||
@ -208,6 +211,7 @@ VAR1 (UNOP, vcvtv4sf, v4hf)
|
||||
VAR1 (UNOP, vcvtv4hf, v4sf)
|
||||
VAR10 (TERNOP, vbsl,
|
||||
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
|
||||
VAR2 (TERNOP, vbsl, v8hf, v4hf)
|
||||
VAR2 (UNOP, copysignf, v2sf, v4sf)
|
||||
VAR2 (UNOP, vrintn, v2sf, v4sf)
|
||||
VAR2 (UNOP, vrinta, v2sf, v4sf)
|
||||
|
@ -119,6 +119,10 @@
|
||||
;; All supported vector modes (except those with 64-bit integer elements).
|
||||
(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
|
||||
|
||||
;; All supported vector modes including 16-bit float modes.
|
||||
(define_mode_iterator VDQWH [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF
|
||||
V8HF V4HF])
|
||||
|
||||
;; Supported integer vector modes (not 64 bit elements).
|
||||
(define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI])
|
||||
|
||||
@ -174,6 +178,9 @@
|
||||
;; Modes with 8-bit, 16-bit and 32-bit elements.
|
||||
(define_mode_iterator VU [V16QI V8HI V4SI])
|
||||
|
||||
;; Vector modes for 16-bit floating-point support.
|
||||
(define_mode_iterator VH [V8HF V4HF])
|
||||
|
||||
;; Iterators used for fixed-point support.
|
||||
(define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA])
|
||||
|
||||
@ -475,9 +482,10 @@
|
||||
;; Used for neon_vdup_lane, where the second operand is double-sized
|
||||
;; even when the first one is quad.
|
||||
(define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI")
|
||||
(V4SI "V2SI") (V4SF "V2SF")
|
||||
(V8QI "V8QI") (V4HI "V4HI")
|
||||
(V2SI "V2SI") (V2SF "V2SF")])
|
||||
(V4SI "V2SI") (V4SF "V2SF")
|
||||
(V8QI "V8QI") (V4HI "V4HI")
|
||||
(V2SI "V2SI") (V2SF "V2SF")
|
||||
(V8HF "V4HF") (V4HF "V4HF")])
|
||||
|
||||
;; Mode of result of comparison operations (and bit-select operand 1).
|
||||
(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
|
||||
@ -582,17 +590,17 @@
|
||||
(DI "false") (V2DI "false")])
|
||||
|
||||
(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
|
||||
(V4HI "true") (V8HI "true")
|
||||
(V2SI "false") (V4SI "false")
|
||||
(V2SF "false") (V4SF "false")
|
||||
(DI "false") (V2DI "false")])
|
||||
|
||||
(V4HI "true") (V8HI "true")
|
||||
(V2SI "false") (V4SI "false")
|
||||
(V2SF "false") (V4SF "false")
|
||||
(DI "false") (V2DI "false")])
|
||||
|
||||
(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
|
||||
(V4HI "true") (V8HI "false")
|
||||
(V2SI "true") (V4SI "false")
|
||||
(V2SF "true") (V4SF "false")
|
||||
(DI "true") (V2DI "false")])
|
||||
(DI "true") (V2DI "false")
|
||||
(V4HF "true") (V8HF "false")])
|
||||
|
||||
(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
|
||||
(V4HF "4") (V8HF "8")
|
||||
|
@ -3045,6 +3045,28 @@ if (BYTES_BIG_ENDIAN)
|
||||
[(set_attr "type" "neon_dup<q>")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vdup_lane<mode>_internal"
|
||||
[(set (match_operand:VH 0 "s_register_operand" "=w")
|
||||
(vec_duplicate:VH
|
||||
(vec_select:<V_elem>
|
||||
(match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
|
||||
"TARGET_NEON && TARGET_FP16"
|
||||
{
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
int elt = INTVAL (operands[2]);
|
||||
elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
|
||||
operands[2] = GEN_INT (elt);
|
||||
}
|
||||
if (<Is_d_reg>)
|
||||
return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
|
||||
else
|
||||
return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
|
||||
}
|
||||
[(set_attr "type" "neon_dup<q>")]
|
||||
)
|
||||
|
||||
(define_expand "neon_vdup_lane<mode>"
|
||||
[(match_operand:VDQW 0 "s_register_operand" "=w")
|
||||
(match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
|
||||
@ -3064,6 +3086,25 @@ if (BYTES_BIG_ENDIAN)
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "neon_vdup_lane<mode>"
|
||||
[(match_operand:VH 0 "s_register_operand")
|
||||
(match_operand:<V_double_vector_mode> 1 "s_register_operand")
|
||||
(match_operand:SI 2 "immediate_operand")]
|
||||
"TARGET_NEON && TARGET_FP16"
|
||||
{
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
unsigned int elt = INTVAL (operands[2]);
|
||||
unsigned int reg_nelts
|
||||
= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
|
||||
elt ^= reg_nelts - 1;
|
||||
operands[2] = GEN_INT (elt);
|
||||
}
|
||||
emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
|
||||
operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
; Scalar index is ignored, since only zero is valid here.
|
||||
(define_expand "neon_vdup_lanedi"
|
||||
[(match_operand:DI 0 "s_register_operand" "=w")
|
||||
@ -4281,25 +4322,25 @@ if (BYTES_BIG_ENDIAN)
|
||||
|
||||
(define_expand "neon_vtrn<mode>_internal"
|
||||
[(parallel
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
|
||||
(match_operand:VDQW 2 "s_register_operand" "")]
|
||||
[(set (match_operand:VDQWH 0 "s_register_operand")
|
||||
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
|
||||
(match_operand:VDQWH 2 "s_register_operand")]
|
||||
UNSPEC_VTRN1))
|
||||
(set (match_operand:VDQW 3 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
|
||||
(set (match_operand:VDQWH 3 "s_register_operand")
|
||||
(unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
|
||||
"TARGET_NEON"
|
||||
""
|
||||
)
|
||||
|
||||
;; Note: Different operand numbering to handle tied registers correctly.
|
||||
(define_insn "*neon_vtrn<mode>_insn"
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "=&w")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
|
||||
(match_operand:VDQW 3 "s_register_operand" "2")]
|
||||
UNSPEC_VTRN1))
|
||||
(set (match_operand:VDQW 2 "s_register_operand" "=&w")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 3)]
|
||||
UNSPEC_VTRN2))]
|
||||
[(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
|
||||
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
|
||||
(match_operand:VDQWH 3 "s_register_operand" "2")]
|
||||
UNSPEC_VTRN1))
|
||||
(set (match_operand:VDQWH 2 "s_register_operand" "=&w")
|
||||
(unspec:VDQWH [(match_dup 1) (match_dup 3)]
|
||||
UNSPEC_VTRN2))]
|
||||
"TARGET_NEON"
|
||||
"vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
|
||||
[(set_attr "type" "neon_permute<q>")]
|
||||
@ -4307,25 +4348,25 @@ if (BYTES_BIG_ENDIAN)
|
||||
|
||||
(define_expand "neon_vzip<mode>_internal"
|
||||
[(parallel
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
|
||||
(match_operand:VDQW 2 "s_register_operand" "")]
|
||||
UNSPEC_VZIP1))
|
||||
(set (match_operand:VDQW 3 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
|
||||
[(set (match_operand:VDQWH 0 "s_register_operand")
|
||||
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
|
||||
(match_operand:VDQWH 2 "s_register_operand")]
|
||||
UNSPEC_VZIP1))
|
||||
(set (match_operand:VDQWH 3 "s_register_operand")
|
||||
(unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
|
||||
"TARGET_NEON"
|
||||
""
|
||||
)
|
||||
|
||||
;; Note: Different operand numbering to handle tied registers correctly.
|
||||
(define_insn "*neon_vzip<mode>_insn"
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "=&w")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
|
||||
(match_operand:VDQW 3 "s_register_operand" "2")]
|
||||
UNSPEC_VZIP1))
|
||||
(set (match_operand:VDQW 2 "s_register_operand" "=&w")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 3)]
|
||||
UNSPEC_VZIP2))]
|
||||
[(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
|
||||
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
|
||||
(match_operand:VDQWH 3 "s_register_operand" "2")]
|
||||
UNSPEC_VZIP1))
|
||||
(set (match_operand:VDQWH 2 "s_register_operand" "=&w")
|
||||
(unspec:VDQWH [(match_dup 1) (match_dup 3)]
|
||||
UNSPEC_VZIP2))]
|
||||
"TARGET_NEON"
|
||||
"vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
|
||||
[(set_attr "type" "neon_zip<q>")]
|
||||
@ -4333,25 +4374,25 @@ if (BYTES_BIG_ENDIAN)
|
||||
|
||||
(define_expand "neon_vuzp<mode>_internal"
|
||||
[(parallel
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
|
||||
(match_operand:VDQW 2 "s_register_operand" "")]
|
||||
[(set (match_operand:VDQWH 0 "s_register_operand")
|
||||
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
|
||||
(match_operand:VDQWH 2 "s_register_operand")]
|
||||
UNSPEC_VUZP1))
|
||||
(set (match_operand:VDQW 3 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
|
||||
(set (match_operand:VDQWH 3 "s_register_operand" "")
|
||||
(unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
|
||||
"TARGET_NEON"
|
||||
""
|
||||
)
|
||||
|
||||
;; Note: Different operand numbering to handle tied registers correctly.
|
||||
(define_insn "*neon_vuzp<mode>_insn"
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "=&w")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
|
||||
(match_operand:VDQW 3 "s_register_operand" "2")]
|
||||
UNSPEC_VUZP1))
|
||||
(set (match_operand:VDQW 2 "s_register_operand" "=&w")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 3)]
|
||||
UNSPEC_VUZP2))]
|
||||
[(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
|
||||
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
|
||||
(match_operand:VDQWH 3 "s_register_operand" "2")]
|
||||
UNSPEC_VUZP1))
|
||||
(set (match_operand:VDQWH 2 "s_register_operand" "=&w")
|
||||
(unspec:VDQWH [(match_dup 1) (match_dup 3)]
|
||||
UNSPEC_VUZP2))]
|
||||
"TARGET_NEON"
|
||||
"vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
|
||||
[(set_attr "type" "neon_zip<q>")]
|
||||
|
@ -124,6 +124,20 @@
|
||||
FAIL;
|
||||
})
|
||||
|
||||
(define_expand "vec_perm_const<mode>"
|
||||
[(match_operand:VH 0 "s_register_operand")
|
||||
(match_operand:VH 1 "s_register_operand")
|
||||
(match_operand:VH 2 "s_register_operand")
|
||||
(match_operand:<V_cmp_result> 3)]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
if (arm_expand_vec_perm_const (operands[0], operands[1],
|
||||
operands[2], operands[3]))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
})
|
||||
|
||||
(define_expand "vec_perm<mode>"
|
||||
[(match_operand:VE 0 "s_register_operand" "")
|
||||
(match_operand:VE 1 "s_register_operand" "")
|
||||
|
@ -1,3 +1,23 @@
|
||||
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
|
||||
|
||||
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
|
||||
(FP16_SUPPORTED): New
|
||||
(expected-hfloat-16x4): Make conditional on __fp16 support.
|
||||
(expected-hfloat-16x8): Likewise.
|
||||
(vdup_n_f16): Disable for non-AArch64 targets.
|
||||
* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests,
|
||||
conditional on FP16_SUPPORTED.
|
||||
* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
|
||||
* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
|
||||
* gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
|
||||
* gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
|
||||
* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support
|
||||
for testing __fp16.
|
||||
* gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests,
|
||||
conditional on FP16_SUPPORTED.
|
||||
* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
|
||||
* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.
|
||||
|
||||
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
|
||||
|
||||
* gcc.target/arm/short-vfp-1.c: New.
|
||||
|
@ -16,6 +16,15 @@ extern void *memset(void *, int, size_t);
|
||||
extern void *memcpy(void *, const void *, size_t);
|
||||
extern size_t strlen(const char *);
|
||||
|
||||
/* Helper macro to select FP16 tests. */
|
||||
#if (!defined (__aarch64__) \
|
||||
&& (defined (__ARM_FP16_FORMAT_IEEE) \
|
||||
|| defined (__ARM_FP16_FORMAT_ALTERNATIVE)))
|
||||
#define FP16_SUPPORTED (1)
|
||||
#else
|
||||
#undef FP16_SUPPORTED
|
||||
#endif
|
||||
|
||||
/* Various string construction helpers. */
|
||||
|
||||
/*
|
||||
@ -511,7 +520,9 @@ static void clean_results (void)
|
||||
/* Helpers to initialize vectors. */
|
||||
#define VDUP(VAR, Q, T1, T2, W, N, V) \
|
||||
VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
|
||||
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
|
||||
#if (defined (__aarch64__) \
|
||||
&& (defined (__ARM_FP16_FORMAT_IEEE) \
|
||||
|| defined (__ARM_FP16_FORMAT_ALTERNATIVE)))
|
||||
/* Work around that there is no vdup_n_f16 intrinsic. */
|
||||
#define vdup_n_f16(VAL) \
|
||||
__extension__ \
|
||||
|
@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 };
|
||||
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
|
||||
0xf7, 0xf7, 0xf7, 0xf7 };
|
||||
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc09, 0xcb89,
|
||||
0xcb09, 0xca89 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 };
|
||||
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
|
||||
0xf6, 0xf6, 0xf6, 0xf6,
|
||||
@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
|
||||
0xf7, 0xf7, 0xf7, 0xf7 };
|
||||
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
|
||||
0xfff4, 0xfff4, 0xfff6, 0xfff6 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc09, 0xcb89,
|
||||
0xcb09, 0xca89,
|
||||
0xca09, 0xc989,
|
||||
0xc909, 0xc889 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001,
|
||||
0xc1600001, 0xc1500001 };
|
||||
|
||||
@ -66,6 +76,10 @@ void exec_vbsl (void)
|
||||
clean_results ();
|
||||
|
||||
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VLOAD(vector, buffer, , float, f, 16, 4);
|
||||
VLOAD(vector, buffer, q, float, f, 16, 8);
|
||||
#endif
|
||||
VLOAD(vector, buffer, , float, f, 32, 2);
|
||||
VLOAD(vector, buffer, q, float, f, 32, 4);
|
||||
|
||||
@ -80,6 +94,9 @@ void exec_vbsl (void)
|
||||
VDUP(vector2, , uint, u, 16, 4, 0xFFF2);
|
||||
VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0);
|
||||
VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VDUP(vector2, , float, f, 16, 4, -2.4f); /* -2.4f is 0xC0CD. */
|
||||
#endif
|
||||
VDUP(vector2, , float, f, 32, 2, -30.3f);
|
||||
VDUP(vector2, , poly, p, 8, 8, 0xF3);
|
||||
VDUP(vector2, , poly, p, 16, 4, 0xFFF2);
|
||||
@ -94,6 +111,9 @@ void exec_vbsl (void)
|
||||
VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3);
|
||||
VDUP(vector2, q, poly, p, 8, 16, 0xF3);
|
||||
VDUP(vector2, q, poly, p, 16, 8, 0xFFF2);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VDUP(vector2, q, float, f, 16, 8, -2.4f);
|
||||
#endif
|
||||
VDUP(vector2, q, float, f, 32, 4, -30.4f);
|
||||
|
||||
VDUP(vector_first, , uint, u, 8, 8, 0xF4);
|
||||
@ -111,10 +131,18 @@ void exec_vbsl (void)
|
||||
TEST_VBSL(uint, , poly, p, 16, 4);
|
||||
TEST_VBSL(uint, q, poly, p, 8, 16);
|
||||
TEST_VBSL(uint, q, poly, p, 16, 8);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VBSL(uint, , float, f, 16, 4);
|
||||
TEST_VBSL(uint, q, float, f, 16, 8);
|
||||
#endif
|
||||
TEST_VBSL(uint, , float, f, 32, 2);
|
||||
TEST_VBSL(uint, q, float, f, 32, 4);
|
||||
|
||||
#if defined (FP16_SUPPORTED)
|
||||
CHECK_RESULTS (TEST_MSG, "");
|
||||
#else
|
||||
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
|
||||
#endif
|
||||
}
|
||||
|
||||
int main (void)
|
||||
|
@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
|
||||
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
|
||||
0xf0, 0xf0, 0xf0, 0xf0 };
|
||||
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00,
|
||||
0xcc00, 0xcc00 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
|
||||
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
|
||||
0xf0, 0xf0, 0xf0, 0xf0,
|
||||
@ -46,6 +50,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
|
||||
0xf0, 0xf0, 0xf0, 0xf0 };
|
||||
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
|
||||
0xfff0, 0xfff0, 0xfff0, 0xfff0 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00,
|
||||
0xcc00, 0xcc00,
|
||||
0xcc00, 0xcc00,
|
||||
0xcc00, 0xcc00 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
|
||||
0xc1800000, 0xc1800000 };
|
||||
|
||||
@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
|
||||
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
|
||||
0xf1, 0xf1, 0xf1, 0xf1 };
|
||||
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80,
|
||||
0xcb80, 0xcb80 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
|
||||
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
|
||||
0xf1, 0xf1, 0xf1, 0xf1,
|
||||
@ -90,6 +104,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
|
||||
0xf1, 0xf1, 0xf1, 0xf1 };
|
||||
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
|
||||
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80,
|
||||
0xcb80, 0xcb80,
|
||||
0xcb80, 0xcb80,
|
||||
0xcb80, 0xcb80 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
|
||||
0xc1700000, 0xc1700000 };
|
||||
|
||||
@ -107,6 +127,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
|
||||
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
|
||||
0xf2, 0xf2, 0xf2, 0xf2 };
|
||||
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00,
|
||||
0xcb00, 0xcb00 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
|
||||
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
|
||||
0xf2, 0xf2, 0xf2, 0xf2,
|
||||
@ -134,6 +158,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
|
||||
0xf2, 0xf2, 0xf2, 0xf2 };
|
||||
VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
|
||||
0xfff2, 0xfff2, 0xfff2, 0xfff2 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00,
|
||||
0xcb00, 0xcb00,
|
||||
0xcb00, 0xcb00,
|
||||
0xcb00, 0xcb00 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
|
||||
0xc1600000, 0xc1600000 };
|
||||
|
||||
@ -171,6 +201,9 @@ void exec_vdup_vmov (void)
|
||||
TEST_VDUP(, uint, u, 64, 1);
|
||||
TEST_VDUP(, poly, p, 8, 8);
|
||||
TEST_VDUP(, poly, p, 16, 4);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VDUP(, float, f, 16, 4);
|
||||
#endif
|
||||
TEST_VDUP(, float, f, 32, 2);
|
||||
|
||||
TEST_VDUP(q, int, s, 8, 16);
|
||||
@ -183,8 +216,26 @@ void exec_vdup_vmov (void)
|
||||
TEST_VDUP(q, uint, u, 64, 2);
|
||||
TEST_VDUP(q, poly, p, 8, 16);
|
||||
TEST_VDUP(q, poly, p, 16, 8);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VDUP(q, float, f, 16, 8);
|
||||
#endif
|
||||
TEST_VDUP(q, float, f, 32, 4);
|
||||
|
||||
#if defined (FP16_SUPPORTED)
|
||||
switch (i) {
|
||||
case 0:
|
||||
CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
|
||||
break;
|
||||
case 1:
|
||||
CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
|
||||
break;
|
||||
case 2:
|
||||
CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
#else
|
||||
switch (i) {
|
||||
case 0:
|
||||
CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
|
||||
@ -198,6 +249,7 @@ void exec_vdup_vmov (void)
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Do the same tests with vmov. Use the same expected results. */
|
||||
@ -216,6 +268,9 @@ void exec_vdup_vmov (void)
|
||||
TEST_VMOV(, uint, u, 64, 1);
|
||||
TEST_VMOV(, poly, p, 8, 8);
|
||||
TEST_VMOV(, poly, p, 16, 4);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VMOV(, float, f, 16, 4);
|
||||
#endif
|
||||
TEST_VMOV(, float, f, 32, 2);
|
||||
|
||||
TEST_VMOV(q, int, s, 8, 16);
|
||||
@ -228,8 +283,26 @@ void exec_vdup_vmov (void)
|
||||
TEST_VMOV(q, uint, u, 64, 2);
|
||||
TEST_VMOV(q, poly, p, 8, 16);
|
||||
TEST_VMOV(q, poly, p, 16, 8);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VMOV(q, float, f, 16, 8);
|
||||
#endif
|
||||
TEST_VMOV(q, float, f, 32, 4);
|
||||
|
||||
#if defined (FP16_SUPPORTED)
|
||||
switch (i) {
|
||||
case 0:
|
||||
CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
|
||||
break;
|
||||
case 1:
|
||||
CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
|
||||
break;
|
||||
case 2:
|
||||
CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
#else
|
||||
switch (i) {
|
||||
case 0:
|
||||
CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
|
||||
@ -243,6 +316,8 @@ void exec_vdup_vmov (void)
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,10 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7,
|
||||
0xf7, 0xf7, 0xf7, 0xf7 };
|
||||
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
|
||||
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xca80, 0xca80,
|
||||
0xca80, 0xca80 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
|
||||
0xf2, 0xf2, 0xf2, 0xf2,
|
||||
0xf2, 0xf2, 0xf2, 0xf2,
|
||||
@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
|
||||
0xf5, 0xf5, 0xf5, 0xf5 };
|
||||
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
|
||||
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80,
|
||||
0xca80, 0xca80,
|
||||
0xca80, 0xca80,
|
||||
0xca80, 0xca80 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
|
||||
0xc1700000, 0xc1700000 };
|
||||
|
||||
@ -63,6 +73,9 @@ void exec_vdup_lane (void)
|
||||
clean_results ();
|
||||
|
||||
TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VLOAD(vector, buffer, , float, f, 16, 4);
|
||||
#endif
|
||||
VLOAD(vector, buffer, , float, f, 32, 2);
|
||||
|
||||
/* Choose lane arbitrarily. */
|
||||
@ -76,6 +89,9 @@ void exec_vdup_lane (void)
|
||||
TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0);
|
||||
TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7);
|
||||
TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VDUP_LANE(, float, f, 16, 4, 4, 3);
|
||||
#endif
|
||||
TEST_VDUP_LANE(, float, f, 32, 2, 2, 1);
|
||||
|
||||
TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2);
|
||||
@ -88,9 +104,16 @@ void exec_vdup_lane (void)
|
||||
TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0);
|
||||
TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5);
|
||||
TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3);
|
||||
#endif
|
||||
TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
|
||||
|
||||
#if defined (FP16_SUPPORTED)
|
||||
CHECK_RESULTS (TEST_MSG, "");
|
||||
#else
|
||||
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
|
||||
#endif
|
||||
}
|
||||
|
||||
int main (void)
|
||||
|
@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
|
||||
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55,
|
||||
0x55, 0x55, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcb00, 0xca80,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 };
|
||||
VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11,
|
||||
0x11, 0x11, 0x11, 0x11,
|
||||
@ -39,6 +43,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
|
||||
0x55, 0x55, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66,
|
||||
0x66, 0x66, 0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xc880, 0x4b4d,
|
||||
0x4b4d, 0x4b4d,
|
||||
0x4b4d, 0x4b4d,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd,
|
||||
0x4204cccd, 0x4204cccd };
|
||||
|
||||
@ -60,6 +70,10 @@ void exec_vext (void)
|
||||
clean_results ();
|
||||
|
||||
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
|
||||
#ifdef FP16_SUPPORTED
|
||||
VLOAD(vector1, buffer, , float, f, 16, 4);
|
||||
VLOAD(vector1, buffer, q, float, f, 16, 8);
|
||||
#endif
|
||||
VLOAD(vector1, buffer, , float, f, 32, 2);
|
||||
VLOAD(vector1, buffer, q, float, f, 32, 4);
|
||||
|
||||
@ -74,6 +88,9 @@ void exec_vext (void)
|
||||
VDUP(vector2, , uint, u, 64, 1, 0x88);
|
||||
VDUP(vector2, , poly, p, 8, 8, 0x55);
|
||||
VDUP(vector2, , poly, p, 16, 4, 0x66);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */
|
||||
#endif
|
||||
VDUP(vector2, , float, f, 32, 2, 33.6f);
|
||||
|
||||
VDUP(vector2, q, int, s, 8, 16, 0x11);
|
||||
@ -86,6 +103,9 @@ void exec_vext (void)
|
||||
VDUP(vector2, q, uint, u, 64, 2, 0x88);
|
||||
VDUP(vector2, q, poly, p, 8, 16, 0x55);
|
||||
VDUP(vector2, q, poly, p, 16, 8, 0x66);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VDUP (vector2, q, float, f, 16, 8, 14.6f);
|
||||
#endif
|
||||
VDUP(vector2, q, float, f, 32, 4, 33.2f);
|
||||
|
||||
/* Choose arbitrary extract offsets. */
|
||||
@ -99,6 +119,9 @@ void exec_vext (void)
|
||||
TEST_VEXT(, uint, u, 64, 1, 0);
|
||||
TEST_VEXT(, poly, p, 8, 8, 6);
|
||||
TEST_VEXT(, poly, p, 16, 4, 2);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VEXT(, float, f, 16, 4, 2);
|
||||
#endif
|
||||
TEST_VEXT(, float, f, 32, 2, 1);
|
||||
|
||||
TEST_VEXT(q, int, s, 8, 16, 14);
|
||||
@ -111,9 +134,16 @@ void exec_vext (void)
|
||||
TEST_VEXT(q, uint, u, 64, 2, 1);
|
||||
TEST_VEXT(q, poly, p, 8, 16, 12);
|
||||
TEST_VEXT(q, poly, p, 16, 8, 6);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VEXT(q, float, f, 16, 8, 7);
|
||||
#endif
|
||||
TEST_VEXT(q, float, f, 32, 4, 3);
|
||||
|
||||
#if defined (FP16_SUPPORTED)
|
||||
CHECK_RESULTS (TEST_MSG, "");
|
||||
#else
|
||||
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
|
||||
#endif
|
||||
}
|
||||
|
||||
int main (void)
|
||||
|
@ -63,6 +63,10 @@ VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 };
|
||||
VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
|
||||
0xf3, 0xf2, 0xf1, 0xf0 };
|
||||
VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00,
|
||||
0xcb80, 0xcc00 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 };
|
||||
VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
|
||||
0xf3, 0xf2, 0xf1, 0xf0,
|
||||
@ -86,6 +90,12 @@ VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
|
||||
0xfb, 0xfa, 0xf9, 0xf8 };
|
||||
VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0,
|
||||
0xfff7, 0xfff6, 0xfff5, 0xfff4 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00,
|
||||
0xcb80, 0xcc00,
|
||||
0xc880, 0xc900,
|
||||
0xc980, 0xca00 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000,
|
||||
0xc1500000, 0xc1600000 };
|
||||
|
||||
@ -104,6 +114,10 @@ void exec_vrev (void)
|
||||
|
||||
/* Initialize input "vector" from "buffer". */
|
||||
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VLOAD (vector, buffer, , float, f, 16, 4);
|
||||
VLOAD (vector, buffer, q, float, f, 16, 8);
|
||||
#endif
|
||||
VLOAD(vector, buffer, , float, f, 32, 2);
|
||||
VLOAD(vector, buffer, q, float, f, 32, 4);
|
||||
|
||||
@ -187,6 +201,12 @@ void exec_vrev (void)
|
||||
CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, "");
|
||||
CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, "");
|
||||
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VREV (, float, f, 16, 4, 64);
|
||||
TEST_VREV (q, float, f, 16, 8, 64);
|
||||
CHECK_FP(TEST_MSG, float, 16, 4, PRIx32, expected_vrev64, "");
|
||||
CHECK_FP(TEST_MSG, float, 16, 8, PRIx32, expected_vrev64, "");
|
||||
#endif
|
||||
TEST_VREV(, float, f, 32, 2, 64);
|
||||
TEST_VREV(q, float, f, 32, 4, 64);
|
||||
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_vrev64, "");
|
||||
|
@ -53,9 +53,17 @@ void FNNAME (INSN_NAME) (void)
|
||||
DECL_VSHUFFLE(float, 32, 4)
|
||||
|
||||
DECL_ALL_VSHUFFLE();
|
||||
#if defined (FP16_SUPPORTED)
|
||||
DECL_VSHUFFLE (float, 16, 4);
|
||||
DECL_VSHUFFLE (float, 16, 8);
|
||||
#endif
|
||||
|
||||
/* Initialize input "vector" from "buffer". */
|
||||
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VLOAD (vector1, buffer, , float, f, 16, 4);
|
||||
VLOAD (vector1, buffer, q, float, f, 16, 8);
|
||||
#endif
|
||||
VLOAD(vector1, buffer, , float, f, 32, 2);
|
||||
VLOAD(vector1, buffer, q, float, f, 32, 4);
|
||||
|
||||
@ -68,6 +76,9 @@ void FNNAME (INSN_NAME) (void)
|
||||
VDUP(vector2, , uint, u, 32, 2, 0x77);
|
||||
VDUP(vector2, , poly, p, 8, 8, 0x55);
|
||||
VDUP(vector2, , poly, p, 16, 4, 0x66);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */
|
||||
#endif
|
||||
VDUP(vector2, , float, f, 32, 2, 33.6f);
|
||||
|
||||
VDUP(vector2, q, int, s, 8, 16, 0x11);
|
||||
@ -78,8 +89,11 @@ void FNNAME (INSN_NAME) (void)
|
||||
VDUP(vector2, q, uint, u, 32, 4, 0x77);
|
||||
VDUP(vector2, q, poly, p, 8, 16, 0x55);
|
||||
VDUP(vector2, q, poly, p, 16, 8, 0x66);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VDUP (vector2, q, float, f, 16, 8, 14.6f);
|
||||
#endif
|
||||
VDUP(vector2, q, float, f, 32, 4, 33.8f);
|
||||
|
||||
|
||||
#define TEST_ALL_VSHUFFLE(INSN) \
|
||||
TEST_VSHUFFLE(INSN, , int, s, 8, 8); \
|
||||
TEST_VSHUFFLE(INSN, , int, s, 16, 4); \
|
||||
@ -100,6 +114,10 @@ void FNNAME (INSN_NAME) (void)
|
||||
TEST_VSHUFFLE(INSN, q, poly, p, 16, 8); \
|
||||
TEST_VSHUFFLE(INSN, q, float, f, 32, 4)
|
||||
|
||||
#define TEST_VSHUFFLE_FP16(INSN) \
|
||||
TEST_VSHUFFLE(INSN, , float, f, 16, 4); \
|
||||
TEST_VSHUFFLE(INSN, q, float, f, 16, 8);
|
||||
|
||||
#define TEST_ALL_EXTRA_CHUNKS() \
|
||||
TEST_EXTRA_CHUNK(int, 8, 8, 1); \
|
||||
TEST_EXTRA_CHUNK(int, 16, 4, 1); \
|
||||
@ -143,17 +161,37 @@ void FNNAME (INSN_NAME) (void)
|
||||
CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
|
||||
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
|
||||
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHECK_RESULTS_VSHUFFLE_FP16(test_name,EXPECTED,comment) \
|
||||
{ \
|
||||
CHECK_FP (test_name, float, 16, 4, PRIx16, EXPECTED, comment); \
|
||||
CHECK_FP (test_name, float, 16, 8, PRIx16, EXPECTED, comment); \
|
||||
}
|
||||
|
||||
clean_results ();
|
||||
|
||||
/* Execute the tests. */
|
||||
TEST_ALL_VSHUFFLE(INSN_NAME);
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_VSHUFFLE_FP16 (INSN_NAME);
|
||||
#endif
|
||||
|
||||
CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)");
|
||||
#if defined (FP16_SUPPORTED)
|
||||
CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected0, "(chunk 0)");
|
||||
#endif
|
||||
|
||||
TEST_ALL_EXTRA_CHUNKS();
|
||||
#if defined (FP16_SUPPORTED)
|
||||
TEST_EXTRA_CHUNK (float, 16, 4, 1);
|
||||
TEST_EXTRA_CHUNK (float, 16, 8, 1);
|
||||
#endif
|
||||
|
||||
CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)");
|
||||
#if defined (FP16_SUPPORTED)
|
||||
CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected1, "(chunk 1)");
|
||||
#endif
|
||||
}
|
||||
|
||||
int main (void)
|
||||
|
@ -15,6 +15,10 @@ VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
|
||||
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55,
|
||||
0xf2, 0xf3, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
|
||||
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11,
|
||||
0xf2, 0xf3, 0x11, 0x11,
|
||||
@ -36,6 +40,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55,
|
||||
0xf6, 0xf7, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66,
|
||||
0xfff2, 0xfff3, 0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
|
||||
0x4b4d, 0x4b4d,
|
||||
0xcb00, 0xca80,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
|
||||
0x42073333, 0x42073333 };
|
||||
|
||||
@ -51,6 +61,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
|
||||
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55,
|
||||
0xf6, 0xf7, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb00, 0xca80,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
|
||||
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11,
|
||||
0xfa, 0xfb, 0x11, 0x11,
|
||||
@ -72,6 +86,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55,
|
||||
0xfe, 0xff, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66,
|
||||
0xfff6, 0xfff7, 0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xca00, 0xc980,
|
||||
0x4b4d, 0x4b4d,
|
||||
0xc900, 0xc880,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1600000, 0xc1500000,
|
||||
0x42073333, 0x42073333 };
|
||||
|
||||
|
@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
|
||||
0xf4, 0xf5, 0xf6, 0xf7 };
|
||||
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1,
|
||||
0xfff2, 0xfff3 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
|
||||
0xcb00, 0xca80 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
|
||||
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
|
||||
0xf4, 0xf5, 0xf6, 0xf7,
|
||||
@ -48,6 +52,12 @@ VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1,
|
||||
0xfff2, 0xfff3,
|
||||
0xfff4, 0xfff5,
|
||||
0xfff6, 0xfff7 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
|
||||
0xcb00, 0xca80,
|
||||
0xca00, 0xc980,
|
||||
0xc900, 0xc880 };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
|
||||
0xc1600000, 0xc1500000 };
|
||||
|
||||
@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
|
||||
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55,
|
||||
0x55, 0x55, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0x4b4d, 0x4b4d,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
|
||||
VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11,
|
||||
0x11, 0x11, 0x11, 0x11,
|
||||
@ -84,6 +98,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55,
|
||||
0x55, 0x55, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66,
|
||||
0x66, 0x66, 0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0x4b4d, 0x4b4d,
|
||||
0x4b4d, 0x4b4d,
|
||||
0x4b4d, 0x4b4d,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333,
|
||||
0x42073333, 0x42073333 };
|
||||
|
||||
|
@ -18,6 +18,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55,
|
||||
0xf1, 0xf5, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2,
|
||||
0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb00,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
|
||||
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11,
|
||||
0xf1, 0xf9, 0x11, 0x11,
|
||||
@ -41,6 +45,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55,
|
||||
0xf3, 0xfb, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66,
|
||||
0xfff1, 0xfff5, 0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xca00,
|
||||
0x4b4d, 0x4b4d,
|
||||
0xcb80, 0xc980,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000,
|
||||
0x42073333, 0x42073333 };
|
||||
|
||||
@ -59,6 +69,10 @@ VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55,
|
||||
0xf3, 0xf7, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3,
|
||||
0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xca80,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
|
||||
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11,
|
||||
0xf5, 0xfd, 0x11, 0x11,
|
||||
@ -82,6 +96,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55,
|
||||
0xf7, 0xff, 0x55, 0x55 };
|
||||
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66,
|
||||
0xfff3, 0xfff7, 0x66, 0x66 };
|
||||
#if defined (FP16_SUPPORTED)
|
||||
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb00, 0xc900,
|
||||
0x4b4d, 0x4b4d,
|
||||
0xca80, 0xc880,
|
||||
0x4b4d, 0x4b4d };
|
||||
#endif
|
||||
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1500000,
|
||||
0x42073333, 0x42073333 };
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user