[PATCH 6/17][ARM] Add data processing intrinsics for float16_t.

gcc/
2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>

	* config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and
	V4HF modes.
	(arm_evpc_neon_vtrn): Likewise.
	(arm_evpc_neon_vrev): Likewise.
	(arm_evpc_neon_vext): Likewise.
	* config/arm/arm_neon.h (vbsl_f16): New.
	(vbslq_f16): New.
	(vdup_n_f16): New.
	(vdupq_n_f16): New.
	(vdup_lane_f16): New.
	(vdupq_lane_f16): New.
	(vext_f16): New.
	(vextq_f16): New.
	(vmov_n_f16): New.
	(vmovq_n_f16): New.
	(vrev64_f16): New.
	(vrev64q_f16): New.
	(vtrn_f16): New.
	(vtrnq_f16): New.
	(vuzp_f16): New.
	(vuzpq_f16): New.
	(vzip_f16): New.
	(vzipq_f16): New.
	* config/arm/arm_neon_builtins.def (vdup_n): New (v8hf, v4hf variants).
	(vdup_lane): New (v8hf, v4hf variants).
	(vext): New (v8hf, v4hf variants).
	(vbsl): New (v8hf, v4hf variants).
	* config/arm/iterators.md (VDQWH): New.
	(VH): New.
	(V_double_vector_mode): Add V8HF and V4HF.  Fix white-space.
	(Scalar_mul_8_16): Fix white-space.
	(Is_d_reg): Add V4HF and V8HF.
	* config/arm/neon.md (neon_vdup_lane<mode>_internal): New.
	(neon_vdup_lane<mode>): New.
	(neon_vtrn<mode>_internal): Replace VDQW with VDQWH.
	(*neon_vtrn<mode>_insn): Likewise.
	(neon_vzip<mode>_internal): Likewise. Also fix white-space.
	(*neon_vzip<mode>_insn): Likewise.
	(neon_vuzp<mode>_internal): Likewise.
	(*neon_vuzp<mode>_insn): Likewise.
	* config/arm/vec-common.md (vec_perm_const<mode>): New.

testsuite/
2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
	(FP16_SUPPORTED): New.
	(expected-hfloat-16x4): Make conditional on __fp16 support.
	(expected-hfloat-16x8): Likewise.
	(vdup_n_f16): Disable for non-AArch64 targets.
	* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests,
	conditional on FP16_SUPPORTED.
	* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support
	for testing __fp16.
	* gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests,
	conditional on FP16_SUPPORTED.
	* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.

From-SVN: r240404
Matthew Wahab 2016-09-23 09:23:01 +00:00 committed by Matthew Wahab
parent 50df9464b8
commit b1a970a5cc
18 changed files with 650 additions and 49 deletions

gcc/ChangeLog

@ -1,3 +1,47 @@
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
* config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and
V4HF modes.
(arm_evpc_neon_vtrn): Likewise.
(arm_evpc_neon_vrev): Likewise.
(arm_evpc_neon_vext): Likewise.
* config/arm/arm_neon.h (vbsl_f16): New.
(vbslq_f16): New.
(vdup_n_f16): New.
(vdupq_n_f16): New.
(vdup_lane_f16): New.
(vdupq_lane_f16): New.
(vext_f16): New.
(vextq_f16): New.
(vmov_n_f16): New.
(vmovq_n_f16): New.
(vrev64_f16): New.
(vrev64q_f16): New.
(vtrn_f16): New.
(vtrnq_f16): New.
(vuzp_f16): New.
(vuzpq_f16): New.
(vzip_f16): New.
(vzipq_f16): New.
* config/arm/arm_neon_builtins.def (vdup_n): New (v8hf, v4hf variants).
(vdup_lane): New (v8hf, v4hf variants).
(vext): New (v8hf, v4hf variants).
(vbsl): New (v8hf, v4hf variants).
* config/arm/iterators.md (VDQWH): New.
(VH): New.
(V_double_vector_mode): Add V8HF and V4HF. Fix white-space.
(Scalar_mul_8_16): Fix white-space.
(Is_d_reg): Add V4HF and V8HF.
* config/arm/neon.md (neon_vdup_lane<mode>_internal): New.
(neon_vdup_lane<mode>): New.
(neon_vtrn<mode>_internal): Replace VDQW with VDQWH.
(*neon_vtrn<mode>_insn): Likewise.
(neon_vzip<mode>_internal): Likewise. Also fix white-space.
(*neon_vzip<mode>_insn): Likewise.
(neon_vuzp<mode>_internal): Likewise.
(*neon_vuzp<mode>_insn): Likewise.
* config/arm/vec-common.md (vec_perm_const<mode>): New.
2016-09-23 Jiong Wang <jiong.wang@arm.com>
Matthew Wahab <matthew.wahab@arm.com>

gcc/config/arm/arm.c

@ -28576,6 +28576,8 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
@ -28649,6 +28651,8 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
case V4SImode: gen = gen_neon_vzipv4si_internal; break;
case V2SImode: gen = gen_neon_vzipv2si_internal; break;
case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
@ -28701,6 +28705,8 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
case V8QImode: gen = gen_neon_vrev32v8qi; break;
case V8HImode: gen = gen_neon_vrev64v8hi; break;
case V4HImode: gen = gen_neon_vrev64v4hi; break;
case V8HFmode: gen = gen_neon_vrev64v8hf; break;
case V4HFmode: gen = gen_neon_vrev64v4hf; break;
default:
return false;
}
@ -28784,6 +28790,8 @@ arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
@ -28859,6 +28867,8 @@ arm_evpc_neon_vext (struct expand_vec_perm_d *d)
case V8HImode: gen = gen_neon_vextv8hi; break;
case V2SImode: gen = gen_neon_vextv2si; break;
case V4SImode: gen = gen_neon_vextv4si; break;
case V4HFmode: gen = gen_neon_vextv4hf; break;
case V8HFmode: gen = gen_neon_vextv8hf; break;
case V2SFmode: gen = gen_neon_vextv2sf; break;
case V4SFmode: gen = gen_neon_vextv4sf; break;
case V2DImode: gen = gen_neon_vextv2di; break;

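With the V8HF/V4HF cases added, arm_expand_vec_perm_const can recognize constant half-float shuffles. As an illustration (a sketch, not part of the patch), these are constant masks the recognizers above can now match for 4 x __fp16 vectors; they mirror the __builtin_shuffle masks used by the new arm_neon.h intrinsics below:

#include <arm_neon.h>

/* Illustrative only: constant permutes that the recognizers above can
   now handle for half-float modes.  */
float16x4_t do_uzp (float16x4_t a, float16x4_t b)
{ return __builtin_shuffle (a, b, (uint16x4_t){ 0, 2, 4, 6 }); } /* vuzp  */
float16x4_t do_zip (float16x4_t a, float16x4_t b)
{ return __builtin_shuffle (a, b, (uint16x4_t){ 0, 4, 1, 5 }); } /* vzip  */
float16x4_t do_trn (float16x4_t a, float16x4_t b)
{ return __builtin_shuffle (a, b, (uint16x4_t){ 0, 4, 2, 6 }); } /* vtrn  */
float16x4_t do_ext (float16x4_t a, float16x4_t b)
{ return __builtin_shuffle (a, b, (uint16x4_t){ 1, 2, 3, 4 }); } /* vext #1  */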
gcc/config/arm/arm_neon.h

@ -14842,6 +14842,181 @@ vmull_high_p64 (poly64x2_t __a, poly64x2_t __b)
#pragma GCC pop_options
/* Half-precision data processing intrinsics. */
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c)
{
return __builtin_neon_vbslv4hf ((int16x4_t)__a, __b, __c);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c)
{
return __builtin_neon_vbslv8hf ((int16x8_t)__a, __b, __c);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vdup_n_f16 (float16_t __a)
{
return __builtin_neon_vdup_nv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vdupq_n_f16 (float16_t __a)
{
return __builtin_neon_vdup_nv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vdup_lane_f16 (float16x4_t __a, const int __b)
{
return __builtin_neon_vdup_lanev4hf (__a, __b);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vdupq_lane_f16 (float16x4_t __a, const int __b)
{
return __builtin_neon_vdup_lanev8hf (__a, __b);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vext_f16 (float16x4_t __a, float16x4_t __b, const int __c)
{
return __builtin_neon_vextv4hf (__a, __b, __c);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vextq_f16 (float16x8_t __a, float16x8_t __b, const int __c)
{
return __builtin_neon_vextv8hf (__a, __b, __c);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vmov_n_f16 (float16_t __a)
{
return __builtin_neon_vdup_nv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vmovq_n_f16 (float16_t __a)
{
return __builtin_neon_vdup_nv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrev64_f16 (float16x4_t __a)
{
return (float16x4_t)__builtin_shuffle (__a, (uint16x4_t){ 3, 2, 1, 0 });
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrev64q_f16 (float16x8_t __a)
{
return
(float16x8_t)__builtin_shuffle (__a,
(uint16x8_t){ 3, 2, 1, 0, 7, 6, 5, 4 });
}
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vtrn_f16 (float16x4_t __a, float16x4_t __b)
{
float16x4x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 1, 7, 3 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 6, 2 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 2, 6 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 5, 3, 7 });
#endif
return __rv;
}
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vtrnq_f16 (float16x8_t __a, float16x8_t __b)
{
float16x8x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 9, 1, 11, 3, 13, 5, 15, 7 });
__rv.val[1] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 8, 0, 10, 2, 12, 4, 14, 6 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 0, 8, 2, 10, 4, 12, 6, 14 });
__rv.val[1] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 1, 9, 3, 11, 5, 13, 7, 15 });
#endif
return __rv;
}
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vuzp_f16 (float16x4_t __a, float16x4_t __b)
{
float16x4x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 7, 1, 3 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 6, 0, 2 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 2, 4, 6 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 3, 5, 7 });
#endif
return __rv;
}
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vuzpq_f16 (float16x8_t __a, float16x8_t __b)
{
float16x8x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 5, 7, 1, 3, 13, 15, 9, 11 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 4, 6, 0, 2, 12, 14, 8, 10 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 0, 2, 4, 6, 8, 10, 12, 14 });
__rv.val[1] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 1, 3, 5, 7, 9, 11, 13, 15 });
#endif
return __rv;
}
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vzip_f16 (float16x4_t __a, float16x4_t __b)
{
float16x4x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 6, 2, 7, 3 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 5, 1 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 1, 5 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 2, 6, 3, 7 });
#endif
return __rv;
}
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vzipq_f16 (float16x8_t __a, float16x8_t __b)
{
float16x8x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 10, 2, 11, 3, 8, 0, 9, 1 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 14, 6, 15, 7, 12, 4, 13, 5 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 0, 8, 1, 9, 2, 10, 3, 11 });
__rv.val[1] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 4, 12, 5, 13, 6, 14, 7, 15 });
#endif
return __rv;
}
#endif
#ifdef __cplusplus
}
#endif

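For reference, a minimal usage sketch of the new intrinsics (hypothetical test code, not part of the patch; assumes a target with NEON and __fp16, e.g. -mfpu=neon-fp16 -mfp16-format=ieee):

#include <arm_neon.h>

int
main (void)
{
  float16x4_t a = vdup_n_f16 (1.0);      /* { 1, 1, 1, 1 }  */
  float16x4_t b = vmov_n_f16 (2.0);      /* { 2, 2, 2, 2 }; vmov_n is the
					    same operation as vdup_n.  */

  /* vext starts at lane 1 of A and continues into B.  */
  float16x4_t e = vext_f16 (a, b, 1);    /* { 1, 1, 1, 2 }  */

  /* Both vdup_lane forms read a 64-bit (4-lane) source vector.  */
  float16x4_t d = vdup_lane_f16 (e, 3);  /* { 2, 2, 2, 2 }  */
  float16x8_t q = vdupq_lane_f16 (e, 3); /* { 2, 2, 2, 2, 2, 2, 2, 2 }  */

  /* vbsl selects B bits where the mask is set, A bits elsewhere.  */
  uint16x4_t m = vcreate_u16 (0xffffull);
  float16x4_t s = vbsl_f16 (m, b, a);    /* { 2, 1, 1, 1 }  */

  /* The permutes return both vectors of the result pair.  */
  float16x4x2_t t = vtrn_f16 (a, b);     /* val[0] = val[1] = { 1, 2, 1, 2 }  */

  (void) d; (void) q; (void) s; (void) t;
  return 0;
}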
gcc/config/arm/arm_neon_builtins.def

@ -166,8 +166,10 @@ VAR10 (SETLANE, vset_lane,
VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di)
VAR10 (UNOP, vdup_n,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (UNOP, vdup_n, v8hf, v4hf)
VAR10 (GETLANE, vdup_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (GETLANE, vdup_lane, v8hf, v4hf)
VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
@ -197,6 +199,7 @@ VAR2 (MAC_N, vmlslu_n, v4hi, v2si)
VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si)
VAR10 (SETLANE, vext,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (SETLANE, vext, v8hf, v4hf)
VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi)
VAR2 (UNOP, vrev16, v8qi, v16qi)
@ -208,6 +211,7 @@ VAR1 (UNOP, vcvtv4sf, v4hf)
VAR1 (UNOP, vcvtv4hf, v4sf)
VAR10 (TERNOP, vbsl,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (TERNOP, vbsl, v8hf, v4hf)
VAR2 (UNOP, copysignf, v2sf, v4sf)
VAR2 (UNOP, vrintn, v2sf, v4sf)
VAR2 (UNOP, vrinta, v2sf, v4sf)

gcc/config/arm/iterators.md

@ -119,6 +119,10 @@
;; All supported vector modes (except those with 64-bit integer elements).
(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
;; All supported vector modes including 16-bit float modes.
(define_mode_iterator VDQWH [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF
V8HF V4HF])
;; Supported integer vector modes (not 64 bit elements).
(define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI])
@ -174,6 +178,9 @@
;; Modes with 8-bit, 16-bit and 32-bit elements.
(define_mode_iterator VU [V16QI V8HI V4SI])
;; Vector modes for 16-bit floating-point support.
(define_mode_iterator VH [V8HF V4HF])
;; Iterators used for fixed-point support.
(define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA])
@ -475,9 +482,10 @@
;; Used for neon_vdup_lane, where the second operand is double-sized
;; even when the first one is quad.
(define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI")
(V4SI "V2SI") (V4SF "V2SF")
(V8QI "V8QI") (V4HI "V4HI")
(V2SI "V2SI") (V2SF "V2SF")])
(V4SI "V2SI") (V4SF "V2SF")
(V8QI "V8QI") (V4HI "V4HI")
(V2SI "V2SI") (V2SF "V2SF")
(V8HF "V4HF") (V4HF "V4HF")])
;; Mode of result of comparison operations (and bit-select operand 1).
(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
@ -582,17 +590,17 @@
(DI "false") (V2DI "false")])
(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
(V4HI "true") (V8HI "true")
(V2SI "false") (V4SI "false")
(V2SF "false") (V4SF "false")
(DI "false") (V2DI "false")])
(V4HI "true") (V8HI "true")
(V2SI "false") (V4SI "false")
(V2SF "false") (V4SF "false")
(DI "false") (V2DI "false")])
(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
(V4HI "true") (V8HI "false")
(V2SI "true") (V4SI "false")
(V2SF "true") (V4SF "false")
(DI "true") (V2DI "false")])
(DI "true") (V2DI "false")
(V4HF "true") (V8HF "false")])
(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
(V4HF "4") (V8HF "8")

gcc/config/arm/neon.md

@ -3045,6 +3045,28 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_dup<q>")]
)
(define_insn "neon_vdup_lane<mode>_internal"
[(set (match_operand:VH 0 "s_register_operand" "=w")
(vec_duplicate:VH
(vec_select:<V_elem>
(match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON && TARGET_FP16"
{
if (BYTES_BIG_ENDIAN)
{
int elt = INTVAL (operands[2]);
elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
operands[2] = GEN_INT (elt);
}
if (<Is_d_reg>)
return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
else
return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
[(set_attr "type" "neon_dup<q>")]
)
(define_expand "neon_vdup_lane<mode>"
[(match_operand:VDQW 0 "s_register_operand" "=w")
(match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
@ -3064,6 +3086,25 @@ if (BYTES_BIG_ENDIAN)
DONE;
})
(define_expand "neon_vdup_lane<mode>"
[(match_operand:VH 0 "s_register_operand")
(match_operand:<V_double_vector_mode> 1 "s_register_operand")
(match_operand:SI 2 "immediate_operand")]
"TARGET_NEON && TARGET_FP16"
{
if (BYTES_BIG_ENDIAN)
{
unsigned int elt = INTVAL (operands[2]);
unsigned int reg_nelts
= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
elt ^= reg_nelts - 1;
operands[2] = GEN_INT (elt);
}
emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
operands[2]));
DONE;
})
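On big-endian the lane number is flipped within the 64-bit source register before the dup is emitted. A quick check of the arithmetic (a sketch, not part of the patch): for V4HF the unit size is 16 bits, so reg_nelts = 64 / 16 = 4 and elt ^= reg_nelts - 1 maps lanes 0,1,2,3 to 3,2,1,0. Since the lane counts here are powers of two, this agrees with the GET_MODE_NUNITS - 1 - elt form used in the insn pattern above.

unsigned int
be_lane_v4hf (unsigned int elt)
{
  unsigned int reg_nelts = 64 / 16;  /* Four 16-bit lanes per D register.  */
  return elt ^ (reg_nelts - 1);      /* 0->3, 1->2, 2->1, 3->0.  */
}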
; Scalar index is ignored, since only zero is valid here.
(define_expand "neon_vdup_lanedi"
[(match_operand:DI 0 "s_register_operand" "=w")
@ -4281,25 +4322,25 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vtrn<mode>_internal"
[(parallel
[(set (match_operand:VDQW 0 "s_register_operand" "")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
(match_operand:VDQW 2 "s_register_operand" "")]
[(set (match_operand:VDQWH 0 "s_register_operand")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
(match_operand:VDQWH 2 "s_register_operand")]
UNSPEC_VTRN1))
(set (match_operand:VDQW 3 "s_register_operand" "")
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
(set (match_operand:VDQWH 3 "s_register_operand")
(unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
"TARGET_NEON"
""
)
;; Note: Different operand numbering to handle tied registers correctly.
(define_insn "*neon_vtrn<mode>_insn"
[(set (match_operand:VDQW 0 "s_register_operand" "=&w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
(match_operand:VDQW 3 "s_register_operand" "2")]
UNSPEC_VTRN1))
(set (match_operand:VDQW 2 "s_register_operand" "=&w")
(unspec:VDQW [(match_dup 1) (match_dup 3)]
UNSPEC_VTRN2))]
[(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
(match_operand:VDQWH 3 "s_register_operand" "2")]
UNSPEC_VTRN1))
(set (match_operand:VDQWH 2 "s_register_operand" "=&w")
(unspec:VDQWH [(match_dup 1) (match_dup 3)]
UNSPEC_VTRN2))]
"TARGET_NEON"
"vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
[(set_attr "type" "neon_permute<q>")]
@ -4307,25 +4348,25 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vzip<mode>_internal"
[(parallel
[(set (match_operand:VDQW 0 "s_register_operand" "")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
(match_operand:VDQW 2 "s_register_operand" "")]
UNSPEC_VZIP1))
(set (match_operand:VDQW 3 "s_register_operand" "")
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
[(set (match_operand:VDQWH 0 "s_register_operand")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
(match_operand:VDQWH 2 "s_register_operand")]
UNSPEC_VZIP1))
(set (match_operand:VDQWH 3 "s_register_operand")
(unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
"TARGET_NEON"
""
)
;; Note: Different operand numbering to handle tied registers correctly.
(define_insn "*neon_vzip<mode>_insn"
[(set (match_operand:VDQW 0 "s_register_operand" "=&w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
(match_operand:VDQW 3 "s_register_operand" "2")]
UNSPEC_VZIP1))
(set (match_operand:VDQW 2 "s_register_operand" "=&w")
(unspec:VDQW [(match_dup 1) (match_dup 3)]
UNSPEC_VZIP2))]
[(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
(match_operand:VDQWH 3 "s_register_operand" "2")]
UNSPEC_VZIP1))
(set (match_operand:VDQWH 2 "s_register_operand" "=&w")
(unspec:VDQWH [(match_dup 1) (match_dup 3)]
UNSPEC_VZIP2))]
"TARGET_NEON"
"vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
[(set_attr "type" "neon_zip<q>")]
@ -4333,25 +4374,25 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vuzp<mode>_internal"
[(parallel
[(set (match_operand:VDQW 0 "s_register_operand" "")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
(match_operand:VDQW 2 "s_register_operand" "")]
[(set (match_operand:VDQWH 0 "s_register_operand")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
(match_operand:VDQWH 2 "s_register_operand")]
UNSPEC_VUZP1))
(set (match_operand:VDQW 3 "s_register_operand" "")
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
(set (match_operand:VDQWH 3 "s_register_operand" "")
(unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
"TARGET_NEON"
""
)
;; Note: Different operand numbering to handle tied registers correctly.
(define_insn "*neon_vuzp<mode>_insn"
[(set (match_operand:VDQW 0 "s_register_operand" "=&w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
(match_operand:VDQW 3 "s_register_operand" "2")]
UNSPEC_VUZP1))
(set (match_operand:VDQW 2 "s_register_operand" "=&w")
(unspec:VDQW [(match_dup 1) (match_dup 3)]
UNSPEC_VUZP2))]
[(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
(match_operand:VDQWH 3 "s_register_operand" "2")]
UNSPEC_VUZP1))
(set (match_operand:VDQWH 2 "s_register_operand" "=&w")
(unspec:VDQWH [(match_dup 1) (match_dup 3)]
UNSPEC_VUZP2))]
"TARGET_NEON"
"vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
[(set_attr "type" "neon_zip<q>")]

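These permute insns overwrite both inputs, hence the unusual operand numbering noted above: output 0 is tied to input operand 1 and output 2 to input operand 3. For the new half-float modes the emitted form is the usual two-register instruction, e.g. (illustrative assembly for V4HF):

	vtrn.16	d0, d1	@ d0 and d1 each receive one vector of the result pair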
gcc/config/arm/vec-common.md

@ -124,6 +124,20 @@
FAIL;
})
(define_expand "vec_perm_const<mode>"
[(match_operand:VH 0 "s_register_operand")
(match_operand:VH 1 "s_register_operand")
(match_operand:VH 2 "s_register_operand")
(match_operand:<V_cmp_result> 3)]
"TARGET_NEON"
{
if (arm_expand_vec_perm_const (operands[0], operands[1],
operands[2], operands[3]))
DONE;
else
FAIL;
})
(define_expand "vec_perm<mode>"
[(match_operand:VE 0 "s_register_operand" "")
(match_operand:VE 1 "s_register_operand" "")

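With this expander defined for VH, a constant permute on a half-float mode is no longer rejected and can go through arm_expand_vec_perm_const. A hypothetical end-to-end example (not part of the patch):

#include <arm_neon.h>

/* The vec_perm_const<mode> expander above lets this constant shuffle be
   matched by the arm_evpc_* routines, here as a single vrev64.16.  */
float16x4_t
reverse_f16 (float16x4_t x)
{
  return __builtin_shuffle (x, (uint16x4_t){ 3, 2, 1, 0 });
}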
gcc/testsuite/ChangeLog

@ -1,3 +1,23 @@
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
(FP16_SUPPORTED): New.
(expected-hfloat-16x4): Make conditional on __fp16 support.
(expected-hfloat-16x8): Likewise.
(vdup_n_f16): Disable for non-AArch64 targets.
* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests,
conditional on FP16_SUPPORTED.
* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support
for testing __fp16.
* gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests,
conditional on FP16_SUPPORTED.
* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/arm/short-vfp-1.c: New.

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h

@ -16,6 +16,15 @@ extern void *memset(void *, int, size_t);
extern void *memcpy(void *, const void *, size_t);
extern size_t strlen(const char *);
/* Helper macro to select FP16 tests. */
#if (!defined (__aarch64__) \
&& (defined (__ARM_FP16_FORMAT_IEEE) \
|| defined (__ARM_FP16_FORMAT_ALTERNATIVE)))
#define FP16_SUPPORTED (1)
#else
#undef FP16_SUPPORTED
#endif
/* Various string construction helpers. */
/*
@ -511,7 +520,9 @@ static void clean_results (void)
/* Helpers to initialize vectors. */
#define VDUP(VAR, Q, T1, T2, W, N, V) \
VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
#if (defined (__aarch64__) \
&& (defined (__ARM_FP16_FORMAT_IEEE) \
|| defined (__ARM_FP16_FORMAT_ALTERNATIVE)))
/* Work around that there is no vdup_n_f16 intrinsic. */
#define vdup_n_f16(VAL) \
__extension__ \

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c

@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc09, 0xcb89,
0xcb09, 0xca89 };
#endif
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf6, 0xf6, 0xf6, 0xf6,
@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
0xfff4, 0xfff4, 0xfff6, 0xfff6 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc09, 0xcb89,
0xcb09, 0xca89,
0xca09, 0xc989,
0xc909, 0xc889 };
#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001,
0xc1600001, 0xc1500001 };
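Throughout these tests the hfloat expected values are IEEE binary16 bit patterns of the inputs and results: 0xcc00 is -16.0, 0xc0cd is -2.4f and 0x4b4d is 14.6f, matching the comments on the VDUP lines below. A small sketch to reproduce such constants (assumes __fp16 in IEEE format, e.g. -mfp16-format=ieee; not part of the patch):

#include <stdio.h>
#include <string.h>

/* Print the binary16 encoding of a few of the test constants.  */
static unsigned short
f16_bits (float x)
{
  __fp16 h = x;
  unsigned short u;
  memcpy (&u, &h, sizeof u);
  return u;
}

int
main (void)
{
  printf ("%#06x\n", (unsigned) f16_bits (-16.0f)); /* 0xcc00 */
  printf ("%#06x\n", (unsigned) f16_bits (-2.4f));  /* 0xc0cd */
  printf ("%#06x\n", (unsigned) f16_bits (14.6f));  /* 0x4b4d */
  return 0;
}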
@ -66,6 +76,10 @@ void exec_vbsl (void)
clean_results ();
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
#if defined (FP16_SUPPORTED)
VLOAD(vector, buffer, , float, f, 16, 4);
VLOAD(vector, buffer, q, float, f, 16, 8);
#endif
VLOAD(vector, buffer, , float, f, 32, 2);
VLOAD(vector, buffer, q, float, f, 32, 4);
@ -80,6 +94,9 @@ void exec_vbsl (void)
VDUP(vector2, , uint, u, 16, 4, 0xFFF2);
VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0);
VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3);
#if defined (FP16_SUPPORTED)
VDUP(vector2, , float, f, 16, 4, -2.4f); /* -2.4f is 0xC0CD. */
#endif
VDUP(vector2, , float, f, 32, 2, -30.3f);
VDUP(vector2, , poly, p, 8, 8, 0xF3);
VDUP(vector2, , poly, p, 16, 4, 0xFFF2);
@ -94,6 +111,9 @@ void exec_vbsl (void)
VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3);
VDUP(vector2, q, poly, p, 8, 16, 0xF3);
VDUP(vector2, q, poly, p, 16, 8, 0xFFF2);
#if defined (FP16_SUPPORTED)
VDUP(vector2, q, float, f, 16, 8, -2.4f);
#endif
VDUP(vector2, q, float, f, 32, 4, -30.4f);
VDUP(vector_first, , uint, u, 8, 8, 0xF4);
@ -111,10 +131,18 @@ void exec_vbsl (void)
TEST_VBSL(uint, , poly, p, 16, 4);
TEST_VBSL(uint, q, poly, p, 8, 16);
TEST_VBSL(uint, q, poly, p, 16, 8);
#if defined (FP16_SUPPORTED)
TEST_VBSL(uint, , float, f, 16, 4);
TEST_VBSL(uint, q, float, f, 16, 8);
#endif
TEST_VBSL(uint, , float, f, 32, 2);
TEST_VBSL(uint, q, float, f, 32, 4);
#if defined (FP16_SUPPORTED)
CHECK_RESULTS (TEST_MSG, "");
#else
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
#endif
}
int main (void)

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c

@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00,
0xcc00, 0xcc00 };
#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0,
@ -46,6 +50,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
0xfff0, 0xfff0, 0xfff0, 0xfff0 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00,
0xcc00, 0xcc00,
0xcc00, 0xcc00,
0xcc00, 0xcc00 };
#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
0xc1800000, 0xc1800000 };
@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80,
0xcb80, 0xcb80 };
#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1,
@ -90,6 +104,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80,
0xcb80, 0xcb80,
0xcb80, 0xcb80,
0xcb80, 0xcb80 };
#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
@ -107,6 +127,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00,
0xcb00, 0xcb00 };
#endif
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
@ -134,6 +158,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
0xfff2, 0xfff2, 0xfff2, 0xfff2 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00,
0xcb00, 0xcb00,
0xcb00, 0xcb00,
0xcb00, 0xcb00 };
#endif
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
0xc1600000, 0xc1600000 };
@ -171,6 +201,9 @@ void exec_vdup_vmov (void)
TEST_VDUP(, uint, u, 64, 1);
TEST_VDUP(, poly, p, 8, 8);
TEST_VDUP(, poly, p, 16, 4);
#if defined (FP16_SUPPORTED)
TEST_VDUP(, float, f, 16, 4);
#endif
TEST_VDUP(, float, f, 32, 2);
TEST_VDUP(q, int, s, 8, 16);
@ -183,8 +216,26 @@ void exec_vdup_vmov (void)
TEST_VDUP(q, uint, u, 64, 2);
TEST_VDUP(q, poly, p, 8, 16);
TEST_VDUP(q, poly, p, 16, 8);
#if defined (FP16_SUPPORTED)
TEST_VDUP(q, float, f, 16, 8);
#endif
TEST_VDUP(q, float, f, 32, 4);
#if defined (FP16_SUPPORTED)
switch (i) {
case 0:
CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
break;
case 1:
CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
break;
case 2:
CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
break;
default:
abort();
}
#else
switch (i) {
case 0:
CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
@ -198,6 +249,7 @@ void exec_vdup_vmov (void)
default:
abort();
}
#endif
}
/* Do the same tests with vmov. Use the same expected results. */
@ -216,6 +268,9 @@ void exec_vdup_vmov (void)
TEST_VMOV(, uint, u, 64, 1);
TEST_VMOV(, poly, p, 8, 8);
TEST_VMOV(, poly, p, 16, 4);
#if defined (FP16_SUPPORTED)
TEST_VMOV(, float, f, 16, 4);
#endif
TEST_VMOV(, float, f, 32, 2);
TEST_VMOV(q, int, s, 8, 16);
@ -228,8 +283,26 @@ void exec_vdup_vmov (void)
TEST_VMOV(q, uint, u, 64, 2);
TEST_VMOV(q, poly, p, 8, 16);
TEST_VMOV(q, poly, p, 16, 8);
#if defined (FP16_SUPPORTED)
TEST_VMOV(q, float, f, 16, 8);
#endif
TEST_VMOV(q, float, f, 32, 4);
#if defined (FP16_SUPPORTED)
switch (i) {
case 0:
CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
break;
case 1:
CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
break;
case 2:
CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
break;
default:
abort();
}
#else
switch (i) {
case 0:
CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
@ -243,6 +316,8 @@ void exec_vdup_vmov (void)
default:
abort();
}
#endif
}
}

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c

@ -17,6 +17,10 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7,
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xca80, 0xca80,
0xca80, 0xca80 };
#endif
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80,
0xca80, 0xca80,
0xca80, 0xca80,
0xca80, 0xca80 };
#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
@ -63,6 +73,9 @@ void exec_vdup_lane (void)
clean_results ();
TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer);
#if defined (FP16_SUPPORTED)
VLOAD(vector, buffer, , float, f, 16, 4);
#endif
VLOAD(vector, buffer, , float, f, 32, 2);
/* Choose lane arbitrarily. */
@ -76,6 +89,9 @@ void exec_vdup_lane (void)
TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0);
TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7);
TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3);
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANE(, float, f, 16, 4, 4, 3);
#endif
TEST_VDUP_LANE(, float, f, 32, 2, 2, 1);
TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2);
@ -88,9 +104,16 @@ void exec_vdup_lane (void)
TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0);
TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5);
TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1);
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3);
#endif
TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
#if defined (FP16_SUPPORTED)
CHECK_RESULTS (TEST_MSG, "");
#else
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
#endif
}
int main (void)

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c

@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcb00, 0xca80,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11,
0x11, 0x11, 0x11, 0x11,
@ -39,6 +43,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66,
0x66, 0x66, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xc880, 0x4b4d,
0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd,
0x4204cccd, 0x4204cccd };
@ -60,6 +70,10 @@ void exec_vext (void)
clean_results ();
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
#ifdef FP16_SUPPORTED
VLOAD(vector1, buffer, , float, f, 16, 4);
VLOAD(vector1, buffer, q, float, f, 16, 8);
#endif
VLOAD(vector1, buffer, , float, f, 32, 2);
VLOAD(vector1, buffer, q, float, f, 32, 4);
@ -74,6 +88,9 @@ void exec_vext (void)
VDUP(vector2, , uint, u, 64, 1, 0x88);
VDUP(vector2, , poly, p, 8, 8, 0x55);
VDUP(vector2, , poly, p, 16, 4, 0x66);
#if defined (FP16_SUPPORTED)
VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */
#endif
VDUP(vector2, , float, f, 32, 2, 33.6f);
VDUP(vector2, q, int, s, 8, 16, 0x11);
@ -86,6 +103,9 @@ void exec_vext (void)
VDUP(vector2, q, uint, u, 64, 2, 0x88);
VDUP(vector2, q, poly, p, 8, 16, 0x55);
VDUP(vector2, q, poly, p, 16, 8, 0x66);
#if defined (FP16_SUPPORTED)
VDUP (vector2, q, float, f, 16, 8, 14.6f);
#endif
VDUP(vector2, q, float, f, 32, 4, 33.2f);
/* Choose arbitrary extract offsets. */
@ -99,6 +119,9 @@ void exec_vext (void)
TEST_VEXT(, uint, u, 64, 1, 0);
TEST_VEXT(, poly, p, 8, 8, 6);
TEST_VEXT(, poly, p, 16, 4, 2);
#if defined (FP16_SUPPORTED)
TEST_VEXT(, float, f, 16, 4, 2);
#endif
TEST_VEXT(, float, f, 32, 2, 1);
TEST_VEXT(q, int, s, 8, 16, 14);
@ -111,9 +134,16 @@ void exec_vext (void)
TEST_VEXT(q, uint, u, 64, 2, 1);
TEST_VEXT(q, poly, p, 8, 16, 12);
TEST_VEXT(q, poly, p, 16, 8, 6);
#if defined (FP16_SUPPORTED)
TEST_VEXT(q, float, f, 16, 8, 7);
#endif
TEST_VEXT(q, float, f, 32, 4, 3);
#if defined (FP16_SUPPORTED)
CHECK_RESULTS (TEST_MSG, "");
#else
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
#endif
}
int main (void)

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c

@ -63,6 +63,10 @@ VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 };
VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
0xf3, 0xf2, 0xf1, 0xf0 };
VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00,
0xcb80, 0xcc00 };
#endif
VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 };
VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
0xf3, 0xf2, 0xf1, 0xf0,
@ -86,6 +90,12 @@ VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
0xfb, 0xfa, 0xf9, 0xf8 };
VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0,
0xfff7, 0xfff6, 0xfff5, 0xfff4 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00,
0xcb80, 0xcc00,
0xc880, 0xc900,
0xc980, 0xca00 };
#endif
VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000,
0xc1500000, 0xc1600000 };
@ -104,6 +114,10 @@ void exec_vrev (void)
/* Initialize input "vector" from "buffer". */
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
#if defined (FP16_SUPPORTED)
VLOAD (vector, buffer, , float, f, 16, 4);
VLOAD (vector, buffer, q, float, f, 16, 8);
#endif
VLOAD(vector, buffer, , float, f, 32, 2);
VLOAD(vector, buffer, q, float, f, 32, 4);
@ -187,6 +201,12 @@ void exec_vrev (void)
CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, "");
CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, "");
#if defined (FP16_SUPPORTED)
TEST_VREV (, float, f, 16, 4, 64);
TEST_VREV (q, float, f, 16, 8, 64);
CHECK_FP(TEST_MSG, float, 16, 4, PRIx32, expected_vrev64, "");
CHECK_FP(TEST_MSG, float, 16, 8, PRIx32, expected_vrev64, "");
#endif
TEST_VREV(, float, f, 32, 2, 64);
TEST_VREV(q, float, f, 32, 4, 64);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_vrev64, "");

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc

@ -53,9 +53,17 @@ void FNNAME (INSN_NAME) (void)
DECL_VSHUFFLE(float, 32, 4)
DECL_ALL_VSHUFFLE();
#if defined (FP16_SUPPORTED)
DECL_VSHUFFLE (float, 16, 4);
DECL_VSHUFFLE (float, 16, 8);
#endif
/* Initialize input "vector" from "buffer". */
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
#if defined (FP16_SUPPORTED)
VLOAD (vector1, buffer, , float, f, 16, 4);
VLOAD (vector1, buffer, q, float, f, 16, 8);
#endif
VLOAD(vector1, buffer, , float, f, 32, 2);
VLOAD(vector1, buffer, q, float, f, 32, 4);
@ -68,6 +76,9 @@ void FNNAME (INSN_NAME) (void)
VDUP(vector2, , uint, u, 32, 2, 0x77);
VDUP(vector2, , poly, p, 8, 8, 0x55);
VDUP(vector2, , poly, p, 16, 4, 0x66);
#if defined (FP16_SUPPORTED)
VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */
#endif
VDUP(vector2, , float, f, 32, 2, 33.6f);
VDUP(vector2, q, int, s, 8, 16, 0x11);
@ -78,8 +89,11 @@ void FNNAME (INSN_NAME) (void)
VDUP(vector2, q, uint, u, 32, 4, 0x77);
VDUP(vector2, q, poly, p, 8, 16, 0x55);
VDUP(vector2, q, poly, p, 16, 8, 0x66);
#if defined (FP16_SUPPORTED)
VDUP (vector2, q, float, f, 16, 8, 14.6f);
#endif
VDUP(vector2, q, float, f, 32, 4, 33.8f);
#define TEST_ALL_VSHUFFLE(INSN) \
TEST_VSHUFFLE(INSN, , int, s, 8, 8); \
TEST_VSHUFFLE(INSN, , int, s, 16, 4); \
@ -100,6 +114,10 @@ void FNNAME (INSN_NAME) (void)
TEST_VSHUFFLE(INSN, q, poly, p, 16, 8); \
TEST_VSHUFFLE(INSN, q, float, f, 32, 4)
#define TEST_VSHUFFLE_FP16(INSN) \
TEST_VSHUFFLE(INSN, , float, f, 16, 4); \
TEST_VSHUFFLE(INSN, q, float, f, 16, 8);
#define TEST_ALL_EXTRA_CHUNKS() \
TEST_EXTRA_CHUNK(int, 8, 8, 1); \
TEST_EXTRA_CHUNK(int, 16, 4, 1); \
@ -143,17 +161,37 @@ void FNNAME (INSN_NAME) (void)
CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
} \
}
#define CHECK_RESULTS_VSHUFFLE_FP16(test_name,EXPECTED,comment) \
{ \
CHECK_FP (test_name, float, 16, 4, PRIx16, EXPECTED, comment); \
CHECK_FP (test_name, float, 16, 8, PRIx16, EXPECTED, comment); \
}
clean_results ();
/* Execute the tests. */
TEST_ALL_VSHUFFLE(INSN_NAME);
#if defined (FP16_SUPPORTED)
TEST_VSHUFFLE_FP16 (INSN_NAME);
#endif
CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)");
#if defined (FP16_SUPPORTED)
CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected0, "(chunk 0)");
#endif
TEST_ALL_EXTRA_CHUNKS();
#if defined (FP16_SUPPORTED)
TEST_EXTRA_CHUNK (float, 16, 4, 1);
TEST_EXTRA_CHUNK (float, 16, 8, 1);
#endif
CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)");
#if defined (FP16_SUPPORTED)
CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected1, "(chunk 1)");
#endif
}
int main (void)

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c

@ -15,6 +15,10 @@ VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55,
0xf2, 0xf3, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11,
0xf2, 0xf3, 0x11, 0x11,
@ -36,6 +40,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55,
0xf6, 0xf7, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66,
0xfff2, 0xfff3, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
0x4b4d, 0x4b4d,
0xcb00, 0xca80,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0x42073333, 0x42073333 };
@ -51,6 +61,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55,
0xf6, 0xf7, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb00, 0xca80,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11,
0xfa, 0xfb, 0x11, 0x11,
@ -72,6 +86,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55,
0xfe, 0xff, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66,
0xfff6, 0xfff7, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xca00, 0xc980,
0x4b4d, 0x4b4d,
0xc900, 0xc880,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1600000, 0xc1500000,
0x42073333, 0x42073333 };

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c

@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff2, 0xfff3 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
0xcb00, 0xca80 };
#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@ -48,6 +52,12 @@ VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1,
0xfff2, 0xfff3,
0xfff4, 0xfff5,
0xfff6, 0xfff7 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
0xcb00, 0xca80,
0xca00, 0xc980,
0xc900, 0xc880 };
#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11,
0x11, 0x11, 0x11, 0x11,
@ -84,6 +98,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66,
0x66, 0x66, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333,
0x42073333, 0x42073333 };

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c

@ -18,6 +18,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55,
0xf1, 0xf5, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2,
0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb00,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11,
0xf1, 0xf9, 0x11, 0x11,
@ -41,6 +45,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55,
0xf3, 0xfb, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66,
0xfff1, 0xfff5, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xca00,
0x4b4d, 0x4b4d,
0xcb80, 0xc980,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000,
0x42073333, 0x42073333 };
@ -59,6 +69,10 @@ VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55,
0xf3, 0xf7, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3,
0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xca80,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11,
0xf5, 0xfd, 0x11, 0x11,
@ -82,6 +96,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55,
0xf7, 0xff, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66,
0xfff3, 0xfff7, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb00, 0xc900,
0x4b4d, 0x4b4d,
0xca80, 0xc880,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1500000,
0x42073333, 0x42073333 };