[PATCH 6/17][ARM] Add data processing intrinsics for float16_t.

gcc/
2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>

	* config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and
	V4HF modes.
	(arm_evpc_neon_vtrn): Likewise.
	(arm_evpc_neon_vrev): Likewise.
	(arm_evpc_neon_vext): Likewise.
	* config/arm/arm_neon.h (vbsl_f16): New.
	(vbslq_f16): New.
	(vdup_n_f16): New.
	(vdupq_n_f16): New.
	(vdup_lane_f16): New.
	(vdupq_lane_f16): New.
	(vext_f16): New.
	(vextq_f16): New.
	(vmov_n_f16): New.
	(vmovq_n_f16): New.
	(vrev64_f16): New.
	(vrev64q_f16): New.
	(vtrn_f16): New.
	(vtrnq_f16): New.
	(vuzp_f16): New.
	(vuzpq_f16): New.
	(vzip_f16): New.
	(vzipq_f16): New.
	* config/arm/arm_neon_builtins.def (vdup_n): New (v8hf, v4hf variants).
	(vdup_lane): New (v8hf, v4hf variants).
	(vext): New (v8hf, v4hf variants).
	(vbsl): New (v8hf, v4hf variants).
	* config/arm/iterators.md (VDQWH): New.
	(VH): New.
	(V_double_vector_mode): Add V8HF and V4HF.  Fix white-space.
	(Scalar_mul_8_16): Fix white-space.
	(Is_d_reg): Add V4HF and V8HF.
	* config/arm/neon.md (neon_vdup_lane<mode>_internal): New.
	(neon_vdup_lane<mode>): New.
	(neon_vtrn<mode>_internal): Replace VDQW with VDQWH.
	(*neon_vtrn<mode>_insn): Likewise.
	(neon_vzip<mode>_internal): Likewise. Also fix white-space.
	(*neon_vzip<mode>_insn): Likewise.
	(neon_vuzp<mode>_internal): Likewise.
	(*neon_vuzp<mode>_insn): Likewise.
	* config/arm/vec-common.md (vec_perm_const<mode>): New.

testsuite/
2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
	(FP16_SUPPORTED): New.
	(expected-hfloat-16x4): Make conditional on __fp16 support.
	(expected-hfloat-16x8): Likewise.
	(vdup_n_f16): Disable for non-AArch64 targets.
	* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests,
	conditional on FP16_SUPPORTED.
	* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support
	for testing __fp16.
	* gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests,
	conditional on FP16_SUPPORTED.
	* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.

From-SVN: r240404
Matthew Wahab 2016-09-23 09:23:01 +00:00 committed by Matthew Wahab
parent 50df9464b8
commit b1a970a5cc
18 changed files with 650 additions and 49 deletions

gcc/ChangeLog

@ -1,3 +1,47 @@
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
* config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and
V4HF modes.
(arm_evpc_neon_vtrn): Likewise.
(arm_evpc_neon_vrev): Likewise.
(arm_evpc_neon_vext): Likewise.
* config/arm/arm_neon.h (vbsl_f16): New.
(vbslq_f16): New.
(vdup_n_f16): New.
(vdupq_n_f16): New.
(vdup_lane_f16): New.
(vdupq_lane_f16): New.
(vext_f16): New.
(vextq_f16): New.
(vmov_n_f16): New.
(vmovq_n_f16): New.
(vrev64_f16): New.
(vrev64q_f16): New.
(vtrn_f16): New.
(vtrnq_f16): New.
(vuzp_f16): New.
(vuzpq_f16): New.
(vzip_f16): New.
(vzipq_f16): New.
* config/arm/arm_neon_builtins.def (vdup_n): New (v8hf, v4hf variants).
(vdup_lane): New (v8hf, v4hf variants).
(vext): New (v8hf, v4hf variants).
(vbsl): New (v8hf, v4hf variants).
* config/arm/iterators.md (VDQWH): New.
(VH): New.
(V_double_vector_mode): Add V8HF and V4HF. Fix white-space.
(Scalar_mul_8_16): Fix white-space.
(Is_d_reg): Add V4HF and V8HF.
* config/arm/neon.md (neon_vdup_lane<mode>_internal): New.
(neon_vdup_lane<mode>): New.
(neon_vtrn<mode>_internal): Replace VDQW with VDQWH.
(*neon_vtrn<mode>_insn): Likewise.
(neon_vzip<mode>_internal): Likewise. Also fix white-space.
(*neon_vzip<mode>_insn): Likewise.
(neon_vuzp<mode>_internal): Likewise.
(*neon_vuzp<mode>_insn): Likewise.
* config/arm/vec-common.md (vec_perm_const<mode>): New.
2016-09-23 Jiong Wang <jiong.wang@arm.com>
Matthew Wahab <matthew.wahab@arm.com>

gcc/config/arm/arm.c

@ -28576,6 +28576,8 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
@ -28649,6 +28651,8 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
case V4SImode: gen = gen_neon_vzipv4si_internal; break;
case V2SImode: gen = gen_neon_vzipv2si_internal; break;
case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
@ -28701,6 +28705,8 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
case V8QImode: gen = gen_neon_vrev32v8qi; break;
case V8HImode: gen = gen_neon_vrev64v8hi; break;
case V4HImode: gen = gen_neon_vrev64v4hi; break;
case V8HFmode: gen = gen_neon_vrev64v8hf; break;
case V4HFmode: gen = gen_neon_vrev64v4hf; break;
default:
return false;
}
@ -28784,6 +28790,8 @@ arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
@ -28859,6 +28867,8 @@ arm_evpc_neon_vext (struct expand_vec_perm_d *d)
case V8HImode: gen = gen_neon_vextv8hi; break;
case V2SImode: gen = gen_neon_vextv2si; break;
case V4SImode: gen = gen_neon_vextv4si; break;
case V4HFmode: gen = gen_neon_vextv4hf; break;
case V8HFmode: gen = gen_neon_vextv8hf; break;
case V2SFmode: gen = gen_neon_vextv2sf; break;
case V4SFmode: gen = gen_neon_vextv4sf; break;
case V2DImode: gen = gen_neon_vextv2di; break;

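With the V8HF/V4HF cases added, arm_expand_vec_perm_const can recognize constant half-float shuffles. As an illustration (a sketch, not part of the patch), these are constant masks the recognizers above can now match for 4 x __fp16 vectors; they mirror the __builtin_shuffle masks used by the new arm_neon.h intrinsics below:

#include <arm_neon.h>

/* Illustrative only: constant permutes that the recognizers above can
   now handle for half-float modes.  */
float16x4_t do_uzp (float16x4_t a, float16x4_t b)
{ return __builtin_shuffle (a, b, (uint16x4_t){ 0, 2, 4, 6 }); } /* vuzp  */
float16x4_t do_zip (float16x4_t a, float16x4_t b)
{ return __builtin_shuffle (a, b, (uint16x4_t){ 0, 4, 1, 5 }); } /* vzip  */
float16x4_t do_trn (float16x4_t a, float16x4_t b)
{ return __builtin_shuffle (a, b, (uint16x4_t){ 0, 4, 2, 6 }); } /* vtrn  */
float16x4_t do_ext (float16x4_t a, float16x4_t b)
{ return __builtin_shuffle (a, b, (uint16x4_t){ 1, 2, 3, 4 }); } /* vext #1  */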
gcc/config/arm/arm_neon.h

@ -14842,6 +14842,181 @@ vmull_high_p64 (poly64x2_t __a, poly64x2_t __b)
#pragma GCC pop_options
/* Half-precision data processing intrinsics. */
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c)
{
return __builtin_neon_vbslv4hf ((int16x4_t)__a, __b, __c);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c)
{
return __builtin_neon_vbslv8hf ((int16x8_t)__a, __b, __c);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vdup_n_f16 (float16_t __a)
{
return __builtin_neon_vdup_nv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vdupq_n_f16 (float16_t __a)
{
return __builtin_neon_vdup_nv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vdup_lane_f16 (float16x4_t __a, const int __b)
{
return __builtin_neon_vdup_lanev4hf (__a, __b);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vdupq_lane_f16 (float16x4_t __a, const int __b)
{
return __builtin_neon_vdup_lanev8hf (__a, __b);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vext_f16 (float16x4_t __a, float16x4_t __b, const int __c)
{
return __builtin_neon_vextv4hf (__a, __b, __c);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vextq_f16 (float16x8_t __a, float16x8_t __b, const int __c)
{
return __builtin_neon_vextv8hf (__a, __b, __c);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vmov_n_f16 (float16_t __a)
{
return __builtin_neon_vdup_nv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vmovq_n_f16 (float16_t __a)
{
return __builtin_neon_vdup_nv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrev64_f16 (float16x4_t __a)
{
return (float16x4_t)__builtin_shuffle (__a, (uint16x4_t){ 3, 2, 1, 0 });
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrev64q_f16 (float16x8_t __a)
{
return
(float16x8_t)__builtin_shuffle (__a,
(uint16x8_t){ 3, 2, 1, 0, 7, 6, 5, 4 });
}
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vtrn_f16 (float16x4_t __a, float16x4_t __b)
{
float16x4x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 1, 7, 3 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 6, 2 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 2, 6 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 5, 3, 7 });
#endif
return __rv;
}
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vtrnq_f16 (float16x8_t __a, float16x8_t __b)
{
float16x8x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 9, 1, 11, 3, 13, 5, 15, 7 });
__rv.val[1] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 8, 0, 10, 2, 12, 4, 14, 6 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 0, 8, 2, 10, 4, 12, 6, 14 });
__rv.val[1] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 1, 9, 3, 11, 5, 13, 7, 15 });
#endif
return __rv;
}
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vuzp_f16 (float16x4_t __a, float16x4_t __b)
{
float16x4x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 7, 1, 3 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 6, 0, 2 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 2, 4, 6 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 3, 5, 7 });
#endif
return __rv;
}
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vuzpq_f16 (float16x8_t __a, float16x8_t __b)
{
float16x8x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 5, 7, 1, 3, 13, 15, 9, 11 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 4, 6, 0, 2, 12, 14, 8, 10 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 0, 2, 4, 6, 8, 10, 12, 14 });
__rv.val[1] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 1, 3, 5, 7, 9, 11, 13, 15 });
#endif
return __rv;
}
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vzip_f16 (float16x4_t __a, float16x4_t __b)
{
float16x4x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 6, 2, 7, 3 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 5, 1 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 1, 5 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 2, 6, 3, 7 });
#endif
return __rv;
}
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vzipq_f16 (float16x8_t __a, float16x8_t __b)
{
float16x8x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 10, 2, 11, 3, 8, 0, 9, 1 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 14, 6, 15, 7, 12, 4, 13, 5 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 0, 8, 1, 9, 2, 10, 3, 11 });
__rv.val[1] = __builtin_shuffle (__a, __b,
(uint16x8_t){ 4, 12, 5, 13, 6, 14, 7, 15 });
#endif
return __rv;
}
#endif
#ifdef __cplusplus
}
#endif

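For reference, a minimal usage sketch of the new intrinsics (hypothetical test code, not part of the patch; assumes a target with NEON and __fp16, e.g. -mfpu=neon-fp16 -mfp16-format=ieee):

#include <arm_neon.h>

int
main (void)
{
  float16x4_t a = vdup_n_f16 (1.0);      /* { 1, 1, 1, 1 }  */
  float16x4_t b = vmov_n_f16 (2.0);      /* { 2, 2, 2, 2 }; vmov_n is the
					    same operation as vdup_n.  */

  /* vext starts at lane 1 of A and continues into B.  */
  float16x4_t e = vext_f16 (a, b, 1);    /* { 1, 1, 1, 2 }  */

  /* Both vdup_lane forms read a 64-bit (4-lane) source vector.  */
  float16x4_t d = vdup_lane_f16 (e, 3);  /* { 2, 2, 2, 2 }  */
  float16x8_t q = vdupq_lane_f16 (e, 3); /* { 2, 2, 2, 2, 2, 2, 2, 2 }  */

  /* vbsl selects B bits where the mask is set, A bits elsewhere.  */
  uint16x4_t m = vcreate_u16 (0xffffull);
  float16x4_t s = vbsl_f16 (m, b, a);    /* { 2, 1, 1, 1 }  */

  /* The permutes return both vectors of the result pair.  */
  float16x4x2_t t = vtrn_f16 (a, b);     /* val[0] = val[1] = { 1, 2, 1, 2 }  */

  (void) d; (void) q; (void) s; (void) t;
  return 0;
}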
gcc/config/arm/arm_neon_builtins.def

@ -166,8 +166,10 @@ VAR10 (SETLANE, vset_lane,
VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di)
VAR10 (UNOP, vdup_n,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (UNOP, vdup_n, v8hf, v4hf)
VAR10 (GETLANE, vdup_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (GETLANE, vdup_lane, v8hf, v4hf)
VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
@ -197,6 +199,7 @@ VAR2 (MAC_N, vmlslu_n, v4hi, v2si)
VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si)
VAR10 (SETLANE, vext,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (SETLANE, vext, v8hf, v4hf)
VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi)
VAR2 (UNOP, vrev16, v8qi, v16qi)
@ -208,6 +211,7 @@ VAR1 (UNOP, vcvtv4sf, v4hf)
VAR1 (UNOP, vcvtv4hf, v4sf)
VAR10 (TERNOP, vbsl,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (TERNOP, vbsl, v8hf, v4hf)
VAR2 (UNOP, copysignf, v2sf, v4sf)
VAR2 (UNOP, vrintn, v2sf, v4sf)
VAR2 (UNOP, vrinta, v2sf, v4sf)

gcc/config/arm/iterators.md

@ -119,6 +119,10 @@
;; All supported vector modes (except those with 64-bit integer elements).
(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
;; All supported vector modes including 16-bit float modes.
(define_mode_iterator VDQWH [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF
V8HF V4HF])
;; Supported integer vector modes (not 64 bit elements).
(define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI])
@ -174,6 +178,9 @@
;; Modes with 8-bit, 16-bit and 32-bit elements.
(define_mode_iterator VU [V16QI V8HI V4SI])
;; Vector modes for 16-bit floating-point support.
(define_mode_iterator VH [V8HF V4HF])
;; Iterators used for fixed-point support.
(define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA])
@ -475,9 +482,10 @@
;; Used for neon_vdup_lane, where the second operand is double-sized
;; even when the first one is quad.
(define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI")
(V4SI "V2SI") (V4SF "V2SF")
(V8QI "V8QI") (V4HI "V4HI")
(V2SI "V2SI") (V2SF "V2SF")])
(V4SI "V2SI") (V4SF "V2SF")
(V8QI "V8QI") (V4HI "V4HI")
(V2SI "V2SI") (V2SF "V2SF")
(V8HF "V4HF") (V4HF "V4HF")])
;; Mode of result of comparison operations (and bit-select operand 1).
(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
@ -582,17 +590,17 @@
(DI "false") (V2DI "false")])
(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
(V4HI "true") (V8HI "true")
(V2SI "false") (V4SI "false")
(V2SF "false") (V4SF "false")
(DI "false") (V2DI "false")])
(V4HI "true") (V8HI "true")
(V2SI "false") (V4SI "false")
(V2SF "false") (V4SF "false")
(DI "false") (V2DI "false")])
(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
(V4HI "true") (V8HI "false")
(V2SI "true") (V4SI "false")
(V2SF "true") (V4SF "false")
(DI "true") (V2DI "false")])
(DI "true") (V2DI "false")
(V4HF "true") (V8HF "false")])
(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
(V4HF "4") (V8HF "8")

gcc/config/arm/neon.md

@ -3045,6 +3045,28 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_dup<q>")]
)
(define_insn "neon_vdup_lane<mode>_internal"
[(set (match_operand:VH 0 "s_register_operand" "=w")
(vec_duplicate:VH
(vec_select:<V_elem>
(match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON && TARGET_FP16"
{
if (BYTES_BIG_ENDIAN)
{
int elt = INTVAL (operands[2]);
elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
operands[2] = GEN_INT (elt);
}
if (<Is_d_reg>)
return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
else
return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
[(set_attr "type" "neon_dup<q>")]
)
(define_expand "neon_vdup_lane<mode>"
[(match_operand:VDQW 0 "s_register_operand" "=w")
(match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
@ -3064,6 +3086,25 @@ if (BYTES_BIG_ENDIAN)
DONE;
})
(define_expand "neon_vdup_lane<mode>"
[(match_operand:VH 0 "s_register_operand")
(match_operand:<V_double_vector_mode> 1 "s_register_operand")
(match_operand:SI 2 "immediate_operand")]
"TARGET_NEON && TARGET_FP16"
{
if (BYTES_BIG_ENDIAN)
{
unsigned int elt = INTVAL (operands[2]);
unsigned int reg_nelts
= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
elt ^= reg_nelts - 1;
operands[2] = GEN_INT (elt);
}
emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
operands[2]));
DONE;
})
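On big-endian the lane number is flipped within the 64-bit source register before the dup is emitted. A quick check of the arithmetic (a sketch, not part of the patch): for V4HF the unit size is 16 bits, so reg_nelts = 64 / 16 = 4 and elt ^= reg_nelts - 1 maps lanes 0,1,2,3 to 3,2,1,0. Since the lane counts here are powers of two, this agrees with the GET_MODE_NUNITS - 1 - elt form used in the insn pattern above.

unsigned int
be_lane_v4hf (unsigned int elt)
{
  unsigned int reg_nelts = 64 / 16;  /* Four 16-bit lanes per D register.  */
  return elt ^ (reg_nelts - 1);      /* 0->3, 1->2, 2->1, 3->0.  */
}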
; Scalar index is ignored, since only zero is valid here.
(define_expand "neon_vdup_lanedi"
[(match_operand:DI 0 "s_register_operand" "=w")
@ -4281,25 +4322,25 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vtrn<mode>_internal"
[(parallel
[(set (match_operand:VDQW 0 "s_register_operand" "")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
(match_operand:VDQW 2 "s_register_operand" "")]
[(set (match_operand:VDQWH 0 "s_register_operand")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
(match_operand:VDQWH 2 "s_register_operand")]
UNSPEC_VTRN1))
(set (match_operand:VDQW 3 "s_register_operand" "")
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
(set (match_operand:VDQWH 3 "s_register_operand")
(unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
"TARGET_NEON"
""
)
;; Note: Different operand numbering to handle tied registers correctly.
(define_insn "*neon_vtrn<mode>_insn"
[(set (match_operand:VDQW 0 "s_register_operand" "=&w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
(match_operand:VDQW 3 "s_register_operand" "2")]
UNSPEC_VTRN1))
(set (match_operand:VDQW 2 "s_register_operand" "=&w")
(unspec:VDQW [(match_dup 1) (match_dup 3)]
UNSPEC_VTRN2))]
[(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
(match_operand:VDQWH 3 "s_register_operand" "2")]
UNSPEC_VTRN1))
(set (match_operand:VDQWH 2 "s_register_operand" "=&w")
(unspec:VDQWH [(match_dup 1) (match_dup 3)]
UNSPEC_VTRN2))]
"TARGET_NEON"
"vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
[(set_attr "type" "neon_permute<q>")]
@ -4307,25 +4348,25 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vzip<mode>_internal"
[(parallel
[(set (match_operand:VDQW 0 "s_register_operand" "")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
(match_operand:VDQW 2 "s_register_operand" "")]
UNSPEC_VZIP1))
(set (match_operand:VDQW 3 "s_register_operand" "")
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
[(set (match_operand:VDQWH 0 "s_register_operand")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
(match_operand:VDQWH 2 "s_register_operand")]
UNSPEC_VZIP1))
(set (match_operand:VDQWH 3 "s_register_operand")
(unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
"TARGET_NEON"
""
)
;; Note: Different operand numbering to handle tied registers correctly.
(define_insn "*neon_vzip<mode>_insn"
[(set (match_operand:VDQW 0 "s_register_operand" "=&w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
(match_operand:VDQW 3 "s_register_operand" "2")]
UNSPEC_VZIP1))
(set (match_operand:VDQW 2 "s_register_operand" "=&w")
(unspec:VDQW [(match_dup 1) (match_dup 3)]
UNSPEC_VZIP2))]
[(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
(match_operand:VDQWH 3 "s_register_operand" "2")]
UNSPEC_VZIP1))
(set (match_operand:VDQWH 2 "s_register_operand" "=&w")
(unspec:VDQWH [(match_dup 1) (match_dup 3)]
UNSPEC_VZIP2))]
"TARGET_NEON"
"vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
[(set_attr "type" "neon_zip<q>")]
@ -4333,25 +4374,25 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vuzp<mode>_internal"
[(parallel
[(set (match_operand:VDQW 0 "s_register_operand" "")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
(match_operand:VDQW 2 "s_register_operand" "")]
[(set (match_operand:VDQWH 0 "s_register_operand")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
(match_operand:VDQWH 2 "s_register_operand")]
UNSPEC_VUZP1))
(set (match_operand:VDQW 3 "s_register_operand" "")
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
(set (match_operand:VDQWH 3 "s_register_operand" "")
(unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
"TARGET_NEON"
""
)
;; Note: Different operand numbering to handle tied registers correctly.
(define_insn "*neon_vuzp<mode>_insn"
[(set (match_operand:VDQW 0 "s_register_operand" "=&w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
(match_operand:VDQW 3 "s_register_operand" "2")]
UNSPEC_VUZP1))
(set (match_operand:VDQW 2 "s_register_operand" "=&w")
(unspec:VDQW [(match_dup 1) (match_dup 3)]
UNSPEC_VUZP2))]
[(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
(match_operand:VDQWH 3 "s_register_operand" "2")]
UNSPEC_VUZP1))
(set (match_operand:VDQWH 2 "s_register_operand" "=&w")
(unspec:VDQWH [(match_dup 1) (match_dup 3)]
UNSPEC_VUZP2))]
"TARGET_NEON"
"vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
[(set_attr "type" "neon_zip<q>")]

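These permute insns overwrite both inputs, hence the unusual operand numbering noted above: output 0 is tied to input operand 1 and output 2 to input operand 3. For the new half-float modes the emitted form is the usual two-register instruction, e.g. (illustrative assembly for V4HF):

	vtrn.16	d0, d1	@ d0 and d1 each receive one vector of the result pair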
gcc/config/arm/vec-common.md

@ -124,6 +124,20 @@
FAIL;
})
(define_expand "vec_perm_const<mode>"
[(match_operand:VH 0 "s_register_operand")
(match_operand:VH 1 "s_register_operand")
(match_operand:VH 2 "s_register_operand")
(match_operand:<V_cmp_result> 3)]
"TARGET_NEON"
{
if (arm_expand_vec_perm_const (operands[0], operands[1],
operands[2], operands[3]))
DONE;
else
FAIL;
})
(define_expand "vec_perm<mode>"
[(match_operand:VE 0 "s_register_operand" "")
(match_operand:VE 1 "s_register_operand" "")

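With this expander defined for VH, a constant permute on a half-float mode is no longer rejected and can go through arm_expand_vec_perm_const. A hypothetical end-to-end example (not part of the patch):

#include <arm_neon.h>

/* The vec_perm_const<mode> expander above lets this constant shuffle be
   matched by the arm_evpc_* routines, here as a single vrev64.16.  */
float16x4_t
reverse_f16 (float16x4_t x)
{
  return __builtin_shuffle (x, (uint16x4_t){ 3, 2, 1, 0 });
}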
gcc/testsuite/ChangeLog

@ -1,3 +1,23 @@
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
(FP16_SUPPORTED): New.
(expected-hfloat-16x4): Make conditional on __fp16 support.
(expected-hfloat-16x8): Likewise.
(vdup_n_f16): Disable for non-AArch64 targets.
* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests,
conditional on FP16_SUPPORTED.
* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support
for testing __fp16.
* gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests,
conditional on FP16_SUPPORTED.
* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/arm/short-vfp-1.c: New.

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h

@ -16,6 +16,15 @@ extern void *memset(void *, int, size_t);
extern void *memcpy(void *, const void *, size_t);
extern size_t strlen(const char *);
/* Helper macro to select FP16 tests. */
#if (!defined (__aarch64__) \
&& (defined (__ARM_FP16_FORMAT_IEEE) \
|| defined (__ARM_FP16_FORMAT_ALTERNATIVE)))
#define FP16_SUPPORTED (1)
#else
#undef FP16_SUPPORTED
#endif
/* Various string construction helpers. */
/*
@ -511,7 +520,9 @@ static void clean_results (void)
/* Helpers to initialize vectors. */
#define VDUP(VAR, Q, T1, T2, W, N, V) \
VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
#if (defined (__aarch64__) \
&& (defined (__ARM_FP16_FORMAT_IEEE) \
|| defined (__ARM_FP16_FORMAT_ALTERNATIVE)))
/* Work around that there is no vdup_n_f16 intrinsic. */
#define vdup_n_f16(VAL) \
__extension__ \

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c

@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc09, 0xcb89,
0xcb09, 0xca89 };
#endif
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf6, 0xf6, 0xf6, 0xf6,
@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
0xfff4, 0xfff4, 0xfff6, 0xfff6 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc09, 0xcb89,
0xcb09, 0xca89,
0xca09, 0xc989,
0xc909, 0xc889 };
#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001,
0xc1600001, 0xc1500001 };
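Throughout these tests the hfloat expected values are IEEE binary16 bit patterns of the inputs and results: 0xcc00 is -16.0, 0xc0cd is -2.4f and 0x4b4d is 14.6f, matching the comments on the VDUP lines below. A small sketch to reproduce such constants (assumes __fp16 in IEEE format, e.g. -mfp16-format=ieee; not part of the patch):

#include <stdio.h>
#include <string.h>

/* Print the binary16 encoding of a few of the test constants.  */
static unsigned short
f16_bits (float x)
{
  __fp16 h = x;
  unsigned short u;
  memcpy (&u, &h, sizeof u);
  return u;
}

int
main (void)
{
  printf ("%#06x\n", (unsigned) f16_bits (-16.0f)); /* 0xcc00 */
  printf ("%#06x\n", (unsigned) f16_bits (-2.4f));  /* 0xc0cd */
  printf ("%#06x\n", (unsigned) f16_bits (14.6f));  /* 0x4b4d */
  return 0;
}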
@ -66,6 +76,10 @@ void exec_vbsl (void)
clean_results ();
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
#if defined (FP16_SUPPORTED)
VLOAD(vector, buffer, , float, f, 16, 4);
VLOAD(vector, buffer, q, float, f, 16, 8);
#endif
VLOAD(vector, buffer, , float, f, 32, 2);
VLOAD(vector, buffer, q, float, f, 32, 4);
@ -80,6 +94,9 @@ void exec_vbsl (void)
VDUP(vector2, , uint, u, 16, 4, 0xFFF2);
VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0);
VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3);
#if defined (FP16_SUPPORTED)
VDUP(vector2, , float, f, 16, 4, -2.4f); /* -2.4f is 0xC0CD. */
#endif
VDUP(vector2, , float, f, 32, 2, -30.3f);
VDUP(vector2, , poly, p, 8, 8, 0xF3);
VDUP(vector2, , poly, p, 16, 4, 0xFFF2);
@ -94,6 +111,9 @@ void exec_vbsl (void)
VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3);
VDUP(vector2, q, poly, p, 8, 16, 0xF3);
VDUP(vector2, q, poly, p, 16, 8, 0xFFF2);
#if defined (FP16_SUPPORTED)
VDUP(vector2, q, float, f, 16, 8, -2.4f);
#endif
VDUP(vector2, q, float, f, 32, 4, -30.4f);
VDUP(vector_first, , uint, u, 8, 8, 0xF4);
@ -111,10 +131,18 @@ void exec_vbsl (void)
TEST_VBSL(uint, , poly, p, 16, 4);
TEST_VBSL(uint, q, poly, p, 8, 16);
TEST_VBSL(uint, q, poly, p, 16, 8);
#if defined (FP16_SUPPORTED)
TEST_VBSL(uint, , float, f, 16, 4);
TEST_VBSL(uint, q, float, f, 16, 8);
#endif
TEST_VBSL(uint, , float, f, 32, 2);
TEST_VBSL(uint, q, float, f, 32, 4);
#if defined (FP16_SUPPORTED)
CHECK_RESULTS (TEST_MSG, "");
#else
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
#endif
}
int main (void)

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c

@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00,
0xcc00, 0xcc00 };
#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0,
@ -46,6 +50,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
0xfff0, 0xfff0, 0xfff0, 0xfff0 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00,
0xcc00, 0xcc00,
0xcc00, 0xcc00,
0xcc00, 0xcc00 };
#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
0xc1800000, 0xc1800000 };
@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80,
0xcb80, 0xcb80 };
#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1,
@ -90,6 +104,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80,
0xcb80, 0xcb80,
0xcb80, 0xcb80,
0xcb80, 0xcb80 };
#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
@ -107,6 +127,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00,
0xcb00, 0xcb00 };
#endif
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
@ -134,6 +158,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
0xfff2, 0xfff2, 0xfff2, 0xfff2 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00,
0xcb00, 0xcb00,
0xcb00, 0xcb00,
0xcb00, 0xcb00 };
#endif
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
0xc1600000, 0xc1600000 };
@ -171,6 +201,9 @@ void exec_vdup_vmov (void)
TEST_VDUP(, uint, u, 64, 1);
TEST_VDUP(, poly, p, 8, 8);
TEST_VDUP(, poly, p, 16, 4);
#if defined (FP16_SUPPORTED)
TEST_VDUP(, float, f, 16, 4);
#endif
TEST_VDUP(, float, f, 32, 2);
TEST_VDUP(q, int, s, 8, 16);
@ -183,8 +216,26 @@ void exec_vdup_vmov (void)
TEST_VDUP(q, uint, u, 64, 2);
TEST_VDUP(q, poly, p, 8, 16);
TEST_VDUP(q, poly, p, 16, 8);
#if defined (FP16_SUPPORTED)
TEST_VDUP(q, float, f, 16, 8);
#endif
TEST_VDUP(q, float, f, 32, 4);
#if defined (FP16_SUPPORTED)
switch (i) {
case 0:
CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
break;
case 1:
CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
break;
case 2:
CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
break;
default:
abort();
}
#else
switch (i) {
case 0:
CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
@ -198,6 +249,7 @@ void exec_vdup_vmov (void)
default:
abort();
}
#endif
}
/* Do the same tests with vmov. Use the same expected results. */
@ -216,6 +268,9 @@ void exec_vdup_vmov (void)
TEST_VMOV(, uint, u, 64, 1);
TEST_VMOV(, poly, p, 8, 8);
TEST_VMOV(, poly, p, 16, 4);
#if defined (FP16_SUPPORTED)
TEST_VMOV(, float, f, 16, 4);
#endif
TEST_VMOV(, float, f, 32, 2);
TEST_VMOV(q, int, s, 8, 16);
@ -228,8 +283,26 @@ void exec_vdup_vmov (void)
TEST_VMOV(q, uint, u, 64, 2);
TEST_VMOV(q, poly, p, 8, 16);
TEST_VMOV(q, poly, p, 16, 8);
#if defined (FP16_SUPPORTED)
TEST_VMOV(q, float, f, 16, 8);
#endif
TEST_VMOV(q, float, f, 32, 4);
#if defined (FP16_SUPPORTED)
switch (i) {
case 0:
CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
break;
case 1:
CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
break;
case 2:
CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
break;
default:
abort();
}
#else
switch (i) {
case 0:
CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
@ -243,6 +316,8 @@ void exec_vdup_vmov (void)
default:
abort();
}
#endif
}
}

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c

@ -17,6 +17,10 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7,
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xca80, 0xca80,
0xca80, 0xca80 };
#endif
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80,
0xca80, 0xca80,
0xca80, 0xca80,
0xca80, 0xca80 };
#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
@ -63,6 +73,9 @@ void exec_vdup_lane (void)
clean_results ();
TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer);
#if defined (FP16_SUPPORTED)
VLOAD(vector, buffer, , float, f, 16, 4);
#endif
VLOAD(vector, buffer, , float, f, 32, 2);
/* Choose lane arbitrarily. */
@ -76,6 +89,9 @@ void exec_vdup_lane (void)
TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0);
TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7);
TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3);
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANE(, float, f, 16, 4, 4, 3);
#endif
TEST_VDUP_LANE(, float, f, 32, 2, 2, 1);
TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2);
@ -88,9 +104,16 @@ void exec_vdup_lane (void)
TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0);
TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5);
TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1);
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3);
#endif
TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
#if defined (FP16_SUPPORTED)
CHECK_RESULTS (TEST_MSG, "");
#else
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
#endif
}
int main (void)

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c

@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcb00, 0xca80,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11,
0x11, 0x11, 0x11, 0x11,
@ -39,6 +43,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66,
0x66, 0x66, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xc880, 0x4b4d,
0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd,
0x4204cccd, 0x4204cccd };
@ -60,6 +70,10 @@ void exec_vext (void)
clean_results ();
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
#ifdef FP16_SUPPORTED
VLOAD(vector1, buffer, , float, f, 16, 4);
VLOAD(vector1, buffer, q, float, f, 16, 8);
#endif
VLOAD(vector1, buffer, , float, f, 32, 2);
VLOAD(vector1, buffer, q, float, f, 32, 4);
@ -74,6 +88,9 @@ void exec_vext (void)
VDUP(vector2, , uint, u, 64, 1, 0x88);
VDUP(vector2, , poly, p, 8, 8, 0x55);
VDUP(vector2, , poly, p, 16, 4, 0x66);
#if defined (FP16_SUPPORTED)
VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */
#endif
VDUP(vector2, , float, f, 32, 2, 33.6f);
VDUP(vector2, q, int, s, 8, 16, 0x11);
@ -86,6 +103,9 @@ void exec_vext (void)
VDUP(vector2, q, uint, u, 64, 2, 0x88);
VDUP(vector2, q, poly, p, 8, 16, 0x55);
VDUP(vector2, q, poly, p, 16, 8, 0x66);
#if defined (FP16_SUPPORTED)
VDUP (vector2, q, float, f, 16, 8, 14.6f);
#endif
VDUP(vector2, q, float, f, 32, 4, 33.2f);
/* Choose arbitrary extract offsets. */
@ -99,6 +119,9 @@ void exec_vext (void)
TEST_VEXT(, uint, u, 64, 1, 0);
TEST_VEXT(, poly, p, 8, 8, 6);
TEST_VEXT(, poly, p, 16, 4, 2);
#if defined (FP16_SUPPORTED)
TEST_VEXT(, float, f, 16, 4, 2);
#endif
TEST_VEXT(, float, f, 32, 2, 1);
TEST_VEXT(q, int, s, 8, 16, 14);
@ -111,9 +134,16 @@ void exec_vext (void)
TEST_VEXT(q, uint, u, 64, 2, 1);
TEST_VEXT(q, poly, p, 8, 16, 12);
TEST_VEXT(q, poly, p, 16, 8, 6);
#if defined (FP16_SUPPORTED)
TEST_VEXT(q, float, f, 16, 8, 7);
#endif
TEST_VEXT(q, float, f, 32, 4, 3);
#if defined (FP16_SUPPORTED)
CHECK_RESULTS (TEST_MSG, "");
#else
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
#endif
}
int main (void)

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c

@ -63,6 +63,10 @@ VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 };
VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
0xf3, 0xf2, 0xf1, 0xf0 };
VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00,
0xcb80, 0xcc00 };
#endif
VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 };
VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
0xf3, 0xf2, 0xf1, 0xf0,
@ -86,6 +90,12 @@ VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4,
0xfb, 0xfa, 0xf9, 0xf8 };
VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0,
0xfff7, 0xfff6, 0xfff5, 0xfff4 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00,
0xcb80, 0xcc00,
0xc880, 0xc900,
0xc980, 0xca00 };
#endif
VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000,
0xc1500000, 0xc1600000 };
@ -104,6 +114,10 @@ void exec_vrev (void)
/* Initialize input "vector" from "buffer". */
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
#if defined (FP16_SUPPORTED)
VLOAD (vector, buffer, , float, f, 16, 4);
VLOAD (vector, buffer, q, float, f, 16, 8);
#endif
VLOAD(vector, buffer, , float, f, 32, 2);
VLOAD(vector, buffer, q, float, f, 32, 4);
@ -187,6 +201,12 @@ void exec_vrev (void)
CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, "");
CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, "");
#if defined (FP16_SUPPORTED)
TEST_VREV (, float, f, 16, 4, 64);
TEST_VREV (q, float, f, 16, 8, 64);
CHECK_FP(TEST_MSG, float, 16, 4, PRIx32, expected_vrev64, "");
CHECK_FP(TEST_MSG, float, 16, 8, PRIx32, expected_vrev64, "");
#endif
TEST_VREV(, float, f, 32, 2, 64);
TEST_VREV(q, float, f, 32, 4, 64);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_vrev64, "");

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc

@ -53,9 +53,17 @@ void FNNAME (INSN_NAME) (void)
DECL_VSHUFFLE(float, 32, 4)
DECL_ALL_VSHUFFLE();
#if defined (FP16_SUPPORTED)
DECL_VSHUFFLE (float, 16, 4);
DECL_VSHUFFLE (float, 16, 8);
#endif
/* Initialize input "vector" from "buffer". */
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
#if defined (FP16_SUPPORTED)
VLOAD (vector1, buffer, , float, f, 16, 4);
VLOAD (vector1, buffer, q, float, f, 16, 8);
#endif
VLOAD(vector1, buffer, , float, f, 32, 2);
VLOAD(vector1, buffer, q, float, f, 32, 4);
@ -68,6 +76,9 @@ void FNNAME (INSN_NAME) (void)
VDUP(vector2, , uint, u, 32, 2, 0x77);
VDUP(vector2, , poly, p, 8, 8, 0x55);
VDUP(vector2, , poly, p, 16, 4, 0x66);
#if defined (FP16_SUPPORTED)
VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */
#endif
VDUP(vector2, , float, f, 32, 2, 33.6f);
VDUP(vector2, q, int, s, 8, 16, 0x11);
@ -78,8 +89,11 @@ void FNNAME (INSN_NAME) (void)
VDUP(vector2, q, uint, u, 32, 4, 0x77);
VDUP(vector2, q, poly, p, 8, 16, 0x55);
VDUP(vector2, q, poly, p, 16, 8, 0x66);
#if defined (FP16_SUPPORTED)
VDUP (vector2, q, float, f, 16, 8, 14.6f);
#endif
VDUP(vector2, q, float, f, 32, 4, 33.8f);
#define TEST_ALL_VSHUFFLE(INSN) \
TEST_VSHUFFLE(INSN, , int, s, 8, 8); \
TEST_VSHUFFLE(INSN, , int, s, 16, 4); \
@ -100,6 +114,10 @@ void FNNAME (INSN_NAME) (void)
TEST_VSHUFFLE(INSN, q, poly, p, 16, 8); \
TEST_VSHUFFLE(INSN, q, float, f, 32, 4)
#define TEST_VSHUFFLE_FP16(INSN) \
TEST_VSHUFFLE(INSN, , float, f, 16, 4); \
TEST_VSHUFFLE(INSN, q, float, f, 16, 8);
#define TEST_ALL_EXTRA_CHUNKS() \
TEST_EXTRA_CHUNK(int, 8, 8, 1); \
TEST_EXTRA_CHUNK(int, 16, 4, 1); \
@ -143,17 +161,37 @@ void FNNAME (INSN_NAME) (void)
CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
} \
}
#define CHECK_RESULTS_VSHUFFLE_FP16(test_name,EXPECTED,comment) \
{ \
CHECK_FP (test_name, float, 16, 4, PRIx16, EXPECTED, comment); \
CHECK_FP (test_name, float, 16, 8, PRIx16, EXPECTED, comment); \
}
clean_results ();
/* Execute the tests. */
TEST_ALL_VSHUFFLE(INSN_NAME);
#if defined (FP16_SUPPORTED)
TEST_VSHUFFLE_FP16 (INSN_NAME);
#endif
CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)");
#if defined (FP16_SUPPORTED)
CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected0, "(chunk 0)");
#endif
TEST_ALL_EXTRA_CHUNKS();
#if defined (FP16_SUPPORTED)
TEST_EXTRA_CHUNK (float, 16, 4, 1);
TEST_EXTRA_CHUNK (float, 16, 8, 1);
#endif
CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)");
#if defined (FP16_SUPPORTED)
CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected1, "(chunk 1)");
#endif
}
int main (void)

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c

@ -15,6 +15,10 @@ VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55,
0xf2, 0xf3, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11,
0xf2, 0xf3, 0x11, 0x11,
@ -36,6 +40,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55,
0xf6, 0xf7, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66,
0xfff2, 0xfff3, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
0x4b4d, 0x4b4d,
0xcb00, 0xca80,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0x42073333, 0x42073333 };
@ -51,6 +61,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55,
0xf6, 0xf7, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb00, 0xca80,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11,
0xfa, 0xfb, 0x11, 0x11,
@ -72,6 +86,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55,
0xfe, 0xff, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66,
0xfff6, 0xfff7, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xca00, 0xc980,
0x4b4d, 0x4b4d,
0xc900, 0xc880,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1600000, 0xc1500000,
0x42073333, 0x42073333 };

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c

@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff2, 0xfff3 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
0xcb00, 0xca80 };
#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@ -48,6 +52,12 @@ VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1,
0xfff2, 0xfff3,
0xfff4, 0xfff5,
0xfff6, 0xfff7 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
0xcb00, 0xca80,
0xca00, 0xc980,
0xc900, 0xc880 };
#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11,
0x11, 0x11, 0x11, 0x11,
@ -84,6 +98,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66,
0x66, 0x66, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333,
0x42073333, 0x42073333 };

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c

@ -18,6 +18,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55,
0xf1, 0xf5, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2,
0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb00,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11,
0xf1, 0xf9, 0x11, 0x11,
@ -41,6 +45,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55,
0xf3, 0xfb, 0x55, 0x55 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66,
0xfff1, 0xfff5, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xca00,
0x4b4d, 0x4b4d,
0xcb80, 0xc980,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000,
0x42073333, 0x42073333 };
@ -59,6 +69,10 @@ VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55,
0xf3, 0xf7, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3,
0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xca80,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11,
0xf5, 0xfd, 0x11, 0x11,
@ -82,6 +96,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55,
0xf7, 0xff, 0x55, 0x55 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66,
0xfff3, 0xfff7, 0x66, 0x66 };
#if defined (FP16_SUPPORTED)
VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb00, 0xc900,
0x4b4d, 0x4b4d,
0xca80, 0xc880,
0x4b4d, 0x4b4d };
#endif
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1500000,
0x42073333, 0x42073333 };