[ARM] Support ARMv8.2 FP16 simd instructions

gas/ * config/tc-arm.c (N_S_32): New. (N_F_16_32): Likewise. (N_SUF_32): Support N_F16. (N_IF_32): Likewise. (neon_dyadic_misc): Likewise. (do_neon_cmp): Likewise. (do_neon_cmp_inv): Likewise. (do_neon_mul): Likewise. (do_neon_fcmp_absolute): Likewise. (do_neon_step): Likewise. (do_neon_abs_neg): Likewise. (CVT_FLAVOR_VAR): Likewise. (do_neon_cvt_1): Likewise. (do_neon_recip_est): Likewise. (do_vmaxnm): Likewise. (do_vrint_1): Likewise. (neon_check_type): Check architecture support for FP16 extension. (insns): Update comments. * testsuite/gas/arm/armv8-2-fp16-simd.s: New test source. * testsuite/gas/arm/armv8-2-fp16-simd.d: New testcase for arm mode. * testsuite/gas/arm/armv8-2-fp16-simd-thumb.d: Likewise for thumb mode. * testsuite/gas/arm/armv8-2-fp16-simd-warning.d: New rejection test for arm mode. * testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d: Likewise for thumb mode. * testsuite/gas/arm/armv8-2-fp16-simd-warning.l: New expected rejection error file. opcode/ * arm-dis.c (neon_opcodes): Support new FP16 instructions.
2016-03-16 16:11:59 +00:00 · 2016-03-16 16:11:59 +00:00 · cc93330137
parent 6b94a855be
commit cc93330137
10 changed files with 851 additions and 46 deletions
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@ -1,3 +1,33 @@
+2016-03-16  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/tc-arm.c (N_S_32): New.
+	(N_F_16_32): Likewise.
+	(N_SUF_32): Support N_F16.
+	(N_IF_32): Likewise.
+	(neon_dyadic_misc): Likewise.
+	(do_neon_cmp): Likewise.
+	(do_neon_cmp_inv): Likewise.
+	(do_neon_mul): Likewise.
+	(do_neon_fcmp_absolute): Likewise.
+	(do_neon_step): Likewise.
+	(do_neon_abs_neg): Likewise.
+	(CVT_FLAVOR_VAR): Likewise.
+	(do_neon_cvt_1): Likewise.
+	(do_neon_recip_est): Likewise.
+	(do_vmaxnm): Likewise.
+	(do_vrint_1): Likewise.
+	(neon_check_type): Check architecture support for FP16 extension.
+	(insns): Update comments.
+	* testsuite/gas/arm/armv8-2-fp16-simd.s: New test source.
+	* testsuite/gas/arm/armv8-2-fp16-simd.d: New testcase for arm mode.
+	* testsuite/gas/arm/armv8-2-fp16-simd-thumb.d: Likewise for thumb mode.
+	* testsuite/gas/arm/armv8-2-fp16-simd-warning.d: New rejection test for
+	arm mode.
+	* testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d: Likewise for
+	thumb mode.
+	* testsuite/gas/arm/armv8-2-fp16-simd-warning.l: New expected rejection
+	error file.
+
 2016-03-16  Nick Clifton  <nickc@redhat.com>

 	* read.c (emit_expr_with_reloc): Add code check a bignum with
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@ -13443,9 +13443,11 @@ enum neon_type_mask
 #define N_SU_ALL   (N_S8 | N_S16 | N_S32 | N_S64 | N_U8 | N_U16 | N_U32 | N_U64)
 #define N_SU_32    (N_S8 | N_S16 | N_S32 | N_U8 | N_U16 | N_U32)
 #define N_SU_16_64 (N_S16 | N_S32 | N_S64 | N_U16 | N_U32 | N_U64)
-#define N_SUF_32   (N_SU_32 | N_F32)
+#define N_S_32     (N_S8 | N_S16 | N_S32)
+#define N_F_16_32  (N_F16 | N_F32)
+#define N_SUF_32   (N_SU_32 | N_F_16_32)
 #define N_I_ALL    (N_I8 | N_I16 | N_I32 | N_I64)
-#define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F32)
+#define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F16 | N_F32)
 #define N_F_ALL    (N_F16 | N_F32 | N_F64)

 /* Pass this as the first type argument to neon_check_type to ignore types
@ -13915,6 +13917,15 @@ neon_check_type (unsigned els, enum neon_shape ns, ...)
 		  k_type = g_type;
 		  k_size = g_size;
 		  key_allowed = thisarg & ~N_KEY;
+
+		  /* Check architecture constraint on FP16 extension.  */
+		  if (k_size == 16
+		      && k_type == NT_float
+		      && ! ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16))
+		    {
+		      inst.error = _(BAD_FP16);
+		      return badtype;
+		    }
 		}
 	    }
 	  else
@ -14726,7 +14737,7 @@ neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types,
  if (et.type == NT_float)
    {
      NEON_ENCODE (FLOAT, inst);
-      neon_three_same (neon_quad (rs), 0, -1);
+      neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
    }
  else
    {
@ -14887,13 +14898,13 @@ neon_compare (unsigned regtypes, unsigned immtypes, int invert)
 static void
 do_neon_cmp (void)
 {
-  neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, FALSE);
+  neon_compare (N_SUF_32, N_S_32 | N_F_16_32, FALSE);
 }

 static void
 do_neon_cmp_inv (void)
 {
-  neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, TRUE);
+  neon_compare (N_SUF_32, N_S_32 | N_F_16_32, TRUE);
 }

 static void
@ -15021,7 +15032,7 @@ do_neon_mul (void)
  if (inst.operands[2].isscalar)
    do_neon_mac_maybe_scalar ();
  else
-    neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F32 | N_P8, 0);
+    neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F16 | N_F32 | N_P8, 0);
 }

 static void
@ -15082,9 +15093,10 @@ static void
 do_neon_fcmp_absolute (void)
 {
  enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-  neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
+  struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK,
+					    N_F_16_32 | N_KEY);
  /* Size field comes from bit mask.  */
-  neon_three_same (neon_quad (rs), 1, -1);
+  neon_three_same (neon_quad (rs), 1, et.size == 16 ? (int) et.size : -1);
 }

 static void
@ -15098,8 +15110,9 @@ static void
 do_neon_step (void)
 {
  enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-  neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
-  neon_three_same (neon_quad (rs), 0, -1);
+  struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK,
+					    N_F_16_32 | N_KEY);
+  neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
 }

 static void
@ -15115,7 +15128,7 @@ do_neon_abs_neg (void)
    return;

  rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
-  et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_F32 | N_KEY);
+  et = neon_check_type (2, rs, N_EQK, N_S_32 | N_F_16_32 | N_KEY);

  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
@ -15324,6 +15337,10 @@ do_neon_shll (void)
  CVT_VAR (f32_s32, N_F32, N_S32, whole_reg,   "fsltos", "fsitos", NULL)      \
  CVT_VAR (f32_u32, N_F32, N_U32, whole_reg,   "fultos", "fuitos", NULL)      \
  /* Half-precision conversions.  */					      \
+  CVT_VAR (s16_f16, N_S16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL)	      \
+  CVT_VAR (u16_f16, N_U16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL)	      \
+  CVT_VAR (f16_s16, N_F16 | N_KEY, N_S16, whole_reg, NULL, NULL, NULL)	      \
+  CVT_VAR (f16_u16, N_F16 | N_KEY, N_U16, whole_reg, NULL, NULL, NULL)	      \
  CVT_VAR (f32_f16, N_F32, N_F16, whole_reg,   NULL,     NULL,     NULL)      \
  CVT_VAR (f16_f32, N_F16, N_F32, whole_reg,   NULL,     NULL,     NULL)      \
  /* New VCVT instructions introduced by ARMv8.2 fp16 extension.	      \
@ -15556,10 +15573,15 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 					  NS_NULL);
  enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);

+  if (flavour == neon_cvt_flavour_invalid)
+    return;
+
  /* PR11109: Handle round-to-zero for VCVT conversions.  */
  if (mode == neon_cvt_mode_z
      && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_arch_vfp_v2)
-      && (flavour == neon_cvt_flavour_s32_f32
+      && (flavour == neon_cvt_flavour_s16_f16
+	  || flavour == neon_cvt_flavour_u16_f16
+	  || flavour == neon_cvt_flavour_s32_f32
 	  || flavour == neon_cvt_flavour_u32_f32
 	  || flavour == neon_cvt_flavour_s32_f64
 	  || flavour == neon_cvt_flavour_u32_f64)
@ -15598,7 +15620,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
    case NS_QQI:
      {
 	unsigned immbits;
-	unsigned enctab[] = { 0x0000100, 0x1000100, 0x0, 0x1000000 };
+	unsigned enctab[] = {0x0000100, 0x1000100, 0x0, 0x1000000,
+			     0x0000100, 0x1000100, 0x0, 0x1000000};

 	if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
 	  return;
@ -15607,7 +15630,6 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	   integer conversion.  */
 	if (inst.operands[2].present && inst.operands[2].imm == 0)
 	  goto int_encode;
-       immbits = 32 - inst.operands[2].imm;
 	NEON_ENCODE (IMMED, inst);
 	if (flavour != neon_cvt_flavour_invalid)
 	  inst.instruction |= enctab[flavour];
@ -15617,7 +15639,19 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	inst.instruction |= HI1 (inst.operands[1].reg) << 5;
 	inst.instruction |= neon_quad (rs) << 6;
 	inst.instruction |= 1 << 21;
-	inst.instruction |= immbits << 16;
+	if (flavour < neon_cvt_flavour_s16_f16)
+	  {
+	    inst.instruction |= 1 << 21;
+	    immbits = 32 - inst.operands[2].imm;
+	    inst.instruction |= immbits << 16;
+	  }
+	else
+	  {
+	    inst.instruction |= 3 << 20;
+	    immbits = 16 - inst.operands[2].imm;
+	    inst.instruction |= immbits << 16;
+	    inst.instruction &= ~(1 << 9);
+	  }

 	neon_dp_fixup (&inst);
      }
@ -15638,8 +15672,14 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	  inst.instruction |= LOW4 (inst.operands[1].reg);
 	  inst.instruction |= HI1 (inst.operands[1].reg) << 5;
 	  inst.instruction |= neon_quad (rs) << 6;
-	  inst.instruction |= (flavour == neon_cvt_flavour_u32_f32) << 7;
+	  inst.instruction |= (flavour == neon_cvt_flavour_u16_f16
+			       || flavour == neon_cvt_flavour_u32_f32) << 7;
 	  inst.instruction |= mode << 8;
+	  if (flavour == neon_cvt_flavour_u16_f16
+	      || flavour == neon_cvt_flavour_s16_f16)
+	    /* Mask off the original size bits and reencode them.  */
+	    inst.instruction = ((inst.instruction & 0xfff3ffff) | (1 << 18));
+
 	  if (thumb_mode)
 	    inst.instruction |= 0xfc000000;
 	  else
@ -15649,7 +15689,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	{
    int_encode:
 	  {
-	    unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080 };
+	    unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080,
+				  0x100, 0x180, 0x0, 0x080};

 	    NEON_ENCODE (INTEGER, inst);

@ -15664,7 +15705,12 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	    inst.instruction |= LOW4 (inst.operands[1].reg);
 	    inst.instruction |= HI1 (inst.operands[1].reg) << 5;
 	    inst.instruction |= neon_quad (rs) << 6;
-	    inst.instruction |= 2 << 18;
+	    if (flavour >= neon_cvt_flavour_s16_f16
+		&& flavour <= neon_cvt_flavour_f16_u16)
+	      /* Half precision.  */
+	      inst.instruction |= 1 << 18;
+	    else
+	      inst.instruction |= 2 << 18;

 	    neon_dp_fixup (&inst);
 	  }
@ -16487,7 +16533,7 @@ do_neon_recip_est (void)
 {
  enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
  struct neon_type_el et = neon_check_type (2, rs,
-    N_EQK | N_FLT, N_F32 | N_U32 | N_KEY);
+    N_EQK | N_FLT, N_F_16_32 | N_U32 | N_KEY);
  inst.instruction |= (et.type == NT_float) << 8;
  neon_two_same (neon_quad (rs), 1, et.size);
 }
@ -17002,7 +17048,7 @@ do_vmaxnm (void)
  if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL)
    return;

-  neon_dyadic_misc (NT_untyped, N_F32, 0);
+  neon_dyadic_misc (NT_untyped, N_F_16_32, 0);
 }

 static void
@ -17058,7 +17104,7 @@ do_vrint_1 (enum neon_cvt_mode mode)
    {
      /* Neon encodings (or something broken...).  */
      inst.error = NULL;
-      et = neon_check_type (2, rs, N_EQK, N_F32 | N_KEY);
+      et = neon_check_type (2, rs, N_EQK, N_F_16_32 | N_KEY);

      if (et.type == NT_invtype)
 	return;
@ -17074,6 +17120,10 @@ do_vrint_1 (enum neon_cvt_mode mode)
      inst.instruction |= LOW4 (inst.operands[1].reg);
      inst.instruction |= HI1 (inst.operands[1].reg) << 5;
      inst.instruction |= neon_quad (rs) << 6;
+      /* Mask off the original size bits and reencode them.  */
+      inst.instruction = ((inst.instruction & 0xfff3ffff)
+			  | neon_logbits (et.size) << 18);
+
      switch (mode)
 	{
 	case neon_cvt_mode_z: inst.instruction |= 3 << 7; break;
@ -20315,7 +20365,7 @@ static const struct asm_opcode insns[] =
 NUF(vbitq,     1200110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
 NUF(vbif,      1300110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
 NUF(vbifq,     1300110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
-  /* Int and float variants, types S8 S16 S32 U8 U16 U32 F32.  */
+  /* Int and float variants, types S8 S16 S32 U8 U16 U32 F16 F32.  */
 nUF(vabd,      _vabd,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
 nUF(vabdq,     _vabd,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
 nUF(vmax,      _vmax,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
@ -20478,7 +20528,7 @@ static const struct asm_opcode insns[] =
 NUF(vpadalq,   1b00600, 2, (RNQ,  RNQ),      neon_pair_long),
 NUF(vpaddl,    1b00200, 2, (RNDQ, RNDQ),     neon_pair_long),
 NUF(vpaddlq,   1b00200, 2, (RNQ,  RNQ),      neon_pair_long),
-  /* Reciprocal estimates. Types U32 F32.  */
+  /* Reciprocal estimates.  Types U32 F16 F32.  */
 NUF(vrecpe,    1b30400, 2, (RNDQ, RNDQ),     neon_recip_est),
 NUF(vrecpeq,   1b30400, 2, (RNQ,  RNQ),      neon_recip_est),
 NUF(vrsqrte,   1b30480, 2, (RNDQ, RNDQ),     neon_recip_est),
--- a/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d
@ -0,0 +1,147 @@
+#name: ARM v8.2 FP16 support on SIMD (Thumb)
+#source: armv8-2-fp16-simd.s
+#objdump: -d
+#as: -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8 -mthumb
+#skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+
+00000000 <func>:
+   0:	ff34 2d0e 	vabd.f16	d2, d4, d14
+   4:	ff38 4d6c 	vabd.f16	q2, q4, q14
+   8:	ef14 2f0e 	vmax.f16	d2, d4, d14
+   c:	ef18 4f6c 	vmax.f16	q2, q4, q14
+  10:	ef34 2f0e 	vmin.f16	d2, d4, d14
+  14:	ef38 4f6c 	vmin.f16	q2, q4, q14
+  18:	ff30 0dec 	vabd.f16	q0, q8, q14
+  1c:	ef10 0fec 	vmax.f16	q0, q8, q14
+  20:	ef30 0fec 	vmin.f16	q0, q8, q14
+  24:	ff33 1d0f 	vabd.f16	d1, d3, d15
+  28:	ff31 0d08 	vabd.f16	d0, d1, d8
+  2c:	ffb5 0708 	vabs.f16	d0, d8
+  30:	ffb5 0760 	vabs.f16	q0, q8
+  34:	ffb5 0788 	vneg.f16	d0, d8
+  38:	ffb5 07e0 	vneg.f16	q0, q8
+  3c:	ffb5 474c 	vabs.f16	q2, q6
+  40:	ffb5 47cc 	vneg.f16	q2, q6
+  44:	ffb5 7703 	vabs.f16	d7, d3
+  48:	ffb5 9781 	vneg.f16	d9, d1
+  4c:	ff14 2e1e 	vacge.f16	d2, d4, d14
+  50:	ff18 4e7c 	vacge.f16	q2, q4, q14
+  54:	ff34 2e1e 	vacgt.f16	d2, d4, d14
+  58:	ff38 4e7c 	vacgt.f16	q2, q4, q14
+  5c:	ff3e 2e14 	vacgt.f16	d2, d14, d4
+  60:	ff3c 4ed8 	vacgt.f16	q2, q14, q4
+  64:	ff1e 2e14 	vacge.f16	d2, d14, d4
+  68:	ff1c 4ed8 	vacge.f16	q2, q14, q4
+  6c:	ef14 2e0e 	vceq.f16	d2, d4, d14
+  70:	ef18 4e6c 	vceq.f16	q2, q4, q14
+  74:	ff14 2e0e 	vcge.f16	d2, d4, d14
+  78:	ff18 4e6c 	vcge.f16	q2, q4, q14
+  7c:	ff34 2e0e 	vcgt.f16	d2, d4, d14
+  80:	ff38 4e6c 	vcgt.f16	q2, q4, q14
+  84:	ff1e 2e04 	vcge.f16	d2, d14, d4
+  88:	ff1c 4ec8 	vcge.f16	q2, q14, q4
+  8c:	ff3e 2e04 	vcgt.f16	d2, d14, d4
+  90:	ff3c 4ec8 	vcgt.f16	q2, q14, q4
+  94:	ff10 0efc 	vacge.f16	q0, q8, q14
+  98:	ff30 0efc 	vacgt.f16	q0, q8, q14
+  9c:	ff3c 0ef0 	vacgt.f16	q0, q14, q8
+  a0:	ff1c 0ef0 	vacge.f16	q0, q14, q8
+  a4:	ef10 0eec 	vceq.f16	q0, q8, q14
+  a8:	ff10 0eec 	vcge.f16	q0, q8, q14
+  ac:	ff30 0eec 	vcgt.f16	q0, q8, q14
+  b0:	ff1c 0ee0 	vcge.f16	q0, q14, q8
+  b4:	ff3c 0ee0 	vcgt.f16	q0, q14, q8
+  b8:	ef14 2d0e 	vadd.f16	d2, d4, d14
+  bc:	ef18 4d6c 	vadd.f16	q2, q4, q14
+  c0:	ef34 2d0e 	vsub.f16	d2, d4, d14
+  c4:	ef38 4d6c 	vsub.f16	q2, q4, q14
+  c8:	ef10 0dec 	vadd.f16	q0, q8, q14
+  cc:	ef30 0dec 	vsub.f16	q0, q8, q14
+  d0:	ff14 2f1e 	vmaxnm.f16	d2, d4, d14
+  d4:	ff18 4f7c 	vmaxnm.f16	q2, q4, q14
+  d8:	ff34 2f1e 	vminnm.f16	d2, d4, d14
+  dc:	ff38 4f7c 	vminnm.f16	q2, q4, q14
+  e0:	ef14 2c1e 	vfma.f16	d2, d4, d14
+  e4:	ef18 4c7c 	vfma.f16	q2, q4, q14
+  e8:	ef34 2c1e 	vfms.f16	d2, d4, d14
+  ec:	ef38 4c7c 	vfms.f16	q2, q4, q14
+  f0:	ef14 2d1e 	vmla.f16	d2, d4, d14
+  f4:	ef18 4d7c 	vmla.f16	q2, q4, q14
+  f8:	ef34 2d1e 	vmls.f16	d2, d4, d14
+  fc:	ef38 4d7c 	vmls.f16	q2, q4, q14
+ 100:	ffb6 458e 	vrintz.f16	d4, d14
+ 104:	ffb6 85ec 	vrintz.f16	q4, q14
+ 108:	ffb6 448e 	vrintx.f16	d4, d14
+ 10c:	ffb6 84ec 	vrintx.f16	q4, q14
+ 110:	ffb6 450e 	vrinta.f16	d4, d14
+ 114:	ffb6 856c 	vrinta.f16	q4, q14
+ 118:	ffb6 440e 	vrintn.f16	d4, d14
+ 11c:	ffb6 846c 	vrintn.f16	q4, q14
+ 120:	ffb6 478e 	vrintp.f16	d4, d14
+ 124:	ffb6 87ec 	vrintp.f16	q4, q14
+ 128:	ffb6 468e 	vrintm.f16	d4, d14
+ 12c:	ffb6 86ec 	vrintm.f16	q4, q14
+ 130:	ff18 4d0e 	vpadd.f16	d4, d8, d14
+ 134:	ffb7 4508 	vrecpe.f16	d4, d8
+ 138:	ffb7 8560 	vrecpe.f16	q4, q8
+ 13c:	ffb7 4588 	vrsqrte.f16	d4, d8
+ 140:	ffb7 85e0 	vrsqrte.f16	q4, q8
+ 144:	ffb7 0564 	vrecpe.f16	q0, q10
+ 148:	ffb7 05e4 	vrsqrte.f16	q0, q10
+ 14c:	ef1a 8f1c 	vrecps.f16	d8, d10, d12
+ 150:	ef54 0ff8 	vrecps.f16	q8, q10, q12
+ 154:	ef3a 8f1c 	vrsqrts.f16	d8, d10, d12
+ 158:	ef74 0ff8 	vrsqrts.f16	q8, q10, q12
+ 15c:	ef10 4f58 	vrecps.f16	q2, q0, q4
+ 160:	ef30 4f58 	vrsqrts.f16	q2, q0, q4
+ 164:	ff18 4f0e 	vpmax.f16	d4, d8, d14
+ 168:	ff38 af02 	vpmin.f16	d10, d8, d2
+ 16c:	ff18 4d1e 	vmul.f16	d4, d8, d14
+ 170:	ff10 7d11 	vmul.f16	d7, d0, d1
+ 174:	ff10 4dd0 	vmul.f16	q2, q8, q0
+ 178:	ffb7 600c 	vcvta.s16.f16	d6, d12
+ 17c:	ffb7 c068 	vcvta.s16.f16	q6, q12
+ 180:	ffb7 630c 	vcvtm.s16.f16	d6, d12
+ 184:	ffb7 c368 	vcvtm.s16.f16	q6, q12
+ 188:	ffb7 610c 	vcvtn.s16.f16	d6, d12
+ 18c:	ffb7 c168 	vcvtn.s16.f16	q6, q12
+ 190:	ffb7 620c 	vcvtp.s16.f16	d6, d12
+ 194:	ffb7 c268 	vcvtp.s16.f16	q6, q12
+ 198:	ffb7 608c 	vcvta.u16.f16	d6, d12
+ 19c:	ffb7 c0e8 	vcvta.u16.f16	q6, q12
+ 1a0:	ffb7 638c 	vcvtm.u16.f16	d6, d12
+ 1a4:	ffb7 c3e8 	vcvtm.u16.f16	q6, q12
+ 1a8:	ffb7 618c 	vcvtn.u16.f16	d6, d12
+ 1ac:	ffb7 c1e8 	vcvtn.u16.f16	q6, q12
+ 1b0:	ffb7 628c 	vcvtp.u16.f16	d6, d12
+ 1b4:	ffb7 c2e8 	vcvtp.u16.f16	q6, q12
+ 1b8:	ffb7 e700 	vcvt.s16.f16	d14, d0
+ 1bc:	fff7 c740 	vcvt.s16.f16	q14, q0
+ 1c0:	ffb7 e780 	vcvt.u16.f16	d14, d0
+ 1c4:	fff7 c7c0 	vcvt.u16.f16	q14, q0
+ 1c8:	ffb7 e600 	vcvt.f16.s16	d14, d0
+ 1cc:	fff7 c640 	vcvt.f16.s16	q14, q0
+ 1d0:	ffb7 e680 	vcvt.f16.u16	d14, d0
+ 1d4:	fff7 c6c0 	vcvt.f16.u16	q14, q0
+ 1d8:	efbd ed10 	vcvt.s16.f16	d14, d0, #3
+ 1dc:	effd cd50 	vcvt.s16.f16	q14, q0, #3
+ 1e0:	ffbd ed10 	vcvt.u16.f16	d14, d0, #3
+ 1e4:	fffd cd50 	vcvt.u16.f16	q14, q0, #3
+ 1e8:	efbd ec10 	vcvt.f16.s16	d14, d0, #3
+ 1ec:	effd cc50 	vcvt.f16.s16	q14, q0, #3
+ 1f0:	ffbd ec10 	vcvt.f16.u16	d14, d0, #3
+ 1f4:	fffd cc50 	vcvt.f16.u16	q14, q0, #3
+ 1f8:	ffb5 e502 	vceq.f16	d14, d2, #0
+ 1fc:	fff5 c544 	vceq.f16	q14, q2, #0
+ 200:	ffb5 e482 	vcge.f16	d14, d2, #0
+ 204:	fff5 c4c4 	vcge.f16	q14, q2, #0
+ 208:	ffb5 e402 	vcgt.f16	d14, d2, #0
+ 20c:	fff5 c444 	vcgt.f16	q14, q2, #0
+ 210:	ffb5 e582 	vcle.f16	d14, d2, #0
+ 214:	fff5 c5c4 	vcle.f16	q14, q2, #0
+ 218:	ffb5 e602 	vclt.f16	d14, d2, #0
+ 21c:	fff5 c644 	vclt.f16	q14, q2, #0
--- a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d
@ -0,0 +1,4 @@
+#name: Reject ARM v8.2 FP16 SIMD instruction for early arch (Thumb)
+#source: armv8-2-fp16-simd.s
+#as: -march=armv8.2-a -mfpu=neon-fp-armv8 -mthumb
+#error-output: armv8-2-fp16-simd-warning.l
--- a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d
@ -0,0 +1,4 @@
+#name: Reject ARM v8.2 FP16 SIMD instruction for early arch
+#source: armv8-2-fp16-simd.s
+#as: -march=armv8.2-a -mfpu=neon-fp-armv8
+#error-output: armv8-2-fp16-simd-warning.l
--- a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l
@ -0,0 +1,137 @@
+[^:]*: Assembler messages:
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vabd.f16 d2,d4,d14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vabd.f16 q2,q4,q14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmax.f16 d2,d4,d14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmax.f16 q2,q4,q14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmin.f16 d2,d4,d14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmin.f16 q2,q4,q14'
+[^:]*:164: Error: selected processor does not support fp16 instruction -- `vabdq.f16 q0,q8,q14'
+[^:]*:164: Error: selected processor does not support fp16 instruction -- `vmaxq.f16 q0,q8,q14'
+[^:]*:164: Error: selected processor does not support fp16 instruction -- `vminq.f16 q0,q8,q14'
+[^:]*:165: Error: selected processor does not support fp16 instruction -- `vabd.f16 d1,d3,d15'
+[^:]*:166: Error: selected processor does not support fp16 instruction -- `vabd.f16 d0,d1,d8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vabs.f16 d0,d8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vabs.f16 q0,q8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vneg.f16 d0,d8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vneg.f16 q0,q8'
+[^:]*:170: Error: selected processor does not support fp16 instruction -- `vabsq.f16 q2,q6'
+[^:]*:170: Error: selected processor does not support fp16 instruction -- `vnegq.f16 q2,q6'
+[^:]*:171: Error: selected processor does not support fp16 instruction -- `vabs.f16 d7,d3'
+[^:]*:172: Error: selected processor does not support fp16 instruction -- `vneg.f16 d9,d1'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacge.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacge.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacgt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacgt.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vaclt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vaclt.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacle.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacle.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vceq.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vceq.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcge.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcge.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcgt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcgt.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcle.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcle.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vclt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vclt.f16 q2,q4,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacgeq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacgtq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacltq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacleq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vceqq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcgeq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcgtq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcleq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcltq.f16 q0,q8,q14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vadd.f16 d2,d4,d14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vadd.f16 q2,q4,q14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vsub.f16 d2,d4,d14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vsub.f16 q2,q4,q14'
+[^:]*:180: Error: selected processor does not support fp16 instruction -- `vaddq.f16 q0,q8,q14'
+[^:]*:180: Error: selected processor does not support fp16 instruction -- `vsubq.f16 q0,q8,q14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vmaxnm.f16 d2,d4,d14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vmaxnm.f16 q2,q4,q14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vminnm.f16 d2,d4,d14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vminnm.f16 q2,q4,q14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfma.f16 d2,d4,d14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfma.f16 q2,q4,q14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfms.f16 d2,d4,d14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfms.f16 q2,q4,q14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmla.f16 d2,d4,d14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmla.f16 q2,q4,q14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmls.f16 d2,d4,d14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmls.f16 q2,q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintz.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintz.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintx.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintx.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrinta.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrinta.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintn.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintn.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintp.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintp.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintm.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintm.f16 q4,q14'
+[^:]*:195: Error: selected processor does not support fp16 instruction -- `vpadd.f16 d4,d8,d14'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrecpe.f16 d4,d8'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrecpe.f16 q4,q8'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrsqrte.f16 d4,d8'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrsqrte.f16 q4,q8'
+[^:]*:199: Error: selected processor does not support fp16 instruction -- `vrecpeq.f16 q0,q10'
+[^:]*:199: Error: selected processor does not support fp16 instruction -- `vrsqrteq.f16 q0,q10'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrecps.f16 d8,d10,d12'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrecps.f16 q8,q10,q12'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrsqrts.f16 d8,d10,d12'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrsqrts.f16 q8,q10,q12'
+[^:]*:203: Error: selected processor does not support fp16 instruction -- `vrecpsq.f16 q2,q0,q4'
+[^:]*:203: Error: selected processor does not support fp16 instruction -- `vrsqrtsq.f16 q2,q0,q4'
+[^:]*:206: Error: selected processor does not support fp16 instruction -- `vpmax.f16 d4,d8,d14'
+[^:]*:207: Error: selected processor does not support fp16 instruction -- `vpmin.f16 d10,d8,d2'
+[^:]*:210: Error: selected processor does not support fp16 instruction -- `vmul.f16 d4,d8,d14'
+[^:]*:211: Error: selected processor does not support fp16 instruction -- `vmul.f16 d7,d0,d1'
+[^:]*:212: Error: selected processor does not support fp16 instruction -- `vmul.f16 q2,q8,q0'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.u16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.u16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.u16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.u16.f16 q6,q12'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 q14,q0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 q14,q0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 q14,q0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 q14,q0'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 q14,q0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 q14,q0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 q14,q0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 q14,q0,#3'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vceq.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vceq.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcge.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcge.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcgt.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcgt.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcle.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcle.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vclt.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vclt.f16 q14,q2,#0'
--- a/gas/testsuite/gas/arm/armv8-2-fp16-simd.d
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd.d
@ -0,0 +1,147 @@
+#name: ARM v8.2 FP16 support on SIMD
+#source: armv8-2-fp16-simd.s
+#objdump: -d
+#as: -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8
+#skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+
+00000000 <func>:
+   0:	f3342d0e 	vabd.f16	d2, d4, d14
+   4:	f3384d6c 	vabd.f16	q2, q4, q14
+   8:	f2142f0e 	vmax.f16	d2, d4, d14
+   c:	f2184f6c 	vmax.f16	q2, q4, q14
+  10:	f2342f0e 	vmin.f16	d2, d4, d14
+  14:	f2384f6c 	vmin.f16	q2, q4, q14
+  18:	f3300dec 	vabd.f16	q0, q8, q14
+  1c:	f2100fec 	vmax.f16	q0, q8, q14
+  20:	f2300fec 	vmin.f16	q0, q8, q14
+  24:	f3331d0f 	vabd.f16	d1, d3, d15
+  28:	f3310d08 	vabd.f16	d0, d1, d8
+  2c:	f3b50708 	vabs.f16	d0, d8
+  30:	f3b50760 	vabs.f16	q0, q8
+  34:	f3b50788 	vneg.f16	d0, d8
+  38:	f3b507e0 	vneg.f16	q0, q8
+  3c:	f3b5474c 	vabs.f16	q2, q6
+  40:	f3b547cc 	vneg.f16	q2, q6
+  44:	f3b57703 	vabs.f16	d7, d3
+  48:	f3b59781 	vneg.f16	d9, d1
+  4c:	f3142e1e 	vacge.f16	d2, d4, d14
+  50:	f3184e7c 	vacge.f16	q2, q4, q14
+  54:	f3342e1e 	vacgt.f16	d2, d4, d14
+  58:	f3384e7c 	vacgt.f16	q2, q4, q14
+  5c:	f33e2e14 	vacgt.f16	d2, d14, d4
+  60:	f33c4ed8 	vacgt.f16	q2, q14, q4
+  64:	f31e2e14 	vacge.f16	d2, d14, d4
+  68:	f31c4ed8 	vacge.f16	q2, q14, q4
+  6c:	f2142e0e 	vceq.f16	d2, d4, d14
+  70:	f2184e6c 	vceq.f16	q2, q4, q14
+  74:	f3142e0e 	vcge.f16	d2, d4, d14
+  78:	f3184e6c 	vcge.f16	q2, q4, q14
+  7c:	f3342e0e 	vcgt.f16	d2, d4, d14
+  80:	f3384e6c 	vcgt.f16	q2, q4, q14
+  84:	f31e2e04 	vcge.f16	d2, d14, d4
+  88:	f31c4ec8 	vcge.f16	q2, q14, q4
+  8c:	f33e2e04 	vcgt.f16	d2, d14, d4
+  90:	f33c4ec8 	vcgt.f16	q2, q14, q4
+  94:	f3100efc 	vacge.f16	q0, q8, q14
+  98:	f3300efc 	vacgt.f16	q0, q8, q14
+  9c:	f33c0ef0 	vacgt.f16	q0, q14, q8
+  a0:	f31c0ef0 	vacge.f16	q0, q14, q8
+  a4:	f2100eec 	vceq.f16	q0, q8, q14
+  a8:	f3100eec 	vcge.f16	q0, q8, q14
+  ac:	f3300eec 	vcgt.f16	q0, q8, q14
+  b0:	f31c0ee0 	vcge.f16	q0, q14, q8
+  b4:	f33c0ee0 	vcgt.f16	q0, q14, q8
+  b8:	f2142d0e 	vadd.f16	d2, d4, d14
+  bc:	f2184d6c 	vadd.f16	q2, q4, q14
+  c0:	f2342d0e 	vsub.f16	d2, d4, d14
+  c4:	f2384d6c 	vsub.f16	q2, q4, q14
+  c8:	f2100dec 	vadd.f16	q0, q8, q14
+  cc:	f2300dec 	vsub.f16	q0, q8, q14
+  d0:	f3142f1e 	vmaxnm.f16	d2, d4, d14
+  d4:	f3184f7c 	vmaxnm.f16	q2, q4, q14
+  d8:	f3342f1e 	vminnm.f16	d2, d4, d14
+  dc:	f3384f7c 	vminnm.f16	q2, q4, q14
+  e0:	f2142c1e 	vfma.f16	d2, d4, d14
+  e4:	f2184c7c 	vfma.f16	q2, q4, q14
+  e8:	f2342c1e 	vfms.f16	d2, d4, d14
+  ec:	f2384c7c 	vfms.f16	q2, q4, q14
+  f0:	f2142d1e 	vmla.f16	d2, d4, d14
+  f4:	f2184d7c 	vmla.f16	q2, q4, q14
+  f8:	f2342d1e 	vmls.f16	d2, d4, d14
+  fc:	f2384d7c 	vmls.f16	q2, q4, q14
+ 100:	f3b6458e 	vrintz.f16	d4, d14
+ 104:	f3b685ec 	vrintz.f16	q4, q14
+ 108:	f3b6448e 	vrintx.f16	d4, d14
+ 10c:	f3b684ec 	vrintx.f16	q4, q14
+ 110:	f3b6450e 	vrinta.f16	d4, d14
+ 114:	f3b6856c 	vrinta.f16	q4, q14
+ 118:	f3b6440e 	vrintn.f16	d4, d14
+ 11c:	f3b6846c 	vrintn.f16	q4, q14
+ 120:	f3b6478e 	vrintp.f16	d4, d14
+ 124:	f3b687ec 	vrintp.f16	q4, q14
+ 128:	f3b6468e 	vrintm.f16	d4, d14
+ 12c:	f3b686ec 	vrintm.f16	q4, q14
+ 130:	f3184d0e 	vpadd.f16	d4, d8, d14
+ 134:	f3b74508 	vrecpe.f16	d4, d8
+ 138:	f3b78560 	vrecpe.f16	q4, q8
+ 13c:	f3b74588 	vrsqrte.f16	d4, d8
+ 140:	f3b785e0 	vrsqrte.f16	q4, q8
+ 144:	f3b70564 	vrecpe.f16	q0, q10
+ 148:	f3b705e4 	vrsqrte.f16	q0, q10
+ 14c:	f21a8f1c 	vrecps.f16	d8, d10, d12
+ 150:	f2540ff8 	vrecps.f16	q8, q10, q12
+ 154:	f23a8f1c 	vrsqrts.f16	d8, d10, d12
+ 158:	f2740ff8 	vrsqrts.f16	q8, q10, q12
+ 15c:	f2104f58 	vrecps.f16	q2, q0, q4
+ 160:	f2304f58 	vrsqrts.f16	q2, q0, q4
+ 164:	f3184f0e 	vpmax.f16	d4, d8, d14
+ 168:	f338af02 	vpmin.f16	d10, d8, d2
+ 16c:	f3184d1e 	vmul.f16	d4, d8, d14
+ 170:	f3107d11 	vmul.f16	d7, d0, d1
+ 174:	f3104dd0 	vmul.f16	q2, q8, q0
+ 178:	f3b7600c 	vcvta.s16.f16	d6, d12
+ 17c:	f3b7c068 	vcvta.s16.f16	q6, q12
+ 180:	f3b7630c 	vcvtm.s16.f16	d6, d12
+ 184:	f3b7c368 	vcvtm.s16.f16	q6, q12
+ 188:	f3b7610c 	vcvtn.s16.f16	d6, d12
+ 18c:	f3b7c168 	vcvtn.s16.f16	q6, q12
+ 190:	f3b7620c 	vcvtp.s16.f16	d6, d12
+ 194:	f3b7c268 	vcvtp.s16.f16	q6, q12
+ 198:	f3b7608c 	vcvta.u16.f16	d6, d12
+ 19c:	f3b7c0e8 	vcvta.u16.f16	q6, q12
+ 1a0:	f3b7638c 	vcvtm.u16.f16	d6, d12
+ 1a4:	f3b7c3e8 	vcvtm.u16.f16	q6, q12
+ 1a8:	f3b7618c 	vcvtn.u16.f16	d6, d12
+ 1ac:	f3b7c1e8 	vcvtn.u16.f16	q6, q12
+ 1b0:	f3b7628c 	vcvtp.u16.f16	d6, d12
+ 1b4:	f3b7c2e8 	vcvtp.u16.f16	q6, q12
+ 1b8:	f3b7e700 	vcvt.s16.f16	d14, d0
+ 1bc:	f3f7c740 	vcvt.s16.f16	q14, q0
+ 1c0:	f3b7e780 	vcvt.u16.f16	d14, d0
+ 1c4:	f3f7c7c0 	vcvt.u16.f16	q14, q0
+ 1c8:	f3b7e600 	vcvt.f16.s16	d14, d0
+ 1cc:	f3f7c640 	vcvt.f16.s16	q14, q0
+ 1d0:	f3b7e680 	vcvt.f16.u16	d14, d0
+ 1d4:	f3f7c6c0 	vcvt.f16.u16	q14, q0
+ 1d8:	f2bded10 	vcvt.s16.f16	d14, d0, #3
+ 1dc:	f2fdcd50 	vcvt.s16.f16	q14, q0, #3
+ 1e0:	f3bded10 	vcvt.u16.f16	d14, d0, #3
+ 1e4:	f3fdcd50 	vcvt.u16.f16	q14, q0, #3
+ 1e8:	f2bdec10 	vcvt.f16.s16	d14, d0, #3
+ 1ec:	f2fdcc50 	vcvt.f16.s16	q14, q0, #3
+ 1f0:	f3bdec10 	vcvt.f16.u16	d14, d0, #3
+ 1f4:	f3fdcc50 	vcvt.f16.u16	q14, q0, #3
+ 1f8:	f3b5e502 	vceq.f16	d14, d2, #0
+ 1fc:	f3f5c544 	vceq.f16	q14, q2, #0
+ 200:	f3b5e482 	vcge.f16	d14, d2, #0
+ 204:	f3f5c4c4 	vcge.f16	q14, q2, #0
+ 208:	f3b5e402 	vcgt.f16	d14, d2, #0
+ 20c:	f3f5c444 	vcgt.f16	q14, q2, #0
+ 210:	f3b5e582 	vcle.f16	d14, d2, #0
+ 214:	f3f5c5c4 	vcle.f16	q14, q2, #0
+ 218:	f3b5e602 	vclt.f16	d14, d2, #0
+ 21c:	f3f5c644 	vclt.f16	q14, q2, #0
--- a/gas/testsuite/gas/arm/armv8-2-fp16-simd.s
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd.s
@ -0,0 +1,224 @@
+	.macro f16_dq_ifsu reg0 reg1 reg2
+	.irp op, vabd.f16, vmax.f16, vmin.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_q_ifsu reg0 reg1 reg2
+	.irp op, vabdq.f16, vmaxq.f16, vminq.f16
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_abs_neg reg0 reg1
+	.irp op, vabs.f16, vneg.f16
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_q_abs_neg reg0 reg1
+	.irp op, vabsq.f16, vnegq.f16
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_dq_fcmp reg0 reg1 reg2
+	.irp op, vacge.f16, vacgt.f16, vaclt.f16, vacle.f16, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_fcmp_imm0 reg0 reg1
+	.irp op, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
+		\op d\reg0, d\reg1, #0
+		\op q\reg0, q\reg1, #0
+	.endr
+	.endm
+
+	.macro f16_q_fcmp reg0 reg1 reg2
+	.irp op, vacgeq.f16, vacgtq.f16, vacltq.f16, vacleq.f16, vceqq.f16, vcgeq.f16, vcgtq.f16, vcleq.f16, vcltq.f16
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_addsub reg0 reg1 reg2
+	.irp op, vadd.f16, vsub.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_q_addsub reg0 reg1 reg2
+	.irp op, vaddq.f16, vsubq.f16
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_vmaxnm reg0 reg1 reg2
+	.irp op, vmaxnm.f16, vminnm.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_fmac reg0 reg1 reg2
+	.irp op, vfma.f16, vfms.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_fmacmaybe reg0 reg1 reg2
+	.irp op, vmla.f16, vmls.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_vrint reg0 reg1
+	.irp op, vrintz.f16, vrintx.f16, vrinta.f16, vrintn.f16, vrintp.f16, vrintm.f16
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_dq_recip reg0 reg1
+	.irp op, vrecpe.f16, vrsqrte.f16
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_q_recip reg0 reg1
+	.irp op, vrecpeq.f16, vrsqrteq.f16
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_dq_step reg0 reg1 reg2
+	.irp op, vrecps.f16, vrsqrts.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_q_step reg0 reg1 reg2
+	.irp op, vrecpsq.f16, vrsqrtsq.f16
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_cvt reg0 reg1
+	.irp op, vcvta.s16.f16, vcvtm.s16.f16, vcvtn.s16.f16, vcvtp.s16.f16, vcvta.u16.f16, vcvtm.u16.f16, vcvtn.u16.f16, vcvtp.u16.f16,
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_dq_cvtz reg0 reg1
+	.irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16,
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_dq_cvtz_fixed reg0 reg1 imm
+	.irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16,
+		\op d\reg0, d\reg1, #\imm
+		\op q\reg0, q\reg1, #\imm
+	.endr
+	.endm
+
+	.macro f16_dq op reg0 reg1 reg2
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endm
+
+	.macro f16_d op reg0 reg1 reg2
+		\op d\reg0, d\reg1, d\reg2
+	.endm
+
+	.macro f16_q op reg0 reg1 reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endm
+
+	.macro f16_dq_2 op reg0 reg1
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endm
+
+	.macro f16_d_2 op reg0 reg1
+		\op d\reg0, d\reg1
+	.endm
+
+	.macro f16_q_2 op reg0 reg1
+		\op q\reg0, q\reg1
+	.endm
+
+func:
+	# neon_dyadic_if_su
+	f16_dq_ifsu 2 4 14
+	f16_q_ifsu 0 8 14
+	f16_d  vabd.f16 1 3 15
+	f16_d  vabd.f16 0 1 8
+
+	# neon_abs_neg
+	f16_dq_abs_neg 0 8
+	f16_q_abs_neg 2 6
+	f16_d_2  vabs.f16 7 3
+	f16_d_2  vneg.f16 9 1
+
+	# neon_fcmp
+	f16_dq_fcmp 2 4 14
+	f16_q_fcmp 0 8 14
+
+	# neon_addsub_if_i
+	f16_dq_addsub 2 4 14
+	f16_q_addsub 0 8 14
+
+	# neon_vmaxnm
+	f16_dq_vmaxnm 2 4 14
+
+	# neon_fmac
+	f16_dq_fmac 2 4 14
+
+	# neon_mac_maybe_scalar
+	f16_dq_fmacmaybe 2 4 14
+
+	# vrint
+	f16_dq_vrint 4 14
+
+	# neon_dyadic_if_i_d
+	f16_d vpadd.f16 4 8 14
+
+	# neon_recip_est
+	f16_dq_recip 4 8
+	f16_q_recip 0 10
+
+	# neon_step
+	f16_dq_step 8 10 12
+	f16_q_step 2 0 4
+
+	# neon_dyadic_if_su_d
+	f16_d vpmax.f16 4 8 14
+	f16_d vpmin.f16 10 8 2
+
+	# neon_mul
+	f16_d vmul.f16 4 8 14
+	f16_d vmul.f16 7 0 1
+	f16_q vmul.f16 2 8 0
+
+	# neon_cvt
+	f16_dq_cvt 6 12
+
+	# neon_cvtz
+	f16_dq_cvtz 14, 0
+
+	# neon_cvtz_fixed
+	f16_dq_cvtz_fixed 14, 0, 3
+
+	# neon_fcmp_imm0
+	f16_dq_fcmp_imm0 14, 2
--- a/opcodes/ChangeLog
+++ b/opcodes/ChangeLog
@ -1,3 +1,7 @@
+2016-03-16  Jiong Wang  <jiong.wang@arm.com>
+
+	* arm-dis.c (neon_opcodes): Support new FP16 instructions.
+
 2016-03-07  Trevor Saunders  <tbsaunde+binutils@tbsaunde.org>

 	* mcore-opc.h: Add const qualifiers.
--- a/opcodes/arm-dis.c
+++ b/opcodes/arm-dis.c
@ -1032,15 +1032,23 @@ static const struct opcode32 neon_opcodes[] =

  /* NEON fused multiply add instructions.  */
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_FMA),
-    0xf2000c10, 0xffa00f10, "vfma%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000c10, 0xffb00f10, "vfma%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100c10, 0xffb00f10, "vfma%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_FMA),
-    0xf2200c10, 0xffa00f10, "vfms%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2200c10, 0xffb00f10, "vfms%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2300c10, 0xffb00f10, "vfms%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},

  /* Two registers, miscellaneous.  */
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
    0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3b60400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f16\t%12-15,22R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
    0xf3bb0000, 0xffbf0c10, "vcvt%8-9?mpna%u.%7?us32.f32\t%12-15,22R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3b70000, 0xffbf0c10, "vcvt%8-9?mpna%u.%7?us16.f16\t%12-15,22R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
    0xf3b00300, 0xffbf0fd0, "aese%u.8\t%12-15,22Q, %0-3,5Q"},
  {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
@ -1080,8 +1088,12 @@ static const struct opcode32 neon_opcodes[] =
    "vshll%c.i%18-19S2\t%12-15,22Q, %0-3,5D, #%18-19S2"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
    0xf3bb0400, 0xffbf0e90, "vrecpe%c.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3b70400, 0xffbf0e90, "vrecpe%c.%8?fu16\t%12-15,22R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
    0xf3bb0480, 0xffbf0e90, "vrsqrte%c.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3b70480, 0xffbf0e90, "vrsqrte%c.%8?fu16\t%12-15,22R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
    0xf3b00000, 0xffb30f90, "vrev64%c.%18-19S2\t%12-15,22R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
@ -1121,8 +1133,11 @@ static const struct opcode32 neon_opcodes[] =
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
    0xf3b00600, 0xffb30f10, "vpadal%c.%7?us%18-19S2\t%12-15,22R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3b30600, 0xffb30e10,
+    0xf3bb0600, 0xffbf0e10,
    "vcvt%c.%7-8?usff%18-19Sa.%7-8?ffus%18-19Sa\t%12-15,22R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3b70600, 0xffbf0e10,
+    "vcvt%c.%7-8?usff16.%7-8?ffus16\t%12-15,22R, %0-3,5R"},

  /* Three registers of the same length.  */
  {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
@ -1140,9 +1155,13 @@ static const struct opcode32 neon_opcodes[] =
  {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
    0xf3200c40, 0xffb00f50, "sha256su1%u.32\t%12-15,22Q, %16-19,7Q, %0-3,5Q"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
-    0xf3000f10, 0xffa00f10, "vmaxnm%u.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000f10, 0xffb00f10, "vmaxnm%u.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100f10, 0xffb00f10, "vmaxnm%u.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
-    0xf3200f10, 0xffa00f10, "vminnm%u.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3200f10, 0xffb00f10, "vminnm%u.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3300f10, 0xffb00f10, "vminnm%u.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
    0xf2000110, 0xffb00f10, "vand%c\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
@ -1160,41 +1179,77 @@ static const struct opcode32 neon_opcodes[] =
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
    0xf3300110, 0xffb00f10, "vbif%c\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2000d00, 0xffa00f10, "vadd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000d00, 0xffb00f10, "vadd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100d00, 0xffb00f10, "vadd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2000d10, 0xffa00f10, "vmla%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000d10, 0xffb00f10, "vmla%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100d10, 0xffb00f10, "vmla%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2000e00, 0xffa00f10, "vceq%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000e00, 0xffb00f10, "vceq%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100e00, 0xffb00f10, "vceq%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2000f00, 0xffa00f10, "vmax%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000f00, 0xffb00f10, "vmax%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100f00, 0xffb00f10, "vmax%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2000f10, 0xffa00f10, "vrecps%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000f10, 0xffb00f10, "vrecps%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100f10, 0xffb00f10, "vrecps%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2200d00, 0xffa00f10, "vsub%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2200d00, 0xffb00f10, "vsub%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2300d00, 0xffb00f10, "vsub%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2200d10, 0xffa00f10, "vmls%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2200d10, 0xffb00f10, "vmls%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2300d10, 0xffb00f10, "vmls%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2200f00, 0xffa00f10, "vmin%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2200f00, 0xffb00f10, "vmin%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2300f00, 0xffb00f10, "vmin%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2200f10, 0xffa00f10, "vrsqrts%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2200f10, 0xffb00f10, "vrsqrts%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2300f10, 0xffb00f10, "vrsqrts%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3000d00, 0xffa00f10, "vpadd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000d00, 0xffb00f10, "vpadd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100d00, 0xffb00f10, "vpadd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3000d10, 0xffa00f10, "vmul%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000d10, 0xffb00f10, "vmul%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100d10, 0xffb00f10, "vmul%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3000e00, 0xffa00f10, "vcge%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000e00, 0xffb00f10, "vcge%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100e00, 0xffb00f10, "vcge%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3000e10, 0xffa00f10, "vacge%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000e10, 0xffb00f10, "vacge%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100e10, 0xffb00f10, "vacge%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3000f00, 0xffa00f10, "vpmax%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000f00, 0xffb00f10, "vpmax%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100f00, 0xffb00f10, "vpmax%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3200d00, 0xffa00f10, "vabd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3200d00, 0xffb00f10, "vabd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3300d00, 0xffb00f10, "vabd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3200e00, 0xffa00f10, "vcgt%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3200e00, 0xffb00f10, "vcgt%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3300e00, 0xffb00f10, "vcgt%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3200e10, 0xffa00f10, "vacgt%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3200e10, 0xffb00f10, "vacgt%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3300e10, 0xffb00f10, "vacgt%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3200f00, 0xffa00f10, "vpmin%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3200f00, 0xffb00f10, "vpmin%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3300f00, 0xffb00f10, "vpmin%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
    0xf2000800, 0xff800f10, "vadd%c.i%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
@ -1426,6 +1481,9 @@ static const struct opcode32 neon_opcodes[] =
  {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
    0xf2a00e10, 0xfea00e90,
    "vcvt%c.%24,8?usff32.%24,8?ffus32\t%12-15,22R, %0-3,5R, #%16-20e"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2a00c10, 0xfea00e90,
+    "vcvt%c.%24,8?usff16.%24,8?ffus16\t%12-15,22R, %0-3,5R, #%16-20e"},

  /* Three registers of different lengths.  */
  {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),