[binutils][arm] BFloat16 enablement [4/X]
Hi, This patch is part of a series that adds support for Armv8.6-A (Matrix Multiply and BFloat16 extensions) to binutils. This patch introduces BFloat16 instructions to the arm backend. The following BFloat16 instructions are added: vdot, vfma{l/t}, vmmla, vfmal{t/b}, vcvt, vcvt{t/b}. gas/ChangeLog: 2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com> 2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com> * config/tc-arm.c (arm_archs): Add armv8.6-a option. (cpu_arch_ver): Add TAG_CPU_ARCH_V8 tag for Armv8.6-a. * doc/c-arm.texi (-march): New armv8.6-a arch. * config/tc-arm.c (arm_ext_bf16): New feature set. (enum neon_el_type): Add NT_bfloat value. (B_MNEM_vfmat, B_MNEM_vfmab): New bfloat16 encoder helpers. (BAD_BF16): New message. (parse_neon_type): Add bf16 type specifier. (enum neon_type_mask): Add N_BF16 type. (type_chk_of_el_type): Account for NT_bfloat. (el_type_of_type_chk): Account for N_BF16. (neon_three_args): Split out from neon_three_same. (neon_three_same): Part split out into neon_three_args. (CVT_FLAVOUR_VAR): Add bf16_f32 cvt flavour. (do_neon_cvt_1): Account for vcvt.bf16.f32. (do_bfloat_vmla): New. (do_mve_vfma): New function to deal with the mnemonic clash between the BF16 vfmat and the MVE vfma in a VPT block with a 't'rue condition. (do_neon_cvttb_1): Account for vcvt{t,b}.bf16.f32. (do_vdot): New (do_vmmla): New (insns): Add vdot and vmmla mnemonics. (arm_extensions): Add "bf16" extension. * doc/c-arm.texi: Document "bf16" extension. * testsuite/gas/arm/attr-march-armv8_6-a.d: New test. * testsuite/gas/arm/bfloat16-bad.d: New test. * testsuite/gas/arm/bfloat16-bad.l: New test. * testsuite/gas/arm/bfloat16-bad.s: New test. * testsuite/gas/arm/bfloat16-cmdline-bad-2.d: New test. * testsuite/gas/arm/bfloat16-cmdline-bad-3.d: New test. * testsuite/gas/arm/bfloat16-cmdline-bad.d: New test. * testsuite/gas/arm/bfloat16-neon.s: New test. * testsuite/gas/arm/bfloat16-non-neon.s: New test. * testsuite/gas/arm/bfloat16-thumb-bad.d: New test. 
* testsuite/gas/arm/bfloat16-thumb-bad.l: New test. * testsuite/gas/arm/bfloat16-thumb.d: New test. * testsuite/gas/arm/bfloat16-vfp.d: New test. * testsuite/gas/arm/bfloat16.d: New test. * testsuite/gas/arm/bfloat16.s: New test. include/ChangeLog: 2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com> 2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com> * opcode/arm.h (ARM_EXT2_V8_6A, ARM_AEXT2_V8_6A, ARM_ARCH_V8_6A): New. * opcode/arm.h (ARM_EXT2_BF16): New feature macro. (ARM_AEXT2_V8_6A): Include above macro in definition. opcodes/ChangeLog: 2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com> 2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com> * arm-dis.c (select_arm_features): Update bfd_march_arm_8 with Armv8.6-A. (coprocessor_opcodes): Add bfloat16 vcvt{t,b}. (neon_opcodes): Add bfloat SIMD instructions. (print_insn_coprocessor): Add new control character %b to print condition code without checking cp_num. (print_insn_neon): Account for BFloat16 instructions that have no special top-byte handling. Regression tested on arm-none-eabi. Is it ok for trunk? Regards, Mihail
This commit is contained in:
parent
33593eafc9
commit
aab2c27d9f
|
@ -1,3 +1,47 @@
|
|||
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
|
||||
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>
|
||||
|
||||
* config/tc-arm.c (arm_archs): Add armv8.6-a option.
|
||||
(cpu_arch_ver): Add TAG_CPU_ARCH_V8 tag for Armv8.6-a.
|
||||
* doc/c-arm.texi (-march): New armv8.6-a arch.
|
||||
* config/tc-arm.c (arm_ext_bf16): New feature set.
|
||||
(enum neon_el_type): Add NT_bfloat value.
|
||||
(B_MNEM_vfmat, B_MNEM_vfmab): New bfloat16 encoder
|
||||
helpers.
|
||||
(BAD_BF16): New message.
|
||||
(parse_neon_type): Add bf16 type specifier.
|
||||
(enum neon_type_mask): Add N_BF16 type.
|
||||
(type_chk_of_el_type): Account for NT_bfloat.
|
||||
(el_type_of_type_chk): Account for N_BF16.
|
||||
(neon_three_args): Split out from neon_three_same.
|
||||
(neon_three_same): Part split out into neon_three_args.
|
||||
(CVT_FLAVOUR_VAR): Add bf16_f32 cvt flavour.
|
||||
(do_neon_cvt_1): Account for vcvt.bf16.f32.
|
||||
(do_bfloat_vfma): New.
|
||||
(do_mve_vfma): New function to deal with the mnemonic clash between the BF16
|
||||
vfmat and the MVE vfma in a VPT block with a 't'rue condition.
|
||||
(do_neon_cvttb_1): Account for vcvt{t,b}.bf16.f32.
|
||||
(do_vdot): New.
|
||||
(do_vmmla): New.
|
||||
(insns): Add vdot, vmmla, vfmab and vfmat mnemonics.
|
||||
(arm_extensions): Add "bf16" extension.
|
||||
* doc/c-arm.texi: Document "bf16" extension.
|
||||
* testsuite/gas/arm/attr-march-armv8_6-a.d: New test.
|
||||
* testsuite/gas/arm/bfloat16-bad.d: New test.
|
||||
* testsuite/gas/arm/bfloat16-bad.l: New test.
|
||||
* testsuite/gas/arm/bfloat16-bad.s: New test.
|
||||
* testsuite/gas/arm/bfloat16-cmdline-bad-2.d: New test.
|
||||
* testsuite/gas/arm/bfloat16-cmdline-bad-3.d: New test.
|
||||
* testsuite/gas/arm/bfloat16-cmdline-bad.d: New test.
|
||||
* testsuite/gas/arm/bfloat16-neon.s: New test.
|
||||
* testsuite/gas/arm/bfloat16-non-neon.s: New test.
|
||||
* testsuite/gas/arm/bfloat16-thumb-bad.d: New test.
|
||||
* testsuite/gas/arm/bfloat16-thumb-bad.l: New test.
|
||||
* testsuite/gas/arm/bfloat16-thumb.d: New test.
|
||||
* testsuite/gas/arm/bfloat16-vfp.d: New test.
|
||||
* testsuite/gas/arm/bfloat16.d: New test.
|
||||
* testsuite/gas/arm/bfloat16.s: New test.
|
||||
|
||||
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
|
||||
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>
|
||||
|
||||
|
|
|
@ -275,6 +275,8 @@ static const arm_feature_set arm_ext_sb =
|
|||
ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB);
|
||||
static const arm_feature_set arm_ext_predres =
|
||||
ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
|
||||
static const arm_feature_set arm_ext_bf16 =
|
||||
ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16);
|
||||
|
||||
static const arm_feature_set arm_arch_any = ARM_ANY;
|
||||
#ifdef OBJ_ELF
|
||||
|
@ -447,6 +449,7 @@ enum neon_el_type
|
|||
NT_float,
|
||||
NT_poly,
|
||||
NT_signed,
|
||||
NT_bfloat,
|
||||
NT_unsigned
|
||||
};
|
||||
|
||||
|
@ -894,6 +897,7 @@ struct asm_opcode
|
|||
_("cannot use writeback with PC-relative addressing")
|
||||
#define BAD_RANGE _("branch out of range")
|
||||
#define BAD_FP16 _("selected processor does not support fp16 instruction")
|
||||
#define BAD_BF16 _("selected processor does not support bf16 instruction")
|
||||
#define UNPRED_REG(R) _("using " R " results in unpredictable behaviour")
|
||||
#define THUMB1_RELOC_ONLY _("relocation valid in thumb1 code only")
|
||||
#define MVE_NOT_IT _("Warning: instruction is UNPREDICTABLE in an IT " \
|
||||
|
@ -1469,6 +1473,28 @@ parse_neon_type (struct neon_type *type, char **str)
|
|||
thissize = 64;
|
||||
ptr++;
|
||||
goto done;
|
||||
case 'b':
|
||||
thistype = NT_bfloat;
|
||||
switch (TOLOWER (*(++ptr)))
|
||||
{
|
||||
case 'f':
|
||||
ptr += 1;
|
||||
thissize = strtoul (ptr, &ptr, 10);
|
||||
if (thissize != 16)
|
||||
{
|
||||
as_bad (_("bad size %d in type specifier"), thissize);
|
||||
return FAIL;
|
||||
}
|
||||
goto done;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
case ' ': case '.':
|
||||
as_bad (_("unexpected type character `b' -- did you mean `bf'?"));
|
||||
return FAIL;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
as_bad (_("unexpected character `%c' in type specifier"), *ptr);
|
||||
return FAIL;
|
||||
|
@ -14506,6 +14532,10 @@ do_mve_scalar_shift (void)
|
|||
#define M_MNEM_vqrshrunt 0xfe801fc0
|
||||
#define M_MNEM_vqrshrunb 0xfe800fc0
|
||||
|
||||
/* Bfloat16 instruction encoder helpers. */
|
||||
#define B_MNEM_vfmat 0xfc300850
|
||||
#define B_MNEM_vfmab 0xfc300810
|
||||
|
||||
/* Neon instruction encoder helpers. */
|
||||
|
||||
/* Encodings for the different types for various Neon opcodes. */
|
||||
|
@ -14851,6 +14881,7 @@ enum neon_type_mask
|
|||
N_F32 = 0x0080000,
|
||||
N_F64 = 0x0100000,
|
||||
N_P64 = 0x0200000,
|
||||
N_BF16 = 0x0400000,
|
||||
N_KEY = 0x1000000, /* Key element (main type specifier). */
|
||||
N_EQK = 0x2000000, /* Given operand has the same type & size as the key. */
|
||||
N_VFP = 0x4000000, /* VFP mode: operand size must match register width. */
|
||||
|
@ -15149,6 +15180,10 @@ type_chk_of_el_type (enum neon_el_type type, unsigned size)
|
|||
}
|
||||
break;
|
||||
|
||||
case NT_bfloat:
|
||||
if (size == 16) return N_BF16;
|
||||
break;
|
||||
|
||||
default: ;
|
||||
}
|
||||
|
||||
|
@ -15167,7 +15202,8 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
|
|||
|
||||
if ((mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8)) != 0)
|
||||
*size = 8;
|
||||
else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16)) != 0)
|
||||
else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16 | N_BF16))
|
||||
!= 0)
|
||||
*size = 16;
|
||||
else if ((mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32)) != 0)
|
||||
*size = 32;
|
||||
|
@ -15188,6 +15224,8 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
|
|||
*type = NT_poly;
|
||||
else if ((mask & (N_F_ALL)) != 0)
|
||||
*type = NT_float;
|
||||
else if ((mask & (N_BF16)) != 0)
|
||||
*type = NT_bfloat;
|
||||
else
|
||||
return FAIL;
|
||||
|
||||
|
@ -16624,6 +16662,20 @@ mve_encode_rrqq (unsigned U, unsigned size)
|
|||
inst.is_neon = 1;
|
||||
}
|
||||
|
||||
/* Helper function for neon_three_same handling the operands. */
|
||||
static void
|
||||
neon_three_args (int isquad)
|
||||
{
|
||||
inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
|
||||
inst.instruction |= HI1 (inst.operands[0].reg) << 22;
|
||||
inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
|
||||
inst.instruction |= HI1 (inst.operands[1].reg) << 7;
|
||||
inst.instruction |= LOW4 (inst.operands[2].reg);
|
||||
inst.instruction |= HI1 (inst.operands[2].reg) << 5;
|
||||
inst.instruction |= (isquad != 0) << 6;
|
||||
inst.is_neon = 1;
|
||||
}
|
||||
|
||||
/* Encode insns with bit pattern:
|
||||
|
||||
|28/24|23|22 |21 20|19 16|15 12|11 8|7|6|5|4|3 0|
|
||||
|
@ -16635,13 +16687,7 @@ mve_encode_rrqq (unsigned U, unsigned size)
|
|||
static void
|
||||
neon_three_same (int isquad, int ubit, int size)
|
||||
{
|
||||
inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
|
||||
inst.instruction |= HI1 (inst.operands[0].reg) << 22;
|
||||
inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
|
||||
inst.instruction |= HI1 (inst.operands[1].reg) << 7;
|
||||
inst.instruction |= LOW4 (inst.operands[2].reg);
|
||||
inst.instruction |= HI1 (inst.operands[2].reg) << 5;
|
||||
inst.instruction |= (isquad != 0) << 6;
|
||||
neon_three_args (isquad);
|
||||
inst.instruction |= (ubit != 0) << 24;
|
||||
if (size != -1)
|
||||
inst.instruction |= neon_logbits (size) << 20;
|
||||
|
@ -17783,6 +17829,44 @@ do_neon_mac_maybe_scalar (void)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
do_bfloat_vfma (void)
|
||||
{
|
||||
constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
|
||||
constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
|
||||
enum neon_shape rs;
|
||||
int t_bit = 0;
|
||||
|
||||
if (inst.instruction != B_MNEM_vfmab)
|
||||
{
|
||||
t_bit = 1;
|
||||
inst.instruction = B_MNEM_vfmat;
|
||||
}
|
||||
|
||||
if (inst.operands[2].isscalar)
|
||||
{
|
||||
rs = neon_select_shape (NS_QQS, NS_NULL);
|
||||
neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
|
||||
|
||||
inst.instruction |= (1 << 25);
|
||||
int index = inst.operands[2].reg & 0xf;
|
||||
constraint (!(index < 4), _("index must be in the range 0 to 3"));
|
||||
inst.operands[2].reg >>= 4;
|
||||
constraint (!(inst.operands[2].reg < 8),
|
||||
_("indexed register must be less than 8"));
|
||||
neon_three_args (t_bit);
|
||||
inst.instruction |= ((index & 1) << 3);
|
||||
inst.instruction |= ((index & 2) << 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
rs = neon_select_shape (NS_QQQ, NS_NULL);
|
||||
neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
|
||||
neon_three_args (t_bit);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
do_neon_fmac (void)
|
||||
{
|
||||
|
@ -17801,6 +17885,7 @@ do_neon_fmac (void)
|
|||
|
||||
if (rs == NS_QQR)
|
||||
{
|
||||
|
||||
if (inst.operands[2].reg == REG_SP)
|
||||
as_tsktsk (MVE_BAD_SP);
|
||||
else if (inst.operands[2].reg == REG_PC)
|
||||
|
@ -17825,6 +17910,24 @@ do_neon_fmac (void)
|
|||
neon_dyadic_misc (NT_untyped, N_IF_32, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
do_mve_vfma (void)
|
||||
{
|
||||
if (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_bf16) &&
|
||||
inst.cond == COND_ALWAYS)
|
||||
{
|
||||
constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
|
||||
inst.instruction = N_MNEM_vfma;
|
||||
inst.pred_insn_type = INSIDE_VPT_INSN;
|
||||
inst.cond = 0xf;
|
||||
return do_neon_fmac();
|
||||
}
|
||||
else
|
||||
{
|
||||
do_bfloat_vfma();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
do_neon_tst (void)
|
||||
{
|
||||
|
@ -18654,6 +18757,7 @@ do_neon_shll (void)
|
|||
CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL) \
|
||||
CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\
|
||||
CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\
|
||||
CVT_VAR (bf16_f32, N_BF16, N_F32, whole_reg, NULL, NULL, NULL) \
|
||||
/* VFP instructions. */ \
|
||||
CVT_VAR (f32_f64, N_F32, N_F64, N_VFP, NULL, "fcvtsd", NULL) \
|
||||
CVT_VAR (f64_f32, N_F64, N_F32, N_VFP, NULL, "fcvtds", NULL) \
|
||||
|
@ -19121,8 +19225,21 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
|
|||
}
|
||||
|
||||
if (rs == NS_DQ)
|
||||
inst.instruction = 0x3b60600;
|
||||
{
|
||||
if (flavour == neon_cvt_flavour_bf16_f32)
|
||||
{
|
||||
if (vfp_or_neon_is_neon (NEON_CHECK_ARCH8) == FAIL)
|
||||
return;
|
||||
constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
|
||||
/* VCVT.bf16.f32. */
|
||||
inst.instruction = 0x11b60640;
|
||||
}
|
||||
else
|
||||
/* VCVT.f16.f32. */
|
||||
inst.instruction = 0x3b60600;
|
||||
}
|
||||
else
|
||||
/* VCVT.f32.f16. */
|
||||
inst.instruction = 0x3b60700;
|
||||
|
||||
inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
|
||||
|
@ -19272,6 +19389,14 @@ do_neon_cvttb_1 (bfd_boolean t)
|
|||
inst.error = NULL;
|
||||
do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
|
||||
}
|
||||
else if (neon_check_type (2, rs, N_BF16 | N_VFP, N_F32).type != NT_invtype)
|
||||
{
|
||||
constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
|
||||
inst.error = NULL;
|
||||
inst.instruction |= (1 << 8);
|
||||
inst.instruction &= ~(1 << 9);
|
||||
do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/FALSE);
|
||||
}
|
||||
else
|
||||
return;
|
||||
}
|
||||
|
@ -19523,16 +19648,6 @@ do_neon_fmac_maybe_scalar_long (int subtype)
|
|||
0x2. */
|
||||
int size = -1;
|
||||
|
||||
if (inst.cond != COND_ALWAYS)
|
||||
as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
|
||||
"behaviour is UNPREDICTABLE"));
|
||||
|
||||
constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
|
||||
_(BAD_FP16));
|
||||
|
||||
constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
|
||||
_(BAD_FPU));
|
||||
|
||||
/* vfmal/vfmsl are in three-same D/Q register format or the third operand can
|
||||
be a scalar index register. */
|
||||
if (inst.operands[2].isscalar)
|
||||
|
@ -19551,7 +19666,16 @@ do_neon_fmac_maybe_scalar_long (int subtype)
|
|||
rs = neon_select_shape (NS_DHH, NS_QDD, NS_NULL);
|
||||
}
|
||||
|
||||
neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16);
|
||||
|
||||
if (inst.cond != COND_ALWAYS)
|
||||
as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
|
||||
"behaviour is UNPREDICTABLE"));
|
||||
|
||||
constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
|
||||
_(BAD_FP16));
|
||||
|
||||
constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
|
||||
_(BAD_FPU));
|
||||
|
||||
/* "opcode" from template has included "ubit", so simply pass 0 here. Also,
|
||||
the "S" bit in size field has been reused to differentiate vfmal and vfmsl,
|
||||
|
@ -21501,6 +21625,46 @@ do_vjcvt (void)
|
|||
do_vfp_cond_or_thumb ();
|
||||
}
|
||||
|
||||
static void
|
||||
do_vdot (void)
|
||||
{
|
||||
enum neon_shape rs;
|
||||
constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
|
||||
set_pred_insn_type (OUTSIDE_PRED_INSN);
|
||||
if (inst.operands[2].isscalar)
|
||||
{
|
||||
rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
|
||||
neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
|
||||
|
||||
inst.instruction |= (1 << 25);
|
||||
int index = inst.operands[2].reg & 0xf;
|
||||
constraint ((index != 1 && index != 0), _("index must be 0 or 1"));
|
||||
inst.operands[2].reg >>= 4;
|
||||
constraint (!(inst.operands[2].reg < 16),
|
||||
_("indexed register must be less than 16"));
|
||||
neon_three_args (rs == NS_QQS);
|
||||
inst.instruction |= (index << 5);
|
||||
}
|
||||
else
|
||||
{
|
||||
rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
|
||||
neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
|
||||
neon_three_args (rs == NS_QQQ);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
do_vmmla (void)
|
||||
{
|
||||
enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
|
||||
neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
|
||||
|
||||
constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
|
||||
set_pred_insn_type (OUTSIDE_PRED_INSN);
|
||||
|
||||
neon_three_args (1);
|
||||
}
|
||||
|
||||
|
||||
/* Overall per-instruction processing. */
|
||||
|
||||
|
@ -24846,8 +25010,8 @@ static const struct asm_opcode insns[] =
|
|||
NCE (vins, eb00ac0, 2, (RVS, RVS), neon_movhf),
|
||||
|
||||
/* New backported fma/fms instructions optional in v8.2. */
|
||||
NCE (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
|
||||
NCE (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
|
||||
NUF (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
|
||||
NUF (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
|
||||
|
||||
#undef THUMB_VARIANT
|
||||
#define THUMB_VARIANT & fpu_neon_ext_v1
|
||||
|
@ -25097,10 +25261,11 @@ static const struct asm_opcode insns[] =
|
|||
#define ARM_VARIANT & fpu_vfp_ext_fma
|
||||
#undef THUMB_VARIANT
|
||||
#define THUMB_VARIANT & fpu_vfp_ext_fma
|
||||
/* Mnemonics shared by Neon, VFP and MVE. These are included in the
|
||||
/* Mnemonics shared by Neon, VFP, MVE and BF16. These are included in the
|
||||
VFP FMA variant; NEON and VFP FMA always includes the NEON
|
||||
FMA instructions. */
|
||||
mnCEF(vfma, _vfma, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQR), neon_fmac),
|
||||
TUF ("vfmat", c300850, fc300850, 3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), mve_vfma, mve_vfma),
|
||||
mnCEF(vfms, _vfms, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ), neon_fmac),
|
||||
|
||||
/* ffmas/ffmad/ffmss/ffmsd are dummy mnemonics to satisfy gas;
|
||||
|
@ -25773,6 +25938,24 @@ static const struct asm_opcode insns[] =
|
|||
#define THUMB_VARIANT & arm_ext_v6t2_v8m
|
||||
MNUF (vcadd, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ, EXPi), vcadd),
|
||||
MNUF (vcmla, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ_RNSC, EXPi), vcmla),
|
||||
|
||||
#undef ARM_VARIANT
|
||||
#define ARM_VARIANT &arm_ext_bf16
|
||||
#undef THUMB_VARIANT
|
||||
#define THUMB_VARIANT &arm_ext_bf16
|
||||
TUF ("vdot", c000d00, fc000d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vdot, vdot),
|
||||
TUF ("vmmla", c000c40, fc000c40, 3, (RNQ, RNQ, RNQ), vmmla, vmmla),
|
||||
TUF ("vfmab", c300810, fc300810, 3, (RNDQ, RNDQ, RNDQ_RNSC), bfloat_vfma, bfloat_vfma),
|
||||
|
||||
#undef ARM_VARIANT
|
||||
#define ARM_VARIANT &arm_ext_i8mm
|
||||
#undef THUMB_VARIANT
|
||||
#define THUMB_VARIANT &arm_ext_i8mm
|
||||
TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
|
||||
TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla),
|
||||
TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vummla, vummla),
|
||||
TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot),
|
||||
TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot),
|
||||
};
|
||||
#undef ARM_VARIANT
|
||||
#undef THUMB_VARIANT
|
||||
|
@ -30937,6 +31120,11 @@ static const struct arm_ext_table armv85a_ext_table[] =
|
|||
{ NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
|
||||
};
|
||||
|
||||
static const struct arm_ext_table armv86a_ext_table[] =
|
||||
{
|
||||
{ NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
|
||||
};
|
||||
|
||||
static const struct arm_ext_table armv8m_main_ext_table[] =
|
||||
{
|
||||
ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
|
||||
|
@ -31042,6 +31230,7 @@ static const struct arm_arch_option_table arm_archs[] =
|
|||
ARM_ARCH_OPT2 ("armv8-r", ARM_ARCH_V8R, FPU_ARCH_VFP, armv8r),
|
||||
ARM_ARCH_OPT2 ("armv8.4-a", ARM_ARCH_V8_4A, FPU_ARCH_VFP, armv84a),
|
||||
ARM_ARCH_OPT2 ("armv8.5-a", ARM_ARCH_V8_5A, FPU_ARCH_VFP, armv85a),
|
||||
ARM_ARCH_OPT2 ("armv8.6-a", ARM_ARCH_V8_6A, FPU_ARCH_VFP, armv86a),
|
||||
ARM_ARCH_OPT ("xscale", ARM_ARCH_XSCALE, FPU_ARCH_VFP),
|
||||
ARM_ARCH_OPT ("iwmmxt", ARM_ARCH_IWMMXT, FPU_ARCH_VFP),
|
||||
ARM_ARCH_OPT ("iwmmxt2", ARM_ARCH_IWMMXT2, FPU_ARCH_VFP),
|
||||
|
@ -31072,6 +31261,9 @@ struct arm_option_extension_value_table
|
|||
use the context sensitive approach using arm_ext_table's. */
|
||||
static const struct arm_option_extension_value_table arm_extensions[] =
|
||||
{
|
||||
ARM_EXT_OPT ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
ARM_ARCH_V8_2A),
|
||||
ARM_EXT_OPT ("crc", ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
|
||||
ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
|
||||
ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
|
||||
|
@ -31823,7 +32015,8 @@ static const cpu_arch_ver_table cpu_arch_ver[] =
|
|||
{TAG_CPU_ARCH_V8, ARM_ARCH_V8_4A},
|
||||
{TAG_CPU_ARCH_V8, ARM_ARCH_V8_5A},
|
||||
{TAG_CPU_ARCH_V8_1M_MAIN, ARM_ARCH_V8_1M_MAIN},
|
||||
{-1, ARM_ARCH_NONE}
|
||||
{TAG_CPU_ARCH_V8, ARM_ARCH_V8_6A},
|
||||
{-1, ARM_ARCH_NONE}
|
||||
};
|
||||
|
||||
/* Set an attribute if it has not already been set by the user. */
|
||||
|
|
|
@ -180,6 +180,7 @@ been added, again in ascending alphabetical order. For example,
|
|||
|
||||
|
||||
The following extensions are currently supported:
|
||||
@code{bf16} (BFloat16 extensions for v8.6-A architecture),
|
||||
@code{crc}
|
||||
@code{crypto} (Cryptography Extensions for v8-A architecture, implies @code{fp+simd}),
|
||||
@code{dotprod} (Dot Product Extensions for v8.2-A architecture, implies @code{fp+simd}),
|
||||
|
@ -254,6 +255,7 @@ names are recognized:
|
|||
@code{armv8-m.base},
|
||||
@code{armv8-m.main},
|
||||
@code{armv8.1-m.main},
|
||||
@code{armv8.6-a},
|
||||
@code{iwmmxt},
|
||||
@code{iwmmxt2}
|
||||
and
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# name: attributes for -march=armv8.6-a
|
||||
# source: blank.s
|
||||
# as: -march=armv8.6-a
|
||||
# readelf: -A
|
||||
# This test is only valid on EABI based ports.
|
||||
# target: *-*-*eabi* *-*-nacl*
|
||||
|
||||
Attribute Section: aeabi
|
||||
File Attributes
|
||||
Tag_CPU_name: "8.6-A"
|
||||
Tag_CPU_arch: v8
|
||||
Tag_CPU_arch_profile: Application
|
||||
Tag_ARM_ISA_use: Yes
|
||||
Tag_THUMB_ISA_use: Thumb-2
|
||||
Tag_Advanced_SIMD_arch: NEON for ARMv8.1
|
||||
Tag_MPextension_use: Allowed
|
||||
Tag_Virtualization_use: TrustZone and Virtualization Extensions
|
|
@ -0,0 +1,4 @@
|
|||
#name: Bfloat 16 failure cases
|
||||
#source: bfloat16-bad.s
|
||||
#as: -mno-warn-deprecated -march=armv8.6-a+simd
|
||||
#error_output: bfloat16-bad.l
|
|
@ -0,0 +1,112 @@
|
|||
[^ :]+: Assembler messages:
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vdot.b16 d0,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vmmla q0.b16,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vdot.bf32 d0,d0,d0\[1\]'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0.bf32,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0.bf32,d0.bf16,d0.bf16'
|
||||
[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne d0,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne d0,d0,d0\[1\]'
|
||||
[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vmmlane q0,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmatne.bf16 q0,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmatne.bf16 q0,d0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vfmabne.bf16 q0,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vfmabne.bf16 q0,d0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vcvtne.bf16.f32 d0,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d0,d32'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: indexed register must be less than 16 -- `vdot d0,d0,d16\[0\]'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvtne.bf16.f32 d32,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q0,q16'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q16,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q16,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,q16'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q16,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q16,d0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q0,q32,d0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q0,q32,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmab.bf16 q0,q0,d8\[0\]'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q16,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q16,d0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q0,q32,d0'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q0,q32,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmat.bf16 q0,q0,d8\[0\]'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvt.bf16.f32 d0,q16'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,q0,d5'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,d5,q0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot d5,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot q0,d5,q0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot d5,q0,q0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,d5'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,d5,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla d5,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab.bf16 d0,q0,d0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab.bf16 d0,q0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat.bf16 d0,q0,d0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat.bf16 d0,q0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: operand size must match register width
|
||||
[^ :]+:[0-9]+: Error: invalid neon suffix for non neon instruction
|
||||
[^ :]+:[0-9]+: Error: index must be 0 or 1 -- `vdot q0,q0,d0\[2\]'
|
||||
[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmab.bf16 q0,d0,d0\[4\]'
|
||||
[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmat.bf16 q0,d0,d0\[4\]'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvtb.b16.f32 s0,s0'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvtb.bf32.f32 s0,s0'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0.b16,s0.f32'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0.bf32,s0.f32'
|
||||
[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb s0.f32,s0.bf16'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvtt.b16.f32 s0,s0'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvtt.bf32.f32 s0,s0'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0.b16,s0.f32'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0.bf32,s0.f32'
|
||||
[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt s0.f32,s0.bf16'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvt.b16.f32 d0,q0'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvt.bf32.f32 d0,q0'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0.b16,q0.f32'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0.bf32,q0.f32'
|
||||
[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvt d0.f32,q0.bf16'
|
||||
[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtt.bf16.f32 s0,s0,#0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtt.bf16.f32 s0,s0,#1'
|
||||
[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt.bf16.f32 d0,s0'
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt.bf16.f32 s0'
|
||||
[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt.bf16.f32 s0,s0,s0,s0'
|
||||
[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt.bf16.f32 s0,s0,s0'
|
||||
[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt.bf16.f32 s0,s32'
|
||||
[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt.bf16.f32 s32,s32'
|
||||
[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtb.bf16.f32 s0,s0,#0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtb.bf16.f32 s0,s0,#1'
|
||||
[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb.bf16.f32 d0,s0'
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb.bf16.f32 s0'
|
||||
[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb.bf16.f32 s0,s0,s0,s0'
|
||||
[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb.bf16.f32 s0,s0,s0'
|
||||
[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb.bf16.f32 s0,s32'
|
||||
[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb.bf16.f32 s32,s32'
|
||||
[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vcvtne.bf16.f32 d0,q0'
|
||||
[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne.bf16 d0,d20,d11'
|
||||
[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne.bf16 d0,d20,d11\[1\]'
|
||||
[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vmmlane.bf16 q0,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: IT falling in the range of a previous IT block -- `ittt ne'
|
||||
[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot.bf16 d0,d20,d11'
|
||||
[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot.bf16 d0,d20,d11\[1\]'
|
|
@ -0,0 +1,119 @@
|
|||
.syntax unified
|
||||
|
||||
// Test warnings about type specifier being incorrect.
|
||||
vdot.b16 d0, d0, d0
|
||||
vmmla q0.b16, q0, q0
|
||||
vdot.bf32 d0, d0, d0[1]
|
||||
vdot d0.bf32, d0, d0
|
||||
vdot d0.bf32, d0.bf16, d0.bf16
|
||||
|
||||
// Test conditions are not allowed in ARM.
|
||||
vdotne d0, d0, d0
|
||||
vdotne d0, d0, d0[1]
|
||||
vmmlane q0, q0, q0
|
||||
vfmatne.bf16 q0, d0, d0
|
||||
vfmatne.bf16 q0, d0, d0[0]
|
||||
vfmabne.bf16 q0, d0, d0
|
||||
vfmabne.bf16 q0, d0, d0[0]
|
||||
vcvtne.bf16.f32 d0, q0
|
||||
|
||||
// d register out of range
|
||||
vdot d32, d0, d0
|
||||
vdot d0, d32, d0
|
||||
vdot d0, d0, d32
|
||||
vdot d32, d0, d0[0]
|
||||
vdot d0, d32, d0[0]
|
||||
vdot d0, d0, d16[0]
|
||||
vcvtne.bf16.f32 d32, q0
|
||||
|
||||
// q register out of range
|
||||
vdot q16, q0, q0
|
||||
vdot q0, q16, q0
|
||||
vdot q0, q0, q16
|
||||
vdot q16, q0, d0[0]
|
||||
vdot q0, q16, d0[0]
|
||||
vmmla q16, q0, q0
|
||||
vmmla q0, q16, q0
|
||||
vmmla q0, q0, q16
|
||||
vfmab.bf16 q16, d0, d0
|
||||
vfmab.bf16 q16, d0, d0[0]
|
||||
vfmab.bf16 q0, q32, d0
|
||||
vfmab.bf16 q0, q32, d0[0]
|
||||
vfmab.bf16 q0, q0, d8[0]
|
||||
vfmat.bf16 q16, d0, d0
|
||||
vfmat.bf16 q16, d0, d0[0]
|
||||
vfmat.bf16 q0, q32, d0
|
||||
vfmat.bf16 q0, q32, d0[0]
|
||||
vfmat.bf16 q0, q0, d8[0]
|
||||
vcvt.bf16.f32 d0, q16
|
||||
|
||||
// Incorrect set of arguments
|
||||
vdot q0, q0, d5
|
||||
vdot q0, d5, q0
|
||||
vdot d5, q0, q0
|
||||
vdot q0, d5, q0[0]
|
||||
vdot d5, q0, q0[0]
|
||||
vmmla q0, q0, d5
|
||||
vmmla q0, d5, q0
|
||||
vmmla d5, q0, q0
|
||||
vfmab.bf16 d0, q0, d0
|
||||
vfmab.bf16 d0, q0, d0[0]
|
||||
vfmat.bf16 d0, q0, d0
|
||||
vfmat.bf16 d0, q0, d0[0]
|
||||
vcvt.bf16.f32 q0, d0
|
||||
|
||||
// vdot index out of range
|
||||
vdot q0, q0, d0[2]
|
||||
|
||||
// vfma<bt> index out of range
|
||||
vfmab.bf16 q0, d0, d0[4]
|
||||
vfmat.bf16 q0, d0, d0[4]
|
||||
|
||||
// Non neon encodings (this file gets assembled more than once but with
|
||||
// different flags, providing different error messages each time).
|
||||
|
||||
// Type specifier warnings
|
||||
.macro conversion_type_specifier_check insn, dest, source
|
||||
\insn\().b16.f32 \dest, \source
|
||||
\insn\().bf32.f32 \dest, \source
|
||||
\insn \dest\().b16, \source\().f32
|
||||
\insn \dest\().bf32, \source\().f32
|
||||
\insn \dest\().f32, \source\().bf16
|
||||
.endm
|
||||
|
||||
conversion_type_specifier_check vcvtb, s0, s0
|
||||
conversion_type_specifier_check vcvtt, s0, s0
|
||||
conversion_type_specifier_check vcvt, d0, q0
|
||||
|
||||
// Conditions allowed (and checked in the "Valid" source file).
|
||||
|
||||
// Incorrect set of operands & registers out of range
|
||||
.macro bad_args insn
|
||||
\insn\().bf16.f32 s0, s0, #0
|
||||
\insn\().bf16.f32 s0, s0, #1
|
||||
\insn\().bf16.f32 d0, s0
|
||||
\insn\().bf16.f32 s0
|
||||
\insn\().bf16.f32 s0, s0, s0, s0
|
||||
\insn\().bf16.f32 s0, s0, s0
|
||||
\insn\().bf16.f32 s0, s32
|
||||
\insn\().bf16.f32 s32, s32
|
||||
.endm
|
||||
bad_args vcvtt
|
||||
bad_args vcvtb
|
||||
|
||||
// Allowed in thumb mode but not allowed in arm mode.
|
||||
it ne
|
||||
vcvtne.bf16.f32 d0, q0
|
||||
|
||||
// Ensure these instructions are not allowed to have a conditional suffix.
|
||||
ittt ne
|
||||
vdotne.bf16 d0, d20, d11
|
||||
vdotne.bf16 d0, d20, d11[1]
|
||||
vmmlane.bf16 q0, q0, q0
|
||||
|
||||
// Ensure we are warned these instructions are UNPREDICTABLE in an IT block in
|
||||
// thumb.
|
||||
ittt ne
|
||||
vdot.bf16 d0, d20, d11
|
||||
vdot.bf16 d0, d20, d11[1]
|
||||
vmmla.bf16 q0, q0, q0
|
|
@ -0,0 +1,4 @@
|
|||
#name: Bfloat 16 bad processor
|
||||
#source: bfloat16-non-neon.s
|
||||
#as: -mno-warn-deprecated -march=armv8.5-a+simd
|
||||
#error: .*Error: selected processor does not support bf16 instruction.*
|
|
@ -0,0 +1,4 @@
|
|||
#name: Bfloat 16 bad extension
|
||||
#source: bfloat16-non-neon.s
|
||||
#as: -mno-warn-deprecated -march=armv8.1-a+bf16
|
||||
#error: .*Error: extension does not apply to the base architecture.*
|
|
@ -0,0 +1,5 @@
|
|||
#name: Bfloat 16 bad FPU
|
||||
#source: bfloat16-neon.s
|
||||
#as: -mno-warn-deprecated -mfpu=vfpxd -march=armv8.6-a
|
||||
#error: .*Error: selected FPU does not support instruction.*
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
.syntax unified
|
||||
// Check argument encoding by having different arguments.
|
||||
// We use 20 and 11 since their binary encoding is 10100 and 01011
|
||||
// respectively which ensures that we distinguish between the D/M/N bit
|
||||
// encoding the first or last bit of the argument.
|
||||
// q registers are encoded as double their actual number.
|
||||
vdot.bf16 d0, d20, d11
|
||||
vdot d11.bf16, d0.bf16, d20.bf16
|
||||
|
||||
.macro conversion_type_specifier_check insn, dest, source
|
||||
\insn\().bf16.f32 \dest, \source
|
||||
\insn \dest\().bf16, \source\().f32
|
||||
\insn \dest\().bf16, \source\().f32
|
||||
.endm
|
||||
conversion_type_specifier_check vcvtt,s0,s0
|
||||
conversion_type_specifier_check vcvtb,s0,s0
|
||||
conversion_type_specifier_check vcvt,d0,q0
|
||||
|
||||
|
||||
// Here we follow the same encoding sequence as above.
|
||||
// Since the 'M' bit encodes the index and the last register is encoded in 4
|
||||
// bits, that argument has a different number.
|
||||
vdot.bf16 d11, d0, d4[1]
|
||||
vdot d0.bf16, d20.bf16, d11.bf16[0]
|
||||
|
||||
// vmmla only works on q registers.
|
||||
// These registers are encoded as double the number given in the mnemonic.
|
||||
// Hence we choose different numbers to ensure a similar bit pattern as above.
|
||||
// 10 & 5 produce the bit patterns 10100 & 01010
|
||||
vmmla.bf16 q10, q5, q0
|
||||
vmmla q5.bf16, q0.bf16, q10.bf16
|
||||
|
||||
vfmat.bf16 q10, q11, q0
|
||||
vfmat.bf16 q10, q11, d0[3]
|
||||
vfmat.bf16 q10, q11, d0[0]
|
||||
|
||||
vfmab.bf16 q10, q11, q0
|
||||
vfmab.bf16 q10, q11, d0[3]
|
||||
vfmab.bf16 q10, q11, d0[0]
|
||||
|
||||
// vcvt
|
||||
// - no condition allowed in arm
|
||||
// - no condition allowed in thumb outside IT block
|
||||
// - Condition *allowed* in thumb in IT block
|
||||
// - different encoding between thumb and arm
|
||||
vcvt.bf16.f32 d20, q5
|
||||
vcvt.bf16.f32 d11, q10
|
||||
|
||||
// Only works for thumb mode.
|
||||
.ifdef COMPILING_FOR_THUMB
|
||||
it ne
|
||||
vcvtne.bf16.f32 d0, q0
|
||||
.endif
|
|
@ -0,0 +1,9 @@
|
|||
.syntax unified
|
||||
vcvtb.bf16.f32 s20, s11
|
||||
it ne
|
||||
vcvtbne.bf16.f32 s11, s20
|
||||
vcvtbal.bf16.f32 s0, s0
|
||||
vcvtt.bf16.f32 s20, s11
|
||||
it ne
|
||||
vcvttne.bf16.f32 s11, s20
|
||||
vcvttal.bf16.f32 s0, s0
|
|
@ -0,0 +1,4 @@
|
|||
#name: Bfloat 16 Thumb failure cases
|
||||
#source: bfloat16-bad.s
|
||||
#as: -mno-warn-deprecated -mthumb -march=armv8.6-a+simd
|
||||
#error_output: bfloat16-thumb-bad.l
|
|
@ -0,0 +1,112 @@
|
|||
[^ :]+: Assembler messages:
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vdot\.b16 d0,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vmmla q0\.b16,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vdot\.bf32 d0,d0,d0\[1\]'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0\.bf32,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0\.bf32,d0\.bf16,d0\.bf16'
|
||||
[^ :]+:[0-9]+: Error: operand types can't be inferred -- `vdotne d0,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: operand types can't be inferred -- `vdotne d0,d0,d0\[1\]'
|
||||
[^ :]+:[0-9]+: Error: operand types can't be inferred -- `vmmlane q0,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmatne\.bf16 q0,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmatne\.bf16 q0,d0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmabne\.bf16 q0,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmabne\.bf16 q0,d0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vcvtne\.bf16\.f32 d0,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d0,d32'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: indexed register must be less than 16 -- `vdot d0,d0,d16\[0\]'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvtne\.bf16\.f32 d32,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q0,q16'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q16,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q16,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,q16'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q16,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q16,d0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q0,q32,d0'
|
||||
[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q0,q32,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmab\.bf16 q0,q0,d8\[0\]'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q16,d0,d0'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q16,d0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q0,q32,d0'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q0,q32,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmat\.bf16 q0,q0,d8\[0\]'
|
||||
[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvt\.bf16\.f32 d0,q16'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,q0,d5'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,d5,q0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot d5,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot q0,d5,q0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot d5,q0,q0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,d5'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,d5,q0'
|
||||
[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla d5,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab\.bf16 d0,q0,d0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab\.bf16 d0,q0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat\.bf16 d0,q0,d0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat\.bf16 d0,q0,d0\[0\]'
|
||||
[^ :]+:[0-9]+: Error: operand size must match register width
|
||||
[^ :]+:[0-9]+: Error: invalid neon suffix for non neon instruction
|
||||
[^ :]+:[0-9]+: Error: index must be 0 or 1 -- `vdot q0,q0,d0\[2\]'
|
||||
[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmab\.bf16 q0,d0,d0\[4\]'
|
||||
[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmat\.bf16 q0,d0,d0\[4\]'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvtb\.b16\.f32 s0,s0'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvtb\.bf32\.f32 s0,s0'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0\.b16,s0\.f32'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0\.bf32,s0\.f32'
|
||||
[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb s0\.f32,s0\.bf16'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvtt\.b16\.f32 s0,s0'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvtt\.bf32\.f32 s0,s0'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0\.b16,s0\.f32'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0\.bf32,s0\.f32'
|
||||
[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt s0\.f32,s0\.bf16'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvt\.b16\.f32 d0,q0'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad instruction `vcvt\.bf32\.f32 d0,q0'
|
||||
[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0\.b16,q0\.f32'
|
||||
[^ :]+:[0-9]+: Error: bad size 32 in type specifier
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0\.bf32,q0\.f32'
|
||||
[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvt d0\.f32,q0\.bf16'
|
||||
[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtt\.bf16\.f32 s0,s0,#0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtt\.bf16\.f32 s0,s0,#1'
|
||||
[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt\.bf16\.f32 d0,s0'
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt\.bf16\.f32 s0'
|
||||
[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt\.bf16\.f32 s0,s0,s0,s0'
|
||||
[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt\.bf16\.f32 s0,s0,s0'
|
||||
[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt\.bf16\.f32 s0,s32'
|
||||
[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt\.bf16\.f32 s32,s32'
|
||||
[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtb\.bf16\.f32 s0,s0,#0'
|
||||
[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtb\.bf16\.f32 s0,s0,#1'
|
||||
[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb\.bf16\.f32 d0,s0'
|
||||
[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb\.bf16\.f32 s0'
|
||||
[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb\.bf16\.f32 s0,s0,s0,s0'
|
||||
[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb\.bf16\.f32 s0,s0,s0'
|
||||
[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb\.bf16\.f32 s0,s32'
|
||||
[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb\.bf16\.f32 s32,s32'
|
||||
[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdotne\.bf16 d0,d20,d11'
|
||||
[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdotne\.bf16 d0,d20,d11\[1\]'
|
||||
[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vmmlane\.bf16 q0,q0,q0'
|
||||
[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot\.bf16 d0,d20,d11'
|
||||
[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot\.bf16 d0,d20,d11\[1\]'
|
||||
[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vmmla\.bf16 q0,q0,q0'
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
#name: Bfloat 16 extension Thumb
|
||||
#source: bfloat16.s
|
||||
#as: -mno-warn-deprecated --defsym COMPILING_FOR_THUMB=1 -mthumb -march=armv8.6-a+simd -I$srcdir/$subdir
|
||||
#objdump: -dr --show-raw-insn
|
||||
#skip: *-*-pe *-*-wince
|
||||
|
||||
.*: +file format .*arm.*
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
00000000 <\.text>:
|
||||
*[0-9a-f]+: fc04 0d8b vdot\.bf16 d0, d20, d11
|
||||
*[0-9a-f]+: fc00 bd24 vdot\.bf16 d11, d0, d20
|
||||
*[0-9a-f]+: eeb3 09c0 vcvtt\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb3 09c0 vcvtt\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb3 09c0 vcvtt\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb3 0940 vcvtb\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb3 0940 vcvtb\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb3 0940 vcvtb\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: ffb6 0640 vcvt\.bf16\.f32 d0, q0
|
||||
*[0-9a-f]+: ffb6 0640 vcvt\.bf16\.f32 d0, q0
|
||||
*[0-9a-f]+: ffb6 0640 vcvt\.bf16\.f32 d0, q0
|
||||
*[0-9a-f]+: fe00 bd24 vdot\.bf16 d11, d0, d4\[1\]
|
||||
*[0-9a-f]+: fe04 0d8b vdot\.bf16 d0, d20, d11\[0\]
|
||||
*[0-9a-f]+: fc4a 4c40 vmmla\.bf16 q10, q5, q0
|
||||
*[0-9a-f]+: fc00 ac64 vmmla\.bf16 q5, q0, q10
|
||||
*[0-9a-f]+: fc76 48d0 vfmat\.bf16 q10, q11, q0
|
||||
*[0-9a-f]+: fe76 48f8 vfmat\.bf16 q10, q11, d0\[3\]
|
||||
*[0-9a-f]+: fe76 48d0 vfmat\.bf16 q10, q11, d0\[0\]
|
||||
*[0-9a-f]+: fc76 4890 vfmab\.bf16 q10, q11, q0
|
||||
*[0-9a-f]+: fe76 48b8 vfmab\.bf16 q10, q11, d0\[3\]
|
||||
*[0-9a-f]+: fe76 4890 vfmab\.bf16 q10, q11, d0\[0\]
|
||||
*[0-9a-f]+: fff6 464a vcvt\.bf16\.f32 d20, q5
|
||||
*[0-9a-f]+: ffb6 b664 vcvt\.bf16\.f32 d11, q10
|
||||
*[0-9a-f]+: bf18 it ne
|
||||
*[0-9a-f]+: ffb6 0640 vcvtne\.bf16\.f32 d0, q0
|
||||
*[0-9a-f]+: eeb3 a965 vcvtb\.bf16\.f32 s20, s11
|
||||
*[0-9a-f]+: bf18 it ne
|
||||
*[0-9a-f]+: eef3 594a vcvtbne\.bf16\.f32 s11, s20
|
||||
*[0-9a-f]+: eeb3 0940 vcvtb\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb3 a9e5 vcvtt\.bf16\.f32 s20, s11
|
||||
*[0-9a-f]+: bf18 it ne
|
||||
*[0-9a-f]+: eef3 59ca vcvttne\.bf16\.f32 s11, s20
|
||||
*[0-9a-f]+: eeb3 09c0 vcvtt\.bf16\.f32 s0, s0
|
|
@ -0,0 +1,16 @@
|
|||
#name: Bfloat 16 VFP
|
||||
#source: bfloat16-non-neon.s
|
||||
#as: -mno-warn-deprecated -mfpu=vfpxd -march=armv8.6-a -I$srcdir/$subdir
|
||||
#objdump: -dr --show-raw-insn
|
||||
|
||||
.*: +file format .*arm.*
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
00000000 <.text>:
|
||||
*[0-9a-f]*: eeb3a965 vcvtb.bf16.f32 s20, s11
|
||||
*[0-9a-f]*: 1ef3594a vcvtbne.bf16.f32 s11, s20
|
||||
*[0-9a-f]*: eeb30940 vcvtb.bf16.f32 s0, s0
|
||||
*[0-9a-f]*: eeb3a9e5 vcvtt.bf16.f32 s20, s11
|
||||
*[0-9a-f]*: 1ef359ca vcvttne.bf16.f32 s11, s20
|
||||
*[0-9a-f]*: eeb309c0 vcvtt.bf16.f32 s0, s0
|
|
@ -0,0 +1,39 @@
|
|||
#name: Bfloat 16 extension
|
||||
#source: bfloat16.s
|
||||
#as: -mno-warn-deprecated -march=armv8.6-a+simd -I$srcdir/$subdir
|
||||
#objdump: -dr --show-raw-insn
|
||||
|
||||
.*: file format .*
|
||||
|
||||
Disassembly of section \.text:
|
||||
|
||||
00000000 <.text>:
|
||||
*[0-9a-f]+: fc040d8b vdot\.bf16 d0, d20, d11
|
||||
*[0-9a-f]+: fc00bd24 vdot\.bf16 d11, d0, d20
|
||||
*[0-9a-f]+: eeb309c0 vcvtt\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb309c0 vcvtt\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb309c0 vcvtt\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb30940 vcvtb\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb30940 vcvtb\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb30940 vcvtb\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: f3b60640 vcvt\.bf16\.f32 d0, q0
|
||||
*[0-9a-f]+: f3b60640 vcvt\.bf16\.f32 d0, q0
|
||||
*[0-9a-f]+: f3b60640 vcvt\.bf16\.f32 d0, q0
|
||||
*[0-9a-f]+: fe00bd24 vdot\.bf16 d11, d0, d4\[1\]
|
||||
*[0-9a-f]+: fe040d8b vdot\.bf16 d0, d20, d11\[0\]
|
||||
*[0-9a-f]+: fc4a4c40 vmmla\.bf16 q10, q5, q0
|
||||
*[0-9a-f]+: fc00ac64 vmmla\.bf16 q5, q0, q10
|
||||
*[0-9a-f]*: fc7648d0 vfmat\.bf16 q10, q11, q0
|
||||
*[0-9a-f]*: fe7648f8 vfmat\.bf16 q10, q11, d0\[3\]
|
||||
*[0-9a-f]*: fe7648d0 vfmat\.bf16 q10, q11, d0\[0\]
|
||||
*[0-9a-f]*: fc764890 vfmab\.bf16 q10, q11, q0
|
||||
*[0-9a-f]*: fe7648b8 vfmab\.bf16 q10, q11, d0\[3\]
|
||||
*[0-9a-f]*: fe764890 vfmab\.bf16 q10, q11, d0\[0\]
|
||||
*[0-9a-f]+: f3f6464a vcvt\.bf16\.f32 d20, q5
|
||||
*[0-9a-f]+: f3b6b664 vcvt\.bf16\.f32 d11, q10
|
||||
*[0-9a-f]+: eeb3a965 vcvtb\.bf16\.f32 s20, s11
|
||||
*[0-9a-f]+: 1ef3594a vcvtbne\.bf16\.f32 s11, s20
|
||||
*[0-9a-f]+: eeb30940 vcvtb\.bf16\.f32 s0, s0
|
||||
*[0-9a-f]+: eeb3a9e5 vcvtt\.bf16\.f32 s20, s11
|
||||
*[0-9a-f]+: 1ef359ca vcvttne\.bf16\.f32 s11, s20
|
||||
*[0-9a-f]+: eeb309c0 vcvtt\.bf16\.f32 s0, s0
|
|
@ -0,0 +1,2 @@
|
|||
.include "bfloat16-neon.s"
|
||||
.include "bfloat16-non-neon.s"
|
|
@ -1,3 +1,11 @@
|
|||
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
|
||||
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>
|
||||
|
||||
* opcode/arm.h (ARM_EXT2_V8_6A, ARM_AEXT2_V8_6A,
|
||||
ARM_ARCH_V8_6A): New.
|
||||
* opcode/arm.h (ARM_EXT2_BF16): New feature macro.
|
||||
(ARM_AEXT2_V8_6A): Include above macro in definition.
|
||||
|
||||
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
|
||||
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>
|
||||
|
||||
|
|
|
@ -73,6 +73,8 @@
|
|||
#define ARM_EXT2_SB 0x00002000 /* Speculation Barrier instruction. */
|
||||
#define ARM_EXT2_PREDRES 0x00004000 /* Prediction Restriction insns. */
|
||||
#define ARM_EXT2_V8_1M_MAIN 0x00008000 /* ARMv8.1-M Mainline. */
|
||||
#define ARM_EXT2_V8_6A 0x00010000 /* ARM V8.6A. */
|
||||
#define ARM_EXT2_BF16 0x00020000 /* ARMv8 bfloat16. */
|
||||
|
||||
/* Co-processor space extensions. */
|
||||
#define ARM_CEXT_XSCALE 0x00000001 /* Allow MIA etc. */
|
||||
|
@ -169,6 +171,7 @@
|
|||
| ARM_EXT2_V8_4A)
|
||||
#define ARM_AEXT2_V8_5A (ARM_AEXT2_V8_4A | ARM_EXT2_V8_5A | ARM_EXT2_SB \
|
||||
| ARM_EXT2_PREDRES)
|
||||
#define ARM_AEXT2_V8_6A (ARM_AEXT2_V8_5A | ARM_EXT2_V8_6A | ARM_EXT2_BF16)
|
||||
#define ARM_AEXT_V8M_BASE (ARM_AEXT_V6SM | ARM_EXT_DIV)
|
||||
#define ARM_AEXT_V8M_MAIN ARM_AEXT_V7M
|
||||
#define ARM_AEXT_V8M_MAIN_DSP ARM_AEXT_V7EM
|
||||
|
@ -352,6 +355,9 @@
|
|||
#define ARM_ARCH_V8_5A ARM_FEATURE (ARM_AEXT_V8A, ARM_AEXT2_V8_5A, \
|
||||
CRC_EXT_ARMV8 | FPU_NEON_EXT_RDMA \
|
||||
| FPU_NEON_EXT_DOTPROD)
|
||||
#define ARM_ARCH_V8_6A ARM_FEATURE (ARM_AEXT_V8A, ARM_AEXT2_V8_6A, \
|
||||
CRC_EXT_ARMV8 | FPU_NEON_EXT_RDMA \
|
||||
| FPU_NEON_EXT_DOTPROD)
|
||||
#define ARM_ARCH_V8M_BASE ARM_FEATURE_CORE (ARM_AEXT_V8M_BASE, \
|
||||
ARM_AEXT2_V8M_BASE)
|
||||
#define ARM_ARCH_V8M_MAIN ARM_FEATURE_CORE (ARM_AEXT_V8M_MAIN, \
|
||||
|
|
|
@ -1,3 +1,15 @@
|
|||
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
|
||||
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>
|
||||
|
||||
* arm-dis.c (select_arm_features): Update bfd_mach_arm_8 with
|
||||
Armv8.6-A.
|
||||
(coprocessor_opcodes): Add bfloat16 vcvt{t,b}.
|
||||
(neon_opcodes): Add bfloat SIMD instructions.
|
||||
(print_insn_coprocessor): Add new control character %b to print
|
||||
condition code without checking cp_num.
|
||||
(print_insn_neon): Account for BFloat16 instructions that have no
|
||||
special top-byte handling.
|
||||
|
||||
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
|
||||
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>
|
||||
|
||||
|
|
|
@ -396,6 +396,7 @@ struct opcode16
|
|||
%% %
|
||||
|
||||
%c print condition code (always bits 28-31 in ARM mode)
|
||||
%b print condition code allowing cp_num == 9
|
||||
%q print shifter argument
|
||||
%u print condition code (unconditional in ARM mode,
|
||||
UNPREDICTABLE if not AL in Thumb)
|
||||
|
@ -1207,11 +1208,15 @@ static const struct sopcode32 coprocessor_opcodes[] =
|
|||
{ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
|
||||
0xfea00800, 0xffa00f10, "vcmla%c.f32\t%12-15,22V, %16-19,7V, %0-3,5D[0], #%20?21%20?780"},
|
||||
|
||||
/* BFloat16 instructions. */
|
||||
{ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0x0eb30940, 0x0fbf0f50, "vcvt%7?tb%b.bf16.f32\t%y1, %y0"},
|
||||
|
||||
/* Dot Product instructions in the space of coprocessor 13. */
|
||||
{ANY, ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD),
|
||||
0xfc200d00, 0xffb00f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3,5V"},
|
||||
{ANY, ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD),
|
||||
0xfe000d00, 0xff000f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3D[%5?10]"},
|
||||
0xfe200d00, 0xff200f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3D[%5?10]"},
|
||||
|
||||
/* ARMv8.2 FMAC Long instructions in the space of coprocessor 8. */
|
||||
{ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST | ARM_EXT2_V8_2A),
|
||||
|
@ -1452,6 +1457,20 @@ static const struct opcode32 neon_opcodes[] =
|
|||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
|
||||
0xf2300c10, 0xffb00f10, "vfms%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
|
||||
|
||||
/* BFloat16 instructions. */
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xfc000d00, 0xffb00f10, "vdot.bf16\t%12-15,22R, %16-19,7R, %0-3,5R"},
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xfe000d00, 0xffb00f10, "vdot.bf16\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"},
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xfc000c40, 0xffb00f50, "vmmla.bf16\t%12-15,22R, %16-19,7R, %0-3,5R"},
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xf3b60640, 0xffbf0fd0, "vcvt%c.bf16.f32\t%12-15,22D, %0-3,5Q"},
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xfc300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-3,5Q"},
|
||||
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
|
||||
0xfe300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-2D[%3,5d]"},
|
||||
|
||||
/* Two registers, miscellaneous. */
|
||||
{ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
|
||||
0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"},
|
||||
|
@ -8159,6 +8178,8 @@ print_insn_coprocessor_1 (const struct sopcode32 *opcodes,
|
|||
if (cond != COND_UNCOND && cp_num == 9)
|
||||
is_unpredictable = TRUE;
|
||||
|
||||
/* Fall through. */
|
||||
case 'b':
|
||||
func (stream, "%s", arm_conditional[cond]);
|
||||
break;
|
||||
|
||||
|
@ -8772,6 +8793,10 @@ print_insn_neon (struct disassemble_info *info, long given, bfd_boolean thumb)
|
|||
}
|
||||
else if ((given & 0xff000000) == 0xf9000000)
|
||||
given ^= 0xf9000000 ^ 0xf4000000;
|
||||
/* BFloat16 neon instructions without special top byte handling. */
|
||||
else if ((given & 0xff000000) == 0xfe000000
|
||||
|| (given & 0xff000000) == 0xfc000000)
|
||||
;
|
||||
/* vdup is also a valid neon instruction. */
|
||||
else if ((given & 0xff910f5f) != 0xee800b10)
|
||||
return FALSE;
|
||||
|
@ -11625,11 +11650,11 @@ select_arm_features (unsigned long mach,
|
|||
case bfd_mach_arm_7EM: ARM_SET_FEATURES (ARM_ARCH_V7EM); break;
|
||||
case bfd_mach_arm_8:
|
||||
{
|
||||
/* Add bits for extensions that Armv8.5-A recognizes. */
|
||||
arm_feature_set armv8_5_ext_fset
|
||||
/* Add bits for extensions that Armv8.6-A recognizes. */
|
||||
arm_feature_set armv8_6_ext_fset
|
||||
= ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST);
|
||||
ARM_SET_FEATURES (ARM_ARCH_V8_5A);
|
||||
ARM_MERGE_FEATURE_SETS (arch_fset, arch_fset, armv8_5_ext_fset);
|
||||
ARM_SET_FEATURES (ARM_ARCH_V8_6A);
|
||||
ARM_MERGE_FEATURE_SETS (arch_fset, arch_fset, armv8_6_ext_fset);
|
||||
break;
|
||||
}
|
||||
case bfd_mach_arm_8R: ARM_SET_FEATURES (ARM_ARCH_V8R); break;
|
||||
|
|
Loading…
Reference in New Issue