[gas][arm] Enable VLDM, VSTM, VPUSH, VPOP for MVE
This patch enables a few instructions for Armv8.1-M MVE. Currently VLDM, VSTM, VSTR, VLDR, VPUSH and VPOP are enabled only when the Armv8-M Floating-point Extension is enabled. According to the Armv8.1-M ARM, section A.1.4.2 [1], they can be enabled by having "Armv8-M Floating-point Extension and/or Armv8.1-M MVE". [1] https://developer.arm.com/docs/ddi0553/bh/armv81-m-architecture-reference-manual 2019-11-12 Mihail Ionescu <mihail.ionescu@arm.com> * config/tc-arm.c (do_vfp_nsyn_push): Move in order to enable it for both fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vstm instruction for mve_ext. (do_vfp_nsyn_pop): Move in order to enable it for both fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vldm instruction for mve_ext. (do_neon_ldm_stm): Add fpu_vfp_ext_v1xd and mve_ext checks. (insns): Enable vldm, vldmia, vldmdb, vstm, vstmia, vstmdb, vpop, vpush, and fldd, fstd, flds, fsts for arm_ext_v6t2 instead of fpu_vfp_ext_v1xd. * testsuite/gas/arm/v8_1m-mve.s: New. * testsuite/gas/arm/v8_1m-mve.d: New.
This commit is contained in:
parent
ce760a7620
commit
ef8f595f73
@ -1,3 +1,18 @@
|
||||
2019-11-12 Mihail Ionescu <mihail.ionescu@arm.com>
|
||||
|
||||
* config/tc-arm.c (do_vfp_nsyn_push): Move in order to enable it for
|
||||
both fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vstm
|
||||
instruction for mve_ext.
|
||||
(do_vfp_nsyn_pop): Move in order to enable it for both
|
||||
fpu_vfp_ext_v1xd and mve_ext and add call to the aliased vldm
|
||||
instruction for mve_ext.
|
||||
(do_neon_ldm_stm): Add fpu_vfp_ext_v1xd and mve_ext checks.
|
||||
(insns): Enable vldm, vldmia, vldmdb, vstm, vstmia, vstmdb, vpop,
|
||||
vpush, and fldd, fstd, flds, fsts for arm_ext_v6t2 instead
|
||||
of fpu_vfp_ext_v1xd.
|
||||
* testsuite/gas/arm/v8_1m-mve.s: New.
|
||||
* testsuite/gas/arm/v8_1m-mve.d: New.
|
||||
|
||||
2019-11-12 Mihail Ionescu <mihail.ionescu@arm.com>
|
||||
|
||||
* gas/config/tc-arm.c (do_neon_mvn): Allow mve_ext cmode=0xd.
|
||||
|
@ -16527,36 +16527,6 @@ nsyn_insert_sp (void)
|
||||
inst.operands[0].present = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
do_vfp_nsyn_push (void)
|
||||
{
|
||||
nsyn_insert_sp ();
|
||||
|
||||
constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
|
||||
_("register list must contain at least 1 and at most 16 "
|
||||
"registers"));
|
||||
|
||||
if (inst.operands[1].issingle)
|
||||
do_vfp_nsyn_opcode ("fstmdbs");
|
||||
else
|
||||
do_vfp_nsyn_opcode ("fstmdbd");
|
||||
}
|
||||
|
||||
static void
|
||||
do_vfp_nsyn_pop (void)
|
||||
{
|
||||
nsyn_insert_sp ();
|
||||
|
||||
constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
|
||||
_("register list must contain at least 1 and at most 16 "
|
||||
"registers"));
|
||||
|
||||
if (inst.operands[1].issingle)
|
||||
do_vfp_nsyn_opcode ("fldmias");
|
||||
else
|
||||
do_vfp_nsyn_opcode ("fldmiad");
|
||||
}
|
||||
|
||||
/* Fix up Neon data-processing instructions, ORing in the correct bits for
|
||||
ARM mode or Thumb mode and moving the encoded bit 24 to bit 28. */
|
||||
|
||||
@ -20638,6 +20608,9 @@ do_neon_tbl_tbx (void)
|
||||
static void
|
||||
do_neon_ldm_stm (void)
|
||||
{
|
||||
constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)
|
||||
&& !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
|
||||
_(BAD_FPU));
|
||||
/* P, U and L bits are part of bitmask. */
|
||||
int is_dbmode = (inst.instruction & (1 << 24)) != 0;
|
||||
unsigned offsetbits = inst.operands[1].imm * 2;
|
||||
@ -20665,6 +20638,49 @@ do_neon_ldm_stm (void)
|
||||
do_vfp_cond_or_thumb ();
|
||||
}
|
||||
|
||||
static void
|
||||
do_vfp_nsyn_pop (void)
|
||||
{
|
||||
nsyn_insert_sp ();
|
||||
if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) {
|
||||
return do_vfp_nsyn_opcode ("vldm");
|
||||
}
|
||||
|
||||
constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd),
|
||||
_(BAD_FPU));
|
||||
|
||||
constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
|
||||
_("register list must contain at least 1 and at most 16 "
|
||||
"registers"));
|
||||
|
||||
if (inst.operands[1].issingle)
|
||||
do_vfp_nsyn_opcode ("fldmias");
|
||||
else
|
||||
do_vfp_nsyn_opcode ("fldmiad");
|
||||
}
|
||||
|
||||
static void
|
||||
do_vfp_nsyn_push (void)
|
||||
{
|
||||
nsyn_insert_sp ();
|
||||
if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) {
|
||||
return do_vfp_nsyn_opcode ("vstmdb");
|
||||
}
|
||||
|
||||
constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd),
|
||||
_(BAD_FPU));
|
||||
|
||||
constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
|
||||
_("register list must contain at least 1 and at most 16 "
|
||||
"registers"));
|
||||
|
||||
if (inst.operands[1].issingle)
|
||||
do_vfp_nsyn_opcode ("fstmdbs");
|
||||
else
|
||||
do_vfp_nsyn_opcode ("fstmdbd");
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
do_neon_ldr_str (void)
|
||||
{
|
||||
@ -20745,7 +20761,8 @@ do_vldr_vstr (void)
|
||||
/* VLDR/VSTR. */
|
||||
else
|
||||
{
|
||||
if (!mark_feature_used (&fpu_vfp_ext_v1xd))
|
||||
if (!mark_feature_used (&fpu_vfp_ext_v1xd)
|
||||
&& !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
|
||||
as_bad (_("Instruction not permitted on this architecture"));
|
||||
do_neon_ldr_str ();
|
||||
}
|
||||
@ -24966,6 +24983,10 @@ static const struct asm_opcode insns[] =
|
||||
#define THUMB_VARIANT & arm_ext_v6t2
|
||||
mcCE(vmrs, ef00a10, 2, (APSR_RR, RVC), vmrs),
|
||||
mcCE(vmsr, ee00a10, 2, (RVC, RR), vmsr),
|
||||
mcCE(fldd, d100b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst),
|
||||
mcCE(fstd, d000b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst),
|
||||
mcCE(flds, d100a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst),
|
||||
mcCE(fsts, d000a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst),
|
||||
#undef THUMB_VARIANT
|
||||
|
||||
/* Moves and type conversions. */
|
||||
@ -24980,8 +25001,6 @@ static const struct asm_opcode insns[] =
|
||||
cCE("fmxr", ee00a10, 2, (RVC, RR), rn_rd),
|
||||
|
||||
/* Memory operations. */
|
||||
cCE("flds", d100a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst),
|
||||
cCE("fsts", d000a00, 2, (RVS, ADDRGLDC), vfp_sp_ldst),
|
||||
cCE("fldmias", c900a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia),
|
||||
cCE("fldmfds", c900a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmia),
|
||||
cCE("fldmdbs", d300a00, 2, (RRnpctw, VRSLST), vfp_sp_ldstmdb),
|
||||
@ -25023,8 +25042,6 @@ static const struct asm_opcode insns[] =
|
||||
|
||||
/* Double precision load/store are still present on single precision
|
||||
implementations. */
|
||||
cCE("fldd", d100b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst),
|
||||
cCE("fstd", d000b00, 2, (RVD, ADDRGLDC), vfp_dp_ldst),
|
||||
cCE("fldmiad", c900b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmia),
|
||||
cCE("fldmfdd", c900b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmia),
|
||||
cCE("fldmdbd", d300b00, 2, (RRnpctw, VRDLST), vfp_dp_ldstmdb),
|
||||
@ -25077,6 +25094,19 @@ static const struct asm_opcode insns[] =
|
||||
Individual encoder functions perform additional architecture checks. */
|
||||
#undef ARM_VARIANT
|
||||
#define ARM_VARIANT & fpu_vfp_ext_v1xd
|
||||
#undef THUMB_VARIANT
|
||||
#define THUMB_VARIANT & arm_ext_v6t2
|
||||
|
||||
NCE(vldm, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
NCE(vldmia, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
NCE(vldmdb, d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
NCE(vstm, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
NCE(vstmia, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
NCE(vstmdb, d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
|
||||
NCE(vpop, 0, 1, (VRSDLST), vfp_nsyn_pop),
|
||||
NCE(vpush, 0, 1, (VRSDLST), vfp_nsyn_push),
|
||||
|
||||
#undef THUMB_VARIANT
|
||||
#define THUMB_VARIANT & fpu_vfp_ext_v1xd
|
||||
|
||||
@ -25086,20 +25116,11 @@ static const struct asm_opcode insns[] =
|
||||
nCE(vnmul, _vnmul, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
|
||||
nCE(vnmla, _vnmla, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
|
||||
nCE(vnmls, _vnmls, 3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
|
||||
NCE(vpush, 0, 1, (VRSDLST), vfp_nsyn_push),
|
||||
NCE(vpop, 0, 1, (VRSDLST), vfp_nsyn_pop),
|
||||
NCE(vcvtz, 0, 2, (RVSD, RVSD), vfp_nsyn_cvtz),
|
||||
|
||||
/* Mnemonics shared by Neon and VFP. */
|
||||
nCEF(vmls, _vmls, 3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar),
|
||||
|
||||
NCE(vldm, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
NCE(vldmia, c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
NCE(vldmdb, d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
NCE(vstm, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
NCE(vstmia, c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
NCE(vstmdb, d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
|
||||
|
||||
mnCEF(vcvt, _vcvt, 3, (RNSDQMQ, RNSDQMQ, oI32z), neon_cvt),
|
||||
nCEF(vcvtr, _vcvt, 2, (RNSDQ, RNSDQ), neon_cvtr),
|
||||
MNCEF(vcvtb, eb20a40, 3, (RVSDMQ, RVSDMQ, oI32b), neon_cvtb),
|
||||
|
27
gas/testsuite/gas/arm/v8_1m-mve.d
Normal file
27
gas/testsuite/gas/arm/v8_1m-mve.d
Normal file
@ -0,0 +1,27 @@
|
||||
# name: V8.1-m FP register instructions enabled by +mve
|
||||
# as: -march=armv8.1-m.main+mve
|
||||
# objdump: -dr --show-raw-insn -marmv8.1-m.main
|
||||
|
||||
.*: +file format .*arm.*
|
||||
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
00000000 <\.text>:
|
||||
*[0-9a-f]+: ec80 0b08 vstmia r0, {d0-d3}
|
||||
*[0-9a-f]+: ecb7 3b04 vldmia r7!, {d3-d4}
|
||||
*[0-9a-f]+: ecbd 0b06 vpop {d0-d2}
|
||||
*[0-9a-f]+: ed2d 0b06 vpush {d0-d2}
|
||||
*[0-9a-f]+: ecbd 2b08 vpop {d2-d5}
|
||||
*[0-9a-f]+: ed2d 1b0c vpush {d1-d6}
|
||||
*[0-9a-f]+: fe71 0f4d vpst
|
||||
*[0-9a-f]+: fd00 3e01 vstrwt\.32 q1, \[q0, #-4\]
|
||||
*[0-9a-f]+: ed82 2f80 vstr FPSCR, \[r2\]
|
||||
*[0-9a-f]+: ed80 0b00 vstr d0, \[r0\]
|
||||
*[0-9a-f]+: ed90 0b00 vldr d0, \[r0\]
|
||||
*[0-9a-f]+: ed80 0a00 vstr s0, \[r0\]
|
||||
*[0-9a-f]+: ed90 0a00 vldr s0, \[r0\]
|
||||
*[0-9a-f]+: ed81 fb00 vstr d15, \[r1\]
|
||||
*[0-9a-f]+: ed91 fb00 vldr d15, \[r1\]
|
||||
*[0-9a-f]+: edc1 fa00 vstr s31, \[r1\]
|
||||
*[0-9a-f]+: edd1 fa00 vldr s31, \[r1\]
|
24
gas/testsuite/gas/arm/v8_1m-mve.s
Normal file
24
gas/testsuite/gas/arm/v8_1m-mve.s
Normal file
@ -0,0 +1,24 @@
|
||||
.syntax unified
|
||||
|
||||
vstmia r0,{d0-d3}
|
||||
vldmia r7!, {d3-d4}
|
||||
|
||||
vpop {d0-d2}
|
||||
vpush {d0-d2}
|
||||
vpop {d2-d5}
|
||||
vpush {d1-d6}
|
||||
|
||||
vpst
|
||||
vstrwt.u32 q1, [q0, #-4]
|
||||
|
||||
vstr FPSCR, [r2] @ Accepts offset variant without immediate
|
||||
|
||||
vstr d0,[r0]
|
||||
vldr d0,[r0]
|
||||
vstr s0,[r0]
|
||||
vldr s0,[r0]
|
||||
|
||||
vstr d15,[r1]
|
||||
vldr d15,[r1]
|
||||
vstr s31,[r1]
|
||||
vldr s31,[r1]
|
Loading…
Reference in New Issue
Block a user