Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20190613-1' into staging

target-arm queue:
 * convert aarch32 VFP decoder to decodetree
   (includes tightening up decode in a few places)
 * fix minor bugs in VFP short-vector handling
 * hw/core/bus.c: Only the main system bus can have no parent
 * smmuv3: Fix decoding of ID register range
 * Implement NSACR gating of floating point
 * Use tcg_gen_gvec_bitsel

# gpg: Signature made Thu 13 Jun 2019 15:15:39 BST
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20190613-1: (47 commits)
  target/arm: Fix short-vector increment behaviour
  target/arm: Convert float-to-integer VCVT insns to decodetree
  target/arm: Convert VCVT fp/fixed-point conversion insns to decodetree
  target/arm: Convert VJCVT to decodetree
  target/arm: Convert integer-to-float insns to decodetree
  target/arm: Convert double-single precision conversion insns to decodetree
  target/arm: Convert VFP round insns to decodetree
  target/arm: Convert the VCVT-to-f16 insns to decodetree
  target/arm: Convert the VCVT-from-f16 insns to decodetree
  target/arm: Convert VFP comparison insns to decodetree
  target/arm: Convert VMOV (register) to decodetree
  target/arm: Convert VSQRT to decodetree
  target/arm: Convert VNEG to decodetree
  target/arm: Convert VABS to decodetree
  target/arm: Convert VMOV (imm) to decodetree
  target/arm: Convert VFP fused multiply-add insns to decodetree
  target/arm: Convert VDIV to decodetree
  target/arm: Convert VSUB to decodetree
  target/arm: Convert VADD to decodetree
  target/arm: Convert VNMUL to decodetree
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Commit 650a379d50 by Peter Maydell, 2019-06-13 15:16:39 +01:00
17 changed files with 3203 additions and 1572 deletions


@@ -1232,7 +1232,7 @@ static MemTxResult smmu_readl(SMMUv3State *s, hwaddr offset,
uint64_t *data, MemTxAttrs attrs)
{
switch (offset) {
case A_IDREGS ... A_IDREGS + 0x1f:
case A_IDREGS ... A_IDREGS + 0x2f:
*data = smmuv3_idreg(offset - A_IDREGS);
return MEMTX_OK;
case A_IDR0 ... A_IDR5:
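
The widened case range is a size fix: the ID register block is a run of 32-bit registers, and byte offsets 0x00 through 0x2f cover 0x30 / 4 = 12 of them, where the old bound of 0x1f reached only 8. A minimal sketch of the kind of lookup this range feeds, assuming the ID values sit in a flat word table (the helper name and table are illustrative, not the actual smmuv3 code):

#include <assert.h>
#include <stdint.h>

/* Illustrative only: read one 32-bit ID register out of a flat table,
 * indexed by the byte offset into the ID register block. Twelve registers
 * span byte offsets 0x00..0x2f, which is why the case range must end at
 * A_IDREGS + 0x2f rather than + 0x1f. */
static uint32_t idreg_read(const uint32_t idregs[12], unsigned byte_offset)
{
    assert(byte_offset <= 0x2f);      /* 12 registers * 4 bytes - 1 */
    return idregs[byte_offset / 4];   /* word-aligned 32-bit read */
}

int main(void)
{
    uint32_t ids[12] = { 0 };
    assert(idreg_read(ids, 0x2c) == 0);   /* the 12th and last register */
    return 0;
}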


@@ -97,10 +97,9 @@ static void qbus_realize(BusState *bus, DeviceState *parent, const char *name)
bus->parent->num_child_bus++;
object_property_add_child(OBJECT(bus->parent), bus->name, OBJECT(bus), NULL);
object_unref(OBJECT(bus));
} else if (bus != sysbus_get_default()) {
/* TODO: once all bus devices are qdevified,
only reset handler for main_system_bus should be registered here. */
qemu_register_reset(qbus_reset_all_fn, bus);
} else {
/* The only bus without a parent is the main system bus */
assert(bus == sysbus_get_default());
}
}
@@ -109,18 +108,16 @@
BusState *bus = BUS(obj);
BusChild *kid;
/* Only the main system bus has no parent, and that bus is never freed */
assert(bus->parent);
while ((kid = QTAILQ_FIRST(&bus->children)) != NULL) {
DeviceState *dev = kid->child;
object_unparent(OBJECT(dev));
}
if (bus->parent) {
QLIST_REMOVE(bus, sibling);
bus->parent->num_child_bus--;
bus->parent = NULL;
} else {
assert(bus != sysbus_get_default()); /* main_system_bus is never freed */
qemu_unregister_reset(qbus_reset_all_fn, bus);
}
QLIST_REMOVE(bus, sibling);
bus->parent->num_child_bus--;
bus->parent = NULL;
}
void qbus_create_inplace(void *bus, size_t size, const char *typename,


@@ -184,7 +184,7 @@ class Field:
return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)
def __eq__(self, other):
return self.sign == other.sign and self.sign == other.sign
return self.sign == other.sign and self.mask == other.mask
def __ne__(self, other):
return not self.__eq__(other)


@@ -19,5 +19,18 @@ target/arm/decode-sve.inc.c: $(SRC_PATH)/target/arm/sve.decode $(DECODETREE)
$(PYTHON) $(DECODETREE) --decode disas_sve -o $@ $<,\
"GEN", $(TARGET_DIR)$@)
target/arm/decode-vfp.inc.c: $(SRC_PATH)/target/arm/vfp.decode $(DECODETREE)
$(call quiet-command,\
$(PYTHON) $(DECODETREE) --static-decode disas_vfp -o $@ $<,\
"GEN", $(TARGET_DIR)$@)
target/arm/decode-vfp-uncond.inc.c: $(SRC_PATH)/target/arm/vfp-uncond.decode $(DECODETREE)
$(call quiet-command,\
$(PYTHON) $(DECODETREE) --static-decode disas_vfp_uncond -o $@ $<,\
"GEN", $(TARGET_DIR)$@)
target/arm/translate-sve.o: target/arm/decode-sve.inc.c
target/arm/translate.o: target/arm/decode-vfp.inc.c
target/arm/translate.o: target/arm/decode-vfp-uncond.inc.c
obj-$(TARGET_AARCH64) += translate-sve.o sve_helper.o
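
These rules run decodetree with --static-decode, which, as with the existing SVE rule, emits a static decode function with the given name into the generated .inc.c; the translator that includes that file supplies one trans_* handler per pattern. A small self-contained sketch of that calling shape, with every name invented here rather than taken from the generated files:

#include <stdbool.h>
#include <stdint.h>

typedef struct DisasContext DisasContext;   /* stand-in for translator state */

/* decodetree emits one argument struct per pattern format ... */
typedef struct { int vd, vn, vm; } arg_EXAMPLE;

/* ... and calls a translator-supplied trans_* handler for each match. */
static bool trans_EXAMPLE(DisasContext *s, arg_EXAMPLE *a)
{
    (void)s;
    (void)a;
    return true;          /* true: instruction accepted, code emitted */
}

/* Stand-in for the generated static decoder: match the fixed bits,
 * extract the fields, hand them to the handler. */
static bool disas_example(DisasContext *s, uint32_t insn)
{
    arg_EXAMPLE a = {
        .vd = (insn >> 12) & 0xf,
        .vn = (insn >> 16) & 0xf,
        .vm = insn & 0xf,
    };
    return trans_EXAMPLE(s, &a);
}

int main(void)
{
    return disas_example(0, 0u) ? 0 : 1;
}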


@@ -1609,6 +1609,8 @@ static void cortex_r5f_initfn(Object *obj)
cortex_r5_initfn(obj);
set_feature(&cpu->env, ARM_FEATURE_VFP3);
cpu->isar.mvfr0 = 0x10110221;
cpu->isar.mvfr1 = 0x00000011;
}
static const ARMCPRegInfo cortexa8_cp_reginfo[] = {
@@ -2021,6 +2023,10 @@ static void arm_max_initfn(Object *obj)
kvm_arm_set_cpu_features_from_host(cpu);
} else {
cortex_a15_initfn(obj);
/* old-style VFP short-vector support */
cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
#ifdef CONFIG_USER_ONLY
/* We don't set these in system emulation mode for the moment,
* since we don't correctly set (all of) the ID registers to


@@ -3371,6 +3371,17 @@ static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
}
static inline bool isar_feature_aa32_fp_d32(const ARMISARegisters *id)
{
/* Return true if D16-D31 are implemented */
return FIELD_EX64(id->mvfr0, MVFR0, SIMDREG) >= 2;
}
static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id)
{
return FIELD_EX64(id->mvfr0, MVFR0, FPSHVEC) > 0;
}
/*
* We always set the FP and SIMD FP16 fields to indicate identical
* levels of support (assuming SIMD is implemented at all), so
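
These two feature tests are what the converted VFP handlers use to reject encodings the CPU does not implement (the header comments in the new .decode files below call out exactly these checks). A standalone illustration of the D16-D31 case, assuming only that MVFR0.SIMDReg occupies bits [3:0] and that a value of 2 or more means the full 32-register bank; the function names are invented:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Invented helpers mirroring isar_feature_aa32_fp_d32: MVFR0.SIMDReg is
 * bits [3:0]; >= 2 means D0-D31 are all implemented. A double-precision
 * register number with bit 4 set is only legal in that case. */
static bool fp_d32_implemented(uint32_t mvfr0)
{
    return (mvfr0 & 0xf) >= 2;
}

static bool dp_reg_ok(uint32_t mvfr0, int dreg)
{
    return (dreg & 0x10) == 0 || fp_d32_implemented(mvfr0);
}

int main(void)
{
    /* The Cortex-R5F MVFR0 value set earlier in this series (0x10110221)
     * has SIMDReg == 1, so D0-D15 are fine but D16-D31 must UNDEF. */
    assert(dp_reg_ok(0x10110221, 7));
    assert(!dp_reg_ok(0x10110221, 17));
    return 0;
}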


@@ -930,9 +930,36 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
}
value &= mask;
}
/*
* For A-profile AArch32 EL3 (but not M-profile secure mode), if NSACR.CP10
* is 0 then CPACR.{CP11,CP10} ignore writes and read as 0b00.
*/
if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
!arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
value &= ~(0xf << 20);
value |= env->cp15.cpacr_el1 & (0xf << 20);
}
env->cp15.cpacr_el1 = value;
}
static uint64_t cpacr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
/*
* For A-profile AArch32 EL3 (but not M-profile secure mode), if NSACR.CP10
* is 0 then CPACR.{CP11,CP10} ignore writes and read as 0b00.
*/
uint64_t value = env->cp15.cpacr_el1;
if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
!arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
value &= ~(0xf << 20);
}
return value;
}
static void cpacr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
/* Call cpacr_write() so that we reset with the correct RAO bits set
@@ -998,7 +1025,7 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
{ .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
.crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access,
.access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1),
.resetfn = cpacr_reset, .writefn = cpacr_write },
.resetfn = cpacr_reset, .writefn = cpacr_write, .readfn = cpacr_read },
REGINFO_SENTINEL
};
@@ -4683,6 +4710,36 @@ uint64_t arm_hcr_el2_eff(CPUARMState *env)
return ret;
}
static void cptr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
/*
* For A-profile AArch32 EL3, if NSACR.CP10
* is 0 then HCPTR.{TCP11,TCP10} ignore writes and read as 1.
*/
if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
!arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
value &= ~(0x3 << 10);
value |= env->cp15.cptr_el[2] & (0x3 << 10);
}
env->cp15.cptr_el[2] = value;
}
static uint64_t cptr_el2_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
/*
* For A-profile AArch32 EL3, if NSACR.CP10
* is 0 then HCPTR.{TCP11,TCP10} ignore writes and read as 1.
*/
uint64_t value = env->cp15.cptr_el[2];
if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
!arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
value |= 0x3 << 10;
}
return value;
}
static const ARMCPRegInfo el2_cp_reginfo[] = {
{ .name = "HCR_EL2", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_IO,
@@ -4730,7 +4787,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
{ .name = "CPTR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 2,
.access = PL2_RW, .accessfn = cptr_access, .resetvalue = 0,
.fieldoffset = offsetof(CPUARMState, cp15.cptr_el[2]) },
.fieldoffset = offsetof(CPUARMState, cp15.cptr_el[2]),
.readfn = cptr_el2_read, .writefn = cptr_el2_write },
{ .name = "MAIR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 0,
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[2]),
@@ -13587,6 +13645,19 @@ int fp_exception_el(CPUARMState *env, int cur_el)
break;
}
/*
* The NSACR allows A-profile AArch32 EL3 and M-profile secure mode
* to control non-secure access to the FPU. It doesn't have any
* effect if EL3 is AArch64 or if EL3 doesn't exist at all.
*/
if ((arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
cur_el <= 2 && !arm_is_secure_below_el3(env))) {
if (!extract32(env->cp15.nsacr, 10, 1)) {
/* FP insns act as UNDEF */
return cur_el == 2 ? 2 : 1;
}
}
/* For the CPTR registers we don't need to guard with an ARM_FEATURE
* check because zero bits in the registers mean "don't trap".
*/
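
The same NSACR.CP10 test recurs in cpacr_write/read, cptr_el2_write/read and fp_exception_el above. A standalone restatement of the fp_exception_el gate with the CPU state flattened into plain parameters; the function below is illustrative, not a QEMU helper:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* With an AArch32 EL3 present and the access coming from a non-secure
 * EL <= 2, a clear NSACR.CP10 makes FP instructions UNDEF, trapping to
 * EL1, or to EL2 if that is where they were executed. Returns the target
 * EL, or 0 for "no trap from this check". */
static int nsacr_fp_trap_el(bool have_el3, bool el3_is_aa64,
                            bool secure, int cur_el, uint32_t nsacr)
{
    if (have_el3 && !el3_is_aa64 && cur_el <= 2 && !secure &&
        !((nsacr >> 10) & 1)) {
        return cur_el == 2 ? 2 : 1;
    }
    return 0;
}

int main(void)
{
    /* Non-secure EL0/EL1 with NSACR.CP10 == 0: FP UNDEFs to EL1. */
    assert(nsacr_fp_trap_el(true, false, false, 0, 0) == 1);
    /* Non-secure EL2: the trap is taken to EL2 instead. */
    assert(nsacr_fp_trap_el(true, false, false, 2, 0) == 2);
    /* Secure state, or NSACR.CP10 == 1: no effect. */
    assert(nsacr_fp_trap_el(true, false, true, 1, 0) == 0);
    assert(nsacr_fp_trap_el(true, false, false, 1, 1u << 10) == 0);
    return 0;
}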


@@ -344,9 +344,9 @@ static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier,
if (unlikely(extract64(test, bot_bit, top_bit - bot_bit))) {
int error_code = (keynumber << 1) | (keynumber ^ 1);
if (param.tbi) {
return deposit64(ptr, 53, 2, error_code);
return deposit64(orig_ptr, 53, 2, error_code);
} else {
return deposit64(ptr, 61, 2, error_code);
return deposit64(orig_ptr, 61, 2, error_code);
}
}
return orig_ptr;


@@ -704,6 +704,15 @@ static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}
/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
int rx, GVecGen4Fn *gvec_fn, int vece)
{
gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
is_q ? 16 : 8, vec_full_reg_size(s));
}
/* Expand a 2-operand + immediate AdvSIMD vector operation using
* an op descriptor.
*/
@@ -10918,13 +10927,13 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
return;
case 5: /* BSL bitwise select */
gen_gvec_op3(s, is_q, rd, rn, rm, &bsl_op);
gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
return;
case 6: /* BIT, bitwise insert if true */
gen_gvec_op3(s, is_q, rd, rn, rm, &bit_op);
gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
return;
case 7: /* BIF, bitwise insert if false */
gen_gvec_op3(s, is_q, rd, rn, rm, &bif_op);
gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
return;
default:
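
All three Neon logic ops now reduce to one primitive: tcg_gen_gvec_bitsel(vece, d, a, b, c, ...) selects, per bit, operand b where a is set and operand c where it is clear, so BSL uses rd itself as the mask while BIT and BIF use rm as the mask and swap which of rn/rd supplies the selected value. A scalar model of those equivalences (names invented here):

#include <assert.h>
#include <stdint.h>

/* Scalar model of the per-element operation: take a bit from 'tv' where
 * 'mask' has a 1, from 'fv' where it has a 0. */
static uint64_t bitsel(uint64_t mask, uint64_t tv, uint64_t fv)
{
    return (tv & mask) | (fv & ~mask);
}

int main(void)
{
    uint64_t rd = 0x00ff00ff00ff00ffull;
    uint64_t rn = 0x0123456789abcdefull;
    uint64_t rm = 0xfedcba9876543210ull;

    /* BSL: rd = bitsel(rd, rn, rm) -> gen_gvec_fn4(.., rd, rd, rn, rm, ..) */
    /* BIT: rd = bitsel(rm, rn, rd) -> gen_gvec_fn4(.., rd, rm, rn, rd, ..) */
    /* BIF: rd = bitsel(rm, rd, rn) -> gen_gvec_fn4(.., rd, rm, rd, rn, ..) */
    assert(bitsel(~0ull, rn, rm) == rn);            /* all-ones mask: tv */
    assert(bitsel(0, rn, rm) == rm);                /* all-zeros mask: fv */
    assert(bitsel(rd, rn, rm) == 0xfe23ba6776ab32efull);
    return 0;
}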


@@ -122,5 +122,7 @@ typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
uint32_t, uint32_t);
typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t);
typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t);
#endif /* TARGET_ARM_TRANSLATE_A64_H */

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -238,9 +238,6 @@ static inline void gen_ss_advance(DisasContext *s)
}
/* Vector operations shared between ARM and AArch64. */
extern const GVecGen3 bsl_op;
extern const GVecGen3 bit_op;
extern const GVecGen3 bif_op;
extern const GVecGen3 mla_op[4];
extern const GVecGen3 mls_op[4];
extern const GVecGen3 cmtst_op[4];


@@ -0,0 +1,63 @@
# AArch32 VFP instruction descriptions (unconditional insns)
#
# Copyright (c) 2019 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# Encodings for the unconditional VFP instructions are here:
# generally anything matching A32
# 1111 1110 .... .... .... 101. ...0 ....
# and T32
# 1111 110. .... .... .... 101. .... ....
# 1111 1110 .... .... .... 101. .... ....
# (but those patterns might also cover some Neon instructions,
# which do not live in this file.)
# VFP registers have an odd encoding with a four-bit field
# and a one-bit field which are assembled in different orders
# depending on whether the register is double or single precision.
# Each individual instruction function must do the checks for
# "double register selected but CPU does not have double support"
# and "double register number has bit 4 set but CPU does not
# support D16-D31" (which should UNDEF).
%vm_dp 5:1 0:4
%vm_sp 0:4 5:1
%vn_dp 7:1 16:4
%vn_sp 16:4 7:1
%vd_dp 22:1 12:4
%vd_sp 12:4 22:1
VSEL 1111 1110 0. cc:2 .... .... 1010 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp dp=0
VSEL 1111 1110 0. cc:2 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1
VMINMAXNM 1111 1110 1.00 .... .... 1010 . op:1 .0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp dp=0
VMINMAXNM 1111 1110 1.00 .... .... 1011 . op:1 .0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1
VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
vm=%vm_sp vd=%vd_sp dp=0
VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
vm=%vm_dp vd=%vd_dp dp=1
# VCVT float to int with specified rounding mode; Vd is always single-precision
VCVT 1111 1110 1.11 11 rm:2 .... 1010 op:1 1.0 .... \
vm=%vm_sp vd=%vd_sp dp=0
VCVT 1111 1110 1.11 11 rm:2 .... 1011 op:1 1.0 .... \
vm=%vm_dp vd=%vd_sp dp=1
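
The %vm_dp / %vm_sp definitions above (and the vn/vd equivalents) implement the register-number split the header comment describes: decodetree concatenates the listed sub-fields most-significant first, so the one-bit field is the top bit of a double-precision register number but the bottom bit of a single-precision one. A worked expansion in plain C, with the helper names invented:

#include <assert.h>
#include <stdint.h>

/* Invented helpers expanding the field definitions above. */
static unsigned vm_dp(uint32_t insn)
{
    return (((insn >> 5) & 1) << 4) | (insn & 0xf);   /* %vm_dp 5:1 0:4 */
}

static unsigned vm_sp(uint32_t insn)
{
    return ((insn & 0xf) << 1) | ((insn >> 5) & 1);   /* %vm_sp 0:4 5:1 */
}

int main(void)
{
    uint32_t insn = 0x0000002a;    /* bits [3:0] = 0xa, bit 5 = 1 */
    assert(vm_dp(insn) == 26);     /* D26: the one-bit field is the MSB */
    assert(vm_sp(insn) == 21);     /* S21: the one-bit field is the LSB */
    return 0;
}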

target/arm/vfp.decode (new file, 242 lines)

@@ -0,0 +1,242 @@
# AArch32 VFP instruction descriptions (conditional insns)
#
# Copyright (c) 2019 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# Encodings for the conditional VFP instructions are here:
# generally anything matching A32
# cccc 11.. .... .... .... 101. .... ....
# and T32
# 1110 110. .... .... .... 101. .... ....
# 1110 1110 .... .... .... 101. .... ....
# (but those patterns might also cover some Neon instructions,
# which do not live in this file.)
# VFP registers have an odd encoding with a four-bit field
# and a one-bit field which are assembled in different orders
# depending on whether the register is double or single precision.
# Each individual instruction function must do the checks for
# "double register selected but CPU does not have double support"
# and "double register number has bit 4 set but CPU does not
# support D16-D31" (which should UNDEF).
%vm_dp 5:1 0:4
%vm_sp 0:4 5:1
%vn_dp 7:1 16:4
%vn_sp 16:4 7:1
%vd_dp 22:1 12:4
%vd_sp 12:4 22:1
%vmov_idx_b 21:1 5:2
%vmov_idx_h 21:1 6:1
# VMOV scalar to general-purpose register; note that this does
# include some Neon cases.
VMOV_to_gp ---- 1110 u:1 1. 1 .... rt:4 1011 ... 1 0000 \
vn=%vn_dp size=0 index=%vmov_idx_b
VMOV_to_gp ---- 1110 u:1 0. 1 .... rt:4 1011 ..1 1 0000 \
vn=%vn_dp size=1 index=%vmov_idx_h
VMOV_to_gp ---- 1110 0 0 index:1 1 .... rt:4 1011 .00 1 0000 \
vn=%vn_dp size=2 u=0
VMOV_from_gp ---- 1110 0 1. 0 .... rt:4 1011 ... 1 0000 \
vn=%vn_dp size=0 index=%vmov_idx_b
VMOV_from_gp ---- 1110 0 0. 0 .... rt:4 1011 ..1 1 0000 \
vn=%vn_dp size=1 index=%vmov_idx_h
VMOV_from_gp ---- 1110 0 0 index:1 0 .... rt:4 1011 .00 1 0000 \
vn=%vn_dp size=2
VDUP ---- 1110 1 b:1 q:1 0 .... rt:4 1011 . 0 e:1 1 0000 \
vn=%vn_dp
VMSR_VMRS ---- 1110 111 l:1 reg:4 rt:4 1010 0001 0000
VMOV_single ---- 1110 000 l:1 .... rt:4 1010 . 001 0000 \
vn=%vn_sp
VMOV_64_sp ---- 1100 010 op:1 rt2:4 rt:4 1010 00.1 .... \
vm=%vm_sp
VMOV_64_dp ---- 1100 010 op:1 rt2:4 rt:4 1011 00.1 .... \
vm=%vm_dp
# Note that the half-precision variants of VLDR and VSTR are
# not part of this decodetree at all because they have bits [9:8] == 0b01
VLDR_VSTR_sp ---- 1101 u:1 .0 l:1 rn:4 .... 1010 imm:8 \
vd=%vd_sp
VLDR_VSTR_dp ---- 1101 u:1 .0 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp
# We split the load/store multiple up into two patterns to avoid
# overlap with other insns in the "Advanced SIMD load/store and 64-bit move"
# grouping:
# P=0 U=0 W=0 is 64-bit VMOV
# P=1 W=0 is VLDR/VSTR
# P=U W=1 is UNDEF
# leaving P=0 U=1 W=x and P=1 U=0 W=1 for load/store multiple.
# These include FSTM/FLDM.
VLDM_VSTM_sp ---- 1100 1 . w:1 l:1 rn:4 .... 1010 imm:8 \
vd=%vd_sp p=0 u=1
VLDM_VSTM_dp ---- 1100 1 . w:1 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp p=0 u=1
VLDM_VSTM_sp ---- 1101 0.1 l:1 rn:4 .... 1010 imm:8 \
vd=%vd_sp p=1 u=0 w=1
VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp p=1 u=0 w=1
# 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VFM_sp ---- 1110 1.01 .... .... 1010 . o2:1 . 0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp o1=1
VFM_dp ---- 1110 1.01 .... .... 1011 . o2:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=1
VFM_sp ---- 1110 1.10 .... .... 1010 . o2:1 . 0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp o1=2
VFM_dp ---- 1110 1.10 .... .... 1011 . o2:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=2
VMOV_imm_sp ---- 1110 1.11 imm4h:4 .... 1010 0000 imm4l:4 \
vd=%vd_sp
VMOV_imm_dp ---- 1110 1.11 imm4h:4 .... 1011 0000 imm4l:4 \
vd=%vd_dp
VMOV_reg_sp ---- 1110 1.11 0000 .... 1010 01.0 .... \
vd=%vd_sp vm=%vm_sp
VMOV_reg_dp ---- 1110 1.11 0000 .... 1011 01.0 .... \
vd=%vd_dp vm=%vm_dp
VABS_sp ---- 1110 1.11 0000 .... 1010 11.0 .... \
vd=%vd_sp vm=%vm_sp
VABS_dp ---- 1110 1.11 0000 .... 1011 11.0 .... \
vd=%vd_dp vm=%vm_dp
VNEG_sp ---- 1110 1.11 0001 .... 1010 01.0 .... \
vd=%vd_sp vm=%vm_sp
VNEG_dp ---- 1110 1.11 0001 .... 1011 01.0 .... \
vd=%vd_dp vm=%vm_dp
VSQRT_sp ---- 1110 1.11 0001 .... 1010 11.0 .... \
vd=%vd_sp vm=%vm_sp
VSQRT_dp ---- 1110 1.11 0001 .... 1011 11.0 .... \
vd=%vd_dp vm=%vm_dp
VCMP_sp ---- 1110 1.11 010 z:1 .... 1010 e:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCMP_dp ---- 1110 1.11 010 z:1 .... 1011 e:1 1.0 .... \
vd=%vd_dp vm=%vm_dp
# VCVTT and VCVTB from f16: Vd format depends on size bit; Vm is always vm_sp
VCVT_f32_f16 ---- 1110 1.11 0010 .... 1010 t:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_f64_f16 ---- 1110 1.11 0010 .... 1011 t:1 1.0 .... \
vd=%vd_dp vm=%vm_sp
# VCVTB and VCVTT to f16: Vd format is always vd_sp; Vm format depends on size bit
VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
vd=%vd_sp vm=%vm_dp
VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... \
vd=%vd_sp vm=%vm_sp
VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... \
vd=%vd_dp vm=%vm_dp
VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... \
vd=%vd_sp vm=%vm_sp
VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... \
vd=%vd_dp vm=%vm_dp
VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... \
vd=%vd_sp vm=%vm_sp
VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... \
vd=%vd_dp vm=%vm_dp
# VCVT between single and double: Vm precision depends on size; Vd is its reverse
VCVT_sp ---- 1110 1.11 0111 .... 1010 11.0 .... \
vd=%vd_dp vm=%vm_sp
VCVT_dp ---- 1110 1.11 0111 .... 1011 11.0 .... \
vd=%vd_sp vm=%vm_dp
# VCVT from integer to floating point: Vm always single; Vd depends on size
VCVT_int_sp ---- 1110 1.11 1000 .... 1010 s:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_int_dp ---- 1110 1.11 1000 .... 1011 s:1 1.0 .... \
vd=%vd_dp vm=%vm_sp
# VJCVT is always dp to sp
VJCVT ---- 1110 1.11 1001 .... 1011 11.0 .... \
vd=%vd_sp vm=%vm_dp
# VCVT between floating-point and fixed-point. The immediate value
# is in the same format as a Vm single-precision register number.
# We assemble bits 18 (op), 16 (u) and 7 (sx) into a single opc field
# for the convenience of the trans_VCVT_fix functions.
%vcvt_fix_op 18:1 16:1 7:1
VCVT_fix_sp ---- 1110 1.11 1.1. .... 1010 .1.0 .... \
vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op
VCVT_fix_dp ---- 1110 1.11 1.1. .... 1011 .1.0 .... \
vd=%vd_dp imm=%vm_sp opc=%vcvt_fix_op
# VCVT float to integer (VCVT and VCVTR): Vd always single; Vm depends on size
VCVT_sp_int ---- 1110 1.11 110 s:1 .... 1010 rz:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_dp_int ---- 1110 1.11 110 s:1 .... 1011 rz:1 1.0 .... \
vd=%vd_sp vm=%vm_dp
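
The load/store-multiple comment above splits the encodings by the P, U and W bits; written out as a small self-contained classification (the enum and function names are invented here, not taken from the translator):

#include <assert.h>
#include <stdbool.h>

/* Invented names: a restatement of the P/U/W split described in the
 * "load/store multiple" comment in this file. */
typedef enum { INSN_VMOV64, INSN_VLDR_VSTR, INSN_VLDM_VSTM, INSN_UNDEF } Kind;

static Kind classify(bool p, bool u, bool w)
{
    if (!p && !u && !w) {
        return INSN_VMOV64;          /* P=0 U=0 W=0: 64-bit VMOV */
    }
    if (p && !w) {
        return INSN_VLDR_VSTR;       /* P=1 W=0: VLDR/VSTR */
    }
    if (p == u && w) {
        return INSN_UNDEF;           /* P=U W=1: UNDEF */
    }
    return INSN_VLDM_VSTM;           /* P=0 U=1 W=x and P=1 U=0 W=1 */
}

int main(void)
{
    assert(classify(0, 0, 0) == INSN_VMOV64);
    assert(classify(1, 0, 0) == INSN_VLDR_VSTR);
    assert(classify(1, 1, 1) == INSN_UNDEF);
    assert(classify(0, 1, 0) == INSN_VLDM_VSTM);
    assert(classify(0, 1, 1) == INSN_VLDM_VSTM);
    assert(classify(1, 0, 1) == INSN_VLDM_VSTM);
    return 0;
}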


@@ -15,7 +15,7 @@ run-fcvt: fcvt
$(call run-test,$<,$(QEMU) $<, "$< on $(TARGET_NAME)")
$(call diff-out,$<,$(AARCH64_SRC)/fcvt.ref)
AARCH64_TESTS += pauth-1
AARCH64_TESTS += pauth-1 pauth-2
run-pauth-%: QEMU += -cpu max
TESTS:=$(AARCH64_TESTS)


@@ -0,0 +1,61 @@
#include <stdint.h>
#include <assert.h>
asm(".arch armv8.4-a");
void do_test(uint64_t value)
{
uint64_t salt1, salt2;
uint64_t encode, decode;
/*
* With TBI enabled and a 48-bit VA, there are 7 bits of auth,
* and so a 1/128 chance that encode = pac(value, key, salt) leaves
* value unchanged.
* Iterate until we find a salt for which encode != value.
*/
for (salt1 = 1; ; salt1++) {
asm volatile("pacda %0, %2" : "=r"(encode) : "0"(value), "r"(salt1));
if (encode != value) {
break;
}
}
/* A valid salt must produce a valid authorization. */
asm volatile("autda %0, %2" : "=r"(decode) : "0"(encode), "r"(salt1));
assert(decode == value);
/*
* An invalid salt usually fails authorization, but again there
* is a chance of choosing another salt that works.
* Iterate until we find another salt which does fail.
*/
for (salt2 = salt1 + 1; ; salt2++) {
asm volatile("autda %0, %2" : "=r"(decode) : "0"(encode), "r"(salt2));
if (decode != value) {
break;
}
}
/* The VA bits, bit 55, and the TBI bits, should be unchanged. */
assert(((decode ^ value) & 0xff80ffffffffffffull) == 0);
/*
* Bits [54:53] are an error indicator based on the key used;
* the DA key above is keynumber 0, so error == 0b01. Otherwise
* bit 55 of the original is sign-extended into the rest of the auth.
*/
if ((value >> 55) & 1) {
assert(((decode >> 48) & 0xff) == 0b10111111);
} else {
assert(((decode >> 48) & 0xff) == 0b00100000);
}
}
int main()
{
do_test(0);
do_test(-1);
do_test(0xda004acedeadbeefull);
return 0;
}
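
The two magic constants in the asserts follow from the error-code placement the comment describes; a standalone recomputation using only the bit positions stated above:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* DA is key number 0, so the error code is (0 << 1) | (0 ^ 1) = 0b01. */
    unsigned keynumber = 0;
    unsigned error_code = (keynumber << 1) | (keynumber ^ 1);

    /* With TBI the code is deposited at bits [54:53]; bit 55 keeps the
     * original value and bits [52:48] hold its sign extension. Byte
     * [55:48] of the result is therefore: */
    unsigned top_bit55_set   = (1u << 7) | (error_code << 5) | 0x1f;
    unsigned top_bit55_clear = (0u << 7) | (error_code << 5) | 0x00;

    assert(top_bit55_set   == 0xbf);   /* 0b10111111, as the test expects */
    assert(top_bit55_clear == 0x20);   /* 0b00100000 */
    return 0;
}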