diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 20c71d7d96..586baa9b64 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -24,6 +24,7 @@ #include "qemu/config-file.h" #include "qemu/option.h" #include "exec/address-spaces.h" +#include "qemu/units.h" /* Kernel boot protocol is specified in the kernel docs * Documentation/arm/Booting and Documentation/arm64/booting.txt @@ -36,6 +37,8 @@ #define ARM64_TEXT_OFFSET_OFFSET 8 #define ARM64_MAGIC_OFFSET 56 +#define BOOTLOADER_MAX_SIZE (4 * KiB) + AddressSpace *arm_boot_address_space(ARMCPU *cpu, const struct arm_boot_info *info) { @@ -184,6 +187,8 @@ static void write_bootloader(const char *name, hwaddr addr, code[i] = tswap32(insn); } + assert((len * sizeof(uint32_t)) < BOOTLOADER_MAX_SIZE); + rom_add_blob_fixed_as(name, code, len * sizeof(uint32_t), addr, as); g_free(code); @@ -919,6 +924,19 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, memcpy(&hdrvals, buffer + ARM64_TEXT_OFFSET_OFFSET, sizeof(hdrvals)); if (hdrvals[1] != 0) { kernel_load_offset = le64_to_cpu(hdrvals[0]); + + /* + * We write our startup "bootloader" at the very bottom of RAM, + * so that bit can't be used for the image. Luckily the Image + * format specification is that the image requests only an offset + * from a 2MB boundary, not an absolute load address. So if the + * image requests an offset that might mean it overlaps with the + * bootloader, we can just load it starting at 2MB+offset rather + * than 0MB + offset. + */ + if (kernel_load_offset < BOOTLOADER_MAX_SIZE) { + kernel_load_offset += 2 * MiB; + } } } diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 0d816fdd2c..0beefb05d4 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -1055,17 +1055,17 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) case 0xd5c: /* MMFR3. */ return cpu->id_mmfr3; case 0xd60: /* ISAR0. */ - return cpu->id_isar0; + return cpu->isar.id_isar0; case 0xd64: /* ISAR1. */ - return cpu->id_isar1; + return cpu->isar.id_isar1; case 0xd68: /* ISAR2. */ - return cpu->id_isar2; + return cpu->isar.id_isar2; case 0xd6c: /* ISAR3. */ - return cpu->id_isar3; + return cpu->isar.id_isar3; case 0xd70: /* ISAR4. */ - return cpu->id_isar4; + return cpu->isar.id_isar4; case 0xd74: /* ISAR5. */ - return cpu->id_isar5; + return cpu->isar.id_isar5; case 0xd78: /* CLIDR */ return cpu->clidr; case 0xd7c: /* CTR */ diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 1795998928..d95cc27f58 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -142,6 +142,7 @@ #define GEM_DESCONF4 (0x0000028C/4) #define GEM_DESCONF5 (0x00000290/4) #define GEM_DESCONF6 (0x00000294/4) +#define GEM_DESCONF6_64B_MASK (1U << 23) #define GEM_DESCONF7 (0x00000298/4) #define GEM_INT_Q1_STATUS (0x00000400 / 4) @@ -1283,6 +1284,7 @@ static void gem_reset(DeviceState *d) int i; CadenceGEMState *s = CADENCE_GEM(d); const uint8_t *a; + uint32_t queues_mask = 0; DB_PRINT("\n"); @@ -1299,7 +1301,12 @@ static void gem_reset(DeviceState *d) s->regs[GEM_DESCONF] = 0x02500111; s->regs[GEM_DESCONF2] = 0x2ab13fff; s->regs[GEM_DESCONF5] = 0x002f2045; - s->regs[GEM_DESCONF6] = 0x00000200; + s->regs[GEM_DESCONF6] = GEM_DESCONF6_64B_MASK; + + if (s->num_priority_queues > 1) { + queues_mask = MAKE_64BIT_MASK(1, s->num_priority_queues - 1); + s->regs[GEM_DESCONF6] |= queues_mask; + } /* Set MAC address */ a = &s->conf.macaddr.a[0]; diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c index 95a143bfba..623d0333e8 100644 --- a/hw/sd/ssi-sd.c +++ b/hw/sd/ssi-sd.c @@ -284,6 +284,8 @@ static void ssi_sd_class_init(ObjectClass *klass, void *data) k->cs_polarity = SSI_CS_LOW; dc->vmsd = &vmstate_ssi_sd; dc->reset = ssi_sd_reset; + /* Reason: init() method uses drive_get_next() */ + dc->user_creatable = false; } static const TypeInfo ssi_sd_info = { diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c index 07fedfc33c..f84a9cf28a 100644 --- a/linux-user/aarch64/signal.c +++ b/linux-user/aarch64/signal.c @@ -314,7 +314,7 @@ static int target_restore_sigframe(CPUARMState *env, break; case TARGET_SVE_MAGIC: - if (arm_feature(env, ARM_FEATURE_SVE)) { + if (cpu_isar_feature(aa64_sve, arm_env_get_cpu(env))) { vq = (env->vfp.zcr_el[1] & 0xf) + 1; sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16); if (!sve && size == sve_size) { @@ -433,7 +433,7 @@ static void target_setup_frame(int usig, struct target_sigaction *ka, &layout); /* SVE state needs saving only if it exists. */ - if (arm_feature(env, ARM_FEATURE_SVE)) { + if (cpu_isar_feature(aa64_sve, arm_env_get_cpu(env))) { vq = (env->vfp.zcr_el[1] & 0xf) + 1; sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16); sve_ofs = alloc_sigframe_space(sve_size, &layout); diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 10bca65b99..055f6a95ab 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -458,6 +458,10 @@ static uint32_t get_elf_hwcap(void) /* probe for the extra features */ #define GET_FEATURE(feat, hwcap) \ do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0) + +#define GET_FEATURE_ID(feat, hwcap) \ + do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0) + /* EDSP is in v5TE and above, but all our v5 CPUs are v5TE */ GET_FEATURE(ARM_FEATURE_V5, ARM_HWCAP_ARM_EDSP); GET_FEATURE(ARM_FEATURE_VFP, ARM_HWCAP_ARM_VFP); @@ -467,8 +471,8 @@ static uint32_t get_elf_hwcap(void) GET_FEATURE(ARM_FEATURE_VFP3, ARM_HWCAP_ARM_VFPv3); GET_FEATURE(ARM_FEATURE_V6K, ARM_HWCAP_ARM_TLS); GET_FEATURE(ARM_FEATURE_VFP4, ARM_HWCAP_ARM_VFPv4); - GET_FEATURE(ARM_FEATURE_ARM_DIV, ARM_HWCAP_ARM_IDIVA); - GET_FEATURE(ARM_FEATURE_THUMB_DIV, ARM_HWCAP_ARM_IDIVT); + GET_FEATURE_ID(arm_div, ARM_HWCAP_ARM_IDIVA); + GET_FEATURE_ID(thumb_div, ARM_HWCAP_ARM_IDIVT); /* All QEMU's VFPv3 CPUs have 32 registers, see VFP_DREG in translate.c. * Note that the ARM_HWCAP_ARM_VFPv3D16 bit is always the inverse of * ARM_HWCAP_ARM_VFPD32 (and so always clear for QEMU); it is unrelated @@ -485,15 +489,16 @@ static uint32_t get_elf_hwcap2(void) ARMCPU *cpu = ARM_CPU(thread_cpu); uint32_t hwcaps = 0; - GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES); - GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL); - GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1); - GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2); - GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32); + GET_FEATURE_ID(aa32_aes, ARM_HWCAP2_ARM_AES); + GET_FEATURE_ID(aa32_pmull, ARM_HWCAP2_ARM_PMULL); + GET_FEATURE_ID(aa32_sha1, ARM_HWCAP2_ARM_SHA1); + GET_FEATURE_ID(aa32_sha2, ARM_HWCAP2_ARM_SHA2); + GET_FEATURE_ID(aa32_crc32, ARM_HWCAP2_ARM_CRC32); return hwcaps; } #undef GET_FEATURE +#undef GET_FEATURE_ID #else /* 64 bit ARM definitions */ @@ -568,25 +573,26 @@ static uint32_t get_elf_hwcap(void) hwcaps |= ARM_HWCAP_A64_ASIMD; /* probe for the extra features */ -#define GET_FEATURE(feat, hwcap) \ - do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0) - GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_AES); - GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL); - GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP_A64_SHA1); - GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP_A64_SHA2); - GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32); - GET_FEATURE(ARM_FEATURE_V8_SHA3, ARM_HWCAP_A64_SHA3); - GET_FEATURE(ARM_FEATURE_V8_SM3, ARM_HWCAP_A64_SM3); - GET_FEATURE(ARM_FEATURE_V8_SM4, ARM_HWCAP_A64_SM4); - GET_FEATURE(ARM_FEATURE_V8_SHA512, ARM_HWCAP_A64_SHA512); - GET_FEATURE(ARM_FEATURE_V8_FP16, - ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP); - GET_FEATURE(ARM_FEATURE_V8_ATOMICS, ARM_HWCAP_A64_ATOMICS); - GET_FEATURE(ARM_FEATURE_V8_RDM, ARM_HWCAP_A64_ASIMDRDM); - GET_FEATURE(ARM_FEATURE_V8_DOTPROD, ARM_HWCAP_A64_ASIMDDP); - GET_FEATURE(ARM_FEATURE_V8_FCMA, ARM_HWCAP_A64_FCMA); - GET_FEATURE(ARM_FEATURE_SVE, ARM_HWCAP_A64_SVE); -#undef GET_FEATURE +#define GET_FEATURE_ID(feat, hwcap) \ + do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0) + + GET_FEATURE_ID(aa64_aes, ARM_HWCAP_A64_AES); + GET_FEATURE_ID(aa64_pmull, ARM_HWCAP_A64_PMULL); + GET_FEATURE_ID(aa64_sha1, ARM_HWCAP_A64_SHA1); + GET_FEATURE_ID(aa64_sha256, ARM_HWCAP_A64_SHA2); + GET_FEATURE_ID(aa64_sha512, ARM_HWCAP_A64_SHA512); + GET_FEATURE_ID(aa64_crc32, ARM_HWCAP_A64_CRC32); + GET_FEATURE_ID(aa64_sha3, ARM_HWCAP_A64_SHA3); + GET_FEATURE_ID(aa64_sm3, ARM_HWCAP_A64_SM3); + GET_FEATURE_ID(aa64_sm4, ARM_HWCAP_A64_SM4); + GET_FEATURE_ID(aa64_fp16, ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP); + GET_FEATURE_ID(aa64_atomics, ARM_HWCAP_A64_ATOMICS); + GET_FEATURE_ID(aa64_rdm, ARM_HWCAP_A64_ASIMDRDM); + GET_FEATURE_ID(aa64_dp, ARM_HWCAP_A64_ASIMDDP); + GET_FEATURE_ID(aa64_fcma, ARM_HWCAP_A64_FCMA); + GET_FEATURE_ID(aa64_sve, ARM_HWCAP_A64_SVE); + +#undef GET_FEATURE_ID return hwcaps; } diff --git a/linux-user/syscall.c b/linux-user/syscall.c index cf4511b0e4..15b03e17b9 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -9544,7 +9544,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, * even though the current architectural maximum is VQ=16. */ ret = -TARGET_EINVAL; - if (arm_feature(cpu_env, ARM_FEATURE_SVE) + if (cpu_isar_feature(aa64_sve, arm_env_get_cpu(cpu_env)) && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) { CPUARMState *env = cpu_env; ARMCPU *cpu = arm_env_get_cpu(env); @@ -9563,9 +9563,11 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, return ret; case TARGET_PR_SVE_GET_VL: ret = -TARGET_EINVAL; - if (arm_feature(cpu_env, ARM_FEATURE_SVE)) { - CPUARMState *env = cpu_env; - ret = ((env->vfp.zcr_el[1] & 0xf) + 1) * 16; + { + ARMCPU *cpu = arm_env_get_cpu(cpu_env); + if (cpu_isar_feature(aa64_sve, cpu)) { + ret = ((cpu->env.vfp.zcr_el[1] & 0xf) + 1) * 16; + } } return ret; #endif /* AARCH64 */ diff --git a/target/arm/cpu.c b/target/arm/cpu.c index cd48ad42d8..8f16e96b6c 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -144,9 +144,9 @@ static void arm_cpu_reset(CPUState *s) g_hash_table_foreach(cpu->cp_regs, cp_reg_check_reset, cpu); env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid; - env->vfp.xregs[ARM_VFP_MVFR0] = cpu->mvfr0; - env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1; - env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2; + env->vfp.xregs[ARM_VFP_MVFR0] = cpu->isar.mvfr0; + env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1; + env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2; cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON; s->halted = cpu->start_powered_off; @@ -814,7 +814,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) /* Some features automatically imply others: */ if (arm_feature(env, ARM_FEATURE_V8)) { - set_feature(env, ARM_FEATURE_V7VE); + if (arm_feature(env, ARM_FEATURE_M)) { + set_feature(env, ARM_FEATURE_V7); + } else { + set_feature(env, ARM_FEATURE_V7VE); + } } if (arm_feature(env, ARM_FEATURE_V7VE)) { /* v7 Virtualization Extensions. In real hardware this implies @@ -825,7 +829,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * Presence of EL2 itself is ARM_FEATURE_EL2, and of the * Security Extensions is ARM_FEATURE_EL3. */ - set_feature(env, ARM_FEATURE_ARM_DIV); + assert(cpu_isar_feature(arm_div, cpu)); set_feature(env, ARM_FEATURE_LPAE); set_feature(env, ARM_FEATURE_V7); } @@ -850,20 +854,14 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (arm_feature(env, ARM_FEATURE_V6)) { set_feature(env, ARM_FEATURE_V5); - set_feature(env, ARM_FEATURE_JAZELLE); if (!arm_feature(env, ARM_FEATURE_M)) { + assert(cpu_isar_feature(jazelle, cpu)); set_feature(env, ARM_FEATURE_AUXCR); } } if (arm_feature(env, ARM_FEATURE_V5)) { set_feature(env, ARM_FEATURE_V4T); } - if (arm_feature(env, ARM_FEATURE_M)) { - set_feature(env, ARM_FEATURE_THUMB_DIV); - } - if (arm_feature(env, ARM_FEATURE_ARM_DIV)) { - set_feature(env, ARM_FEATURE_THUMB_DIV); - } if (arm_feature(env, ARM_FEATURE_VFP4)) { set_feature(env, ARM_FEATURE_VFP3); set_feature(env, ARM_FEATURE_VFP_FP16); @@ -938,7 +936,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12]. */ cpu->id_pfr1 &= ~0xf0; - cpu->id_aa64pfr0 &= ~0xf000; + cpu->isar.id_aa64pfr0 &= ~0xf000; } if (!cpu->has_el2) { @@ -955,7 +953,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * registers if we don't have EL2. These are id_pfr1[15:12] and * id_aa64pfr0_el1[11:8]. */ - cpu->id_aa64pfr0 &= ~0xf00; + cpu->isar.id_aa64pfr0 &= ~0xf00; cpu->id_pfr1 &= ~0xf000; } @@ -1084,11 +1082,16 @@ static void arm926_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_VFP); set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); - set_feature(&cpu->env, ARM_FEATURE_JAZELLE); cpu->midr = 0x41069265; cpu->reset_fpsid = 0x41011090; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00090078; + + /* + * ARMv5 does not have the ID_ISAR registers, but we can still + * set the field to indicate Jazelle support within QEMU. + */ + cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); } static void arm946_initfn(Object *obj) @@ -1114,12 +1117,18 @@ static void arm1026_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_AUXCR); set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); - set_feature(&cpu->env, ARM_FEATURE_JAZELLE); cpu->midr = 0x4106a262; cpu->reset_fpsid = 0x410110a0; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00090078; cpu->reset_auxcr = 1; + + /* + * ARMv5 does not have the ID_ISAR registers, but we can still + * set the field to indicate Jazelle support within QEMU. + */ + cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + { /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */ ARMCPRegInfo ifar = { @@ -1151,8 +1160,8 @@ static void arm1136_r2_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); cpu->midr = 0x4107b362; cpu->reset_fpsid = 0x410120b4; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; @@ -1162,11 +1171,11 @@ static void arm1136_r2_initfn(Object *obj) cpu->id_mmfr0 = 0x01130003; cpu->id_mmfr1 = 0x10030302; cpu->id_mmfr2 = 0x01222110; - cpu->id_isar0 = 0x00140011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11231111; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x141; + cpu->isar.id_isar0 = 0x00140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231111; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; cpu->reset_auxcr = 7; } @@ -1183,8 +1192,8 @@ static void arm1136_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); cpu->midr = 0x4117b363; cpu->reset_fpsid = 0x410120b4; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; @@ -1194,11 +1203,11 @@ static void arm1136_initfn(Object *obj) cpu->id_mmfr0 = 0x01130003; cpu->id_mmfr1 = 0x10030302; cpu->id_mmfr2 = 0x01222110; - cpu->id_isar0 = 0x00140011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11231111; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x141; + cpu->isar.id_isar0 = 0x00140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231111; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; cpu->reset_auxcr = 7; } @@ -1216,8 +1225,8 @@ static void arm1176_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_EL3); cpu->midr = 0x410fb767; cpu->reset_fpsid = 0x410120b5; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; @@ -1227,11 +1236,11 @@ static void arm1176_initfn(Object *obj) cpu->id_mmfr0 = 0x01130003; cpu->id_mmfr1 = 0x10030302; cpu->id_mmfr2 = 0x01222100; - cpu->id_isar0 = 0x0140011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11231121; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x01141; + cpu->isar.id_isar0 = 0x0140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231121; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x01141; cpu->reset_auxcr = 7; } @@ -1247,8 +1256,8 @@ static void arm11mpcore_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); cpu->midr = 0x410fb022; cpu->reset_fpsid = 0x410120b4; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ cpu->id_pfr0 = 0x111; cpu->id_pfr1 = 0x1; @@ -1257,11 +1266,11 @@ static void arm11mpcore_initfn(Object *obj) cpu->id_mmfr0 = 0x01100103; cpu->id_mmfr1 = 0x10020302; cpu->id_mmfr2 = 0x01222000; - cpu->id_isar0 = 0x00100011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11221011; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x141; + cpu->isar.id_isar0 = 0x00100011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11221011; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; cpu->reset_auxcr = 1; } @@ -1290,13 +1299,13 @@ static void cortex_m3_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x00000000; cpu->id_mmfr3 = 0x00000000; - cpu->id_isar0 = 0x01141110; - cpu->id_isar1 = 0x02111000; - cpu->id_isar2 = 0x21112231; - cpu->id_isar3 = 0x01111110; - cpu->id_isar4 = 0x01310102; - cpu->id_isar5 = 0x00000000; - cpu->id_isar6 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; } static void cortex_m4_initfn(Object *obj) @@ -1317,13 +1326,13 @@ static void cortex_m4_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x00000000; cpu->id_mmfr3 = 0x00000000; - cpu->id_isar0 = 0x01141110; - cpu->id_isar1 = 0x02111000; - cpu->id_isar2 = 0x21112231; - cpu->id_isar3 = 0x01111110; - cpu->id_isar4 = 0x01310102; - cpu->id_isar5 = 0x00000000; - cpu->id_isar6 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; } static void cortex_m33_initfn(Object *obj) @@ -1346,13 +1355,13 @@ static void cortex_m33_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x01000000; cpu->id_mmfr3 = 0x00000000; - cpu->id_isar0 = 0x01101110; - cpu->id_isar1 = 0x02212000; - cpu->id_isar2 = 0x20232232; - cpu->id_isar3 = 0x01111131; - cpu->id_isar4 = 0x01310132; - cpu->id_isar5 = 0x00000000; - cpu->id_isar6 = 0x00000000; + cpu->isar.id_isar0 = 0x01101110; + cpu->isar.id_isar1 = 0x02212000; + cpu->isar.id_isar2 = 0x20232232; + cpu->isar.id_isar3 = 0x01111131; + cpu->isar.id_isar4 = 0x01310132; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; cpu->clidr = 0x00000000; cpu->ctr = 0x8000c000; } @@ -1384,8 +1393,6 @@ static void cortex_r5_initfn(Object *obj) ARMCPU *cpu = ARM_CPU(obj); set_feature(&cpu->env, ARM_FEATURE_V7); - set_feature(&cpu->env, ARM_FEATURE_THUMB_DIV); - set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_PMSA); cpu->midr = 0x411fc153; /* r1p3 */ @@ -1397,13 +1404,13 @@ static void cortex_r5_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x01200000; cpu->id_mmfr3 = 0x0211; - cpu->id_isar0 = 0x02101111; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232141; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x0010142; - cpu->id_isar5 = 0x0; - cpu->id_isar6 = 0x0; + cpu->isar.id_isar0 = 0x02101111; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232141; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x0010142; + cpu->isar.id_isar5 = 0x0; + cpu->isar.id_isar6 = 0x0; cpu->mp_is_up = true; cpu->pmsav7_dregion = 16; define_arm_cp_regs(cpu, cortexr5_cp_reginfo); @@ -1438,8 +1445,8 @@ static void cortex_a8_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_EL3); cpu->midr = 0x410fc080; cpu->reset_fpsid = 0x410330c0; - cpu->mvfr0 = 0x11110222; - cpu->mvfr1 = 0x00011111; + cpu->isar.mvfr0 = 0x11110222; + cpu->isar.mvfr1 = 0x00011111; cpu->ctr = 0x82048004; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x1031; @@ -1450,11 +1457,11 @@ static void cortex_a8_initfn(Object *obj) cpu->id_mmfr1 = 0x20000000; cpu->id_mmfr2 = 0x01202000; cpu->id_mmfr3 = 0x11; - cpu->id_isar0 = 0x00101111; - cpu->id_isar1 = 0x12112111; - cpu->id_isar2 = 0x21232031; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x00111142; + cpu->isar.id_isar0 = 0x00101111; + cpu->isar.id_isar1 = 0x12112111; + cpu->isar.id_isar2 = 0x21232031; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x00111142; cpu->dbgdidr = 0x15141000; cpu->clidr = (1 << 27) | (2 << 24) | 3; cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */ @@ -1512,8 +1519,8 @@ static void cortex_a9_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_CBAR); cpu->midr = 0x410fc090; cpu->reset_fpsid = 0x41033090; - cpu->mvfr0 = 0x11110222; - cpu->mvfr1 = 0x01111111; + cpu->isar.mvfr0 = 0x11110222; + cpu->isar.mvfr1 = 0x01111111; cpu->ctr = 0x80038003; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x1031; @@ -1524,11 +1531,11 @@ static void cortex_a9_initfn(Object *obj) cpu->id_mmfr1 = 0x20000000; cpu->id_mmfr2 = 0x01230000; cpu->id_mmfr3 = 0x00002111; - cpu->id_isar0 = 0x00101111; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232041; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x00111142; + cpu->isar.id_isar0 = 0x00101111; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x00111142; cpu->dbgdidr = 0x35141000; cpu->clidr = (1 << 27) | (1 << 24) | 3; cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ @@ -1573,8 +1580,8 @@ static void cortex_a7_initfn(Object *obj) cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7; cpu->midr = 0x410fc075; cpu->reset_fpsid = 0x41023075; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x11111111; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x84448003; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x00001131; @@ -1590,11 +1597,11 @@ static void cortex_a7_initfn(Object *obj) /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but * table 4-41 gives 0x02101110, which includes the arm div insns. */ - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232041; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x10011142; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x10011142; cpu->dbgdidr = 0x3515f005; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ @@ -1619,8 +1626,8 @@ static void cortex_a15_initfn(Object *obj) cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15; cpu->midr = 0x412fc0f1; cpu->reset_fpsid = 0x410430f0; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x11111111; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x00001131; @@ -1633,11 +1640,11 @@ static void cortex_a15_initfn(Object *obj) cpu->id_mmfr1 = 0x20000000; cpu->id_mmfr2 = 0x01240000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232041; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x10011142; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x10011142; cpu->dbgdidr = 0x3515f021; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ @@ -1830,17 +1837,26 @@ static void arm_max_initfn(Object *obj) cortex_a15_initfn(obj); #ifdef CONFIG_USER_ONLY /* We don't set these in system emulation mode for the moment, - * since we don't correctly set the ID registers to advertise them, + * since we don't correctly set (all of) the ID registers to + * advertise them. */ set_feature(&cpu->env, ARM_FEATURE_V8); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); - set_feature(&cpu->env, ARM_FEATURE_V8_RDM); - set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD); - set_feature(&cpu->env, ARM_FEATURE_V8_FCMA); + { + uint32_t t; + + t = cpu->isar.id_isar5; + t = FIELD_DP32(t, ID_ISAR5, AES, 2); + t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); + t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); + t = FIELD_DP32(t, ID_ISAR5, CRC32, 1); + t = FIELD_DP32(t, ID_ISAR5, RDM, 1); + t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); + cpu->isar.id_isar5 = t; + + t = cpu->isar.id_isar6; + t = FIELD_DP32(t, ID_ISAR6, DP, 1); + cpu->isar.id_isar6 = t; + } #endif } } diff --git a/target/arm/cpu.h b/target/arm/cpu.h index f00c0444c4..8e6779936e 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -531,6 +531,13 @@ typedef struct CPUARMState { */ } exception; + /* Information associated with an SError */ + struct { + uint8_t pending; + uint8_t has_esr; + uint64_t esr; + } serror; + /* Thumb-2 EE state. */ uint32_t teecr; uint32_t teehbr; @@ -669,6 +676,8 @@ typedef enum ARMPSCIState { PSCI_ON_PENDING = 2 } ARMPSCIState; +typedef struct ARMISARegisters ARMISARegisters; + /** * ARMCPU: * @env: #CPUARMState @@ -788,13 +797,28 @@ struct ARMCPU { * ARMv7AR ARM Architecture Reference Manual. A reset_ prefix * is used for reset values of non-constant registers; no reset_ * prefix means a constant register. + * Some of these registers are split out into a substructure that + * is shared with the translators to control the ISA. */ + struct ARMISARegisters { + uint32_t id_isar0; + uint32_t id_isar1; + uint32_t id_isar2; + uint32_t id_isar3; + uint32_t id_isar4; + uint32_t id_isar5; + uint32_t id_isar6; + uint32_t mvfr0; + uint32_t mvfr1; + uint32_t mvfr2; + uint64_t id_aa64isar0; + uint64_t id_aa64isar1; + uint64_t id_aa64pfr0; + uint64_t id_aa64pfr1; + } isar; uint32_t midr; uint32_t revidr; uint32_t reset_fpsid; - uint32_t mvfr0; - uint32_t mvfr1; - uint32_t mvfr2; uint32_t ctr; uint32_t reset_sctlr; uint32_t id_pfr0; @@ -808,21 +832,10 @@ struct ARMCPU { uint32_t id_mmfr2; uint32_t id_mmfr3; uint32_t id_mmfr4; - uint32_t id_isar0; - uint32_t id_isar1; - uint32_t id_isar2; - uint32_t id_isar3; - uint32_t id_isar4; - uint32_t id_isar5; - uint32_t id_isar6; - uint64_t id_aa64pfr0; - uint64_t id_aa64pfr1; uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; uint64_t id_aa64afr0; uint64_t id_aa64afr1; - uint64_t id_aa64isar0; - uint64_t id_aa64isar1; uint64_t id_aa64mmfr0; uint64_t id_aa64mmfr1; uint32_t dbgdidr; @@ -1531,6 +1544,16 @@ FIELD(ID_AA64ISAR1, FRINTTS, 32, 4) FIELD(ID_AA64ISAR1, SB, 36, 4) FIELD(ID_AA64ISAR1, SPECRES, 40, 4) +FIELD(ID_AA64PFR0, EL0, 0, 4) +FIELD(ID_AA64PFR0, EL1, 4, 4) +FIELD(ID_AA64PFR0, EL2, 8, 4) +FIELD(ID_AA64PFR0, EL3, 12, 4) +FIELD(ID_AA64PFR0, FP, 16, 4) +FIELD(ID_AA64PFR0, ADVSIMD, 20, 4) +FIELD(ID_AA64PFR0, GIC, 24, 4) +FIELD(ID_AA64PFR0, RAS, 28, 4) +FIELD(ID_AA64PFR0, SVE, 32, 4) + QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); /* If adding a feature bit which corresponds to a Linux ELF @@ -1550,7 +1573,6 @@ enum arm_features { ARM_FEATURE_VFP3, ARM_FEATURE_VFP_FP16, ARM_FEATURE_NEON, - ARM_FEATURE_THUMB_DIV, /* divide supported in Thumb encoding */ ARM_FEATURE_M, /* Microcontroller profile. */ ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling. */ ARM_FEATURE_THUMB2EE, @@ -1560,7 +1582,6 @@ enum arm_features { ARM_FEATURE_V5, ARM_FEATURE_STRONGARM, ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */ - ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */ ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */ ARM_FEATURE_GENERIC_TIMER, ARM_FEATURE_MVFR, /* Media and VFP Feature Registers 0 and 1 */ @@ -1573,30 +1594,15 @@ enum arm_features { ARM_FEATURE_LPAE, /* has Large Physical Address Extension */ ARM_FEATURE_V8, ARM_FEATURE_AARCH64, /* supports 64 bit mode */ - ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */ ARM_FEATURE_CBAR, /* has cp15 CBAR */ ARM_FEATURE_CRC, /* ARMv8 CRC instructions */ ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */ ARM_FEATURE_EL2, /* has EL2 Virtualization support */ ARM_FEATURE_EL3, /* has EL3 Secure monitor support */ - ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */ ARM_FEATURE_THUMB_DSP, /* DSP insns supported in the Thumb encodings */ ARM_FEATURE_PMU, /* has PMU support */ ARM_FEATURE_VBAR, /* has cp15 VBAR */ ARM_FEATURE_M_SECURITY, /* M profile Security Extension */ - ARM_FEATURE_JAZELLE, /* has (trivial) Jazelle implementation */ - ARM_FEATURE_SVE, /* has Scalable Vector Extension */ - ARM_FEATURE_V8_SHA512, /* implements SHA512 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */ - ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */ - ARM_FEATURE_V8_DOTPROD, /* implements v8.2 simd dot product */ - ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */ - ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */ ARM_FEATURE_M_MAIN, /* M profile Main Extension */ }; @@ -3148,4 +3154,157 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) /* Shared between translate-sve.c and sve_helper.c. */ extern const uint64_t pred_esz_masks[4]; +/* + * 32-bit feature tests via id registers. + */ +static inline bool isar_feature_thumb_div(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; +} + +static inline bool isar_feature_arm_div(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; +} + +static inline bool isar_feature_jazelle(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; +} + +static inline bool isar_feature_aa32_aes(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0; +} + +static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1; +} + +static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0; +} + +static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0; +} + +static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0; +} + +static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0; +} + +static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0; +} + +static inline bool isar_feature_aa32_dp(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0; +} + +static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) +{ + /* + * This is a placeholder for use by VCMA until the rest of + * the ARMv8.2-FP16 extension is implemented for aa32 mode. + * At which point we can properly set and check MVFR1.FPHP. + */ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; +} + +/* + * 64-bit feature tests via id registers. + */ +static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0; +} + +static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1; +} + +static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0; +} + +static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0; +} + +static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1; +} + +static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0; +} + +static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0; +} + +static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0; +} + +static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0; +} + +static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0; +} + +static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0; +} + +static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; +} + +static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; +} + +static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id) +{ + /* We always set the AdvSIMD and FP fields identically wrt FP16. */ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; +} + +static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; +} + +/* + * Forward to the above feature tests given an ARMCPU pointer. + */ +#define cpu_isar_feature(name, cpu) \ + ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); }) + #endif diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 44fdf0f6fa..873f059bf2 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -109,11 +109,6 @@ static void aarch64_a57_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); @@ -121,9 +116,9 @@ static void aarch64_a57_initfn(Object *obj) cpu->midr = 0x411fd070; cpu->revidr = 0x00000000; cpu->reset_fpsid = 0x41034070; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x12111111; - cpu->mvfr2 = 0x00000043; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; @@ -134,18 +129,18 @@ static void aarch64_a57_initfn(Object *obj) cpu->id_mmfr1 = 0x40000000; cpu->id_mmfr2 = 0x01260000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232042; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x00011142; - cpu->id_isar5 = 0x00011121; - cpu->id_isar6 = 0; - cpu->id_aa64pfr0 = 0x00002222; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.id_aa64pfr0 = 0x00002222; cpu->id_aa64dfr0 = 0x10305106; cpu->pmceid0 = 0x00000000; cpu->pmceid1 = 0x00000000; - cpu->id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64isar0 = 0x00011120; cpu->id_aa64mmfr0 = 0x00001124; cpu->dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; @@ -170,11 +165,6 @@ static void aarch64_a53_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); @@ -182,9 +172,9 @@ static void aarch64_a53_initfn(Object *obj) cpu->midr = 0x410fd034; cpu->revidr = 0x00000000; cpu->reset_fpsid = 0x41034070; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x12111111; - cpu->mvfr2 = 0x00000043; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x84448004; /* L1Ip = VIPT */ cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; @@ -195,16 +185,16 @@ static void aarch64_a53_initfn(Object *obj) cpu->id_mmfr1 = 0x40000000; cpu->id_mmfr2 = 0x01260000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232042; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x00011142; - cpu->id_isar5 = 0x00011121; - cpu->id_isar6 = 0; - cpu->id_aa64pfr0 = 0x00002222; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.id_aa64pfr0 = 0x00002222; cpu->id_aa64dfr0 = 0x10305106; - cpu->id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64isar0 = 0x00011120; cpu->id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ cpu->dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; @@ -229,20 +219,15 @@ static void aarch64_a72_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); cpu->midr = 0x410fd083; cpu->revidr = 0x00000000; cpu->reset_fpsid = 0x41034080; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x12111111; - cpu->mvfr2 = 0x00000043; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; @@ -253,17 +238,17 @@ static void aarch64_a72_initfn(Object *obj) cpu->id_mmfr1 = 0x40000000; cpu->id_mmfr2 = 0x01260000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232042; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x00011142; - cpu->id_isar5 = 0x00011121; - cpu->id_aa64pfr0 = 0x00002222; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_aa64pfr0 = 0x00002222; cpu->id_aa64dfr0 = 0x10305106; cpu->pmceid0 = 0x00000000; cpu->pmceid1 = 0x00000000; - cpu->id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64isar0 = 0x00011120; cpu->id_aa64mmfr0 = 0x00001124; cpu->dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; @@ -312,24 +297,55 @@ static void aarch64_max_initfn(Object *obj) if (kvm_enabled()) { kvm_arm_set_cpu_features_from_host(cpu); } else { + uint64_t t; + uint32_t u; aarch64_a57_initfn(obj); -#ifdef CONFIG_USER_ONLY - /* We don't set these in system emulation mode for the moment, - * since we don't correctly set the ID registers to advertise them, - * and in some cases they're only available in AArch64 and not AArch32, - * whereas the architecture requires them to be present in both if - * present in either. + + t = cpu->isar.id_aa64isar0; + t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* AES + PMULL */ + t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* SHA512 */ + t = FIELD_DP64(t, ID_AA64ISAR0, CRC32, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 2); + t = FIELD_DP64(t, ID_AA64ISAR0, RDM, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, DP, 1); + cpu->isar.id_aa64isar0 = t; + + t = cpu->isar.id_aa64isar1; + t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1); + cpu->isar.id_aa64isar1 = t; + + t = cpu->isar.id_aa64pfr0; + t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1); + t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); + t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); + cpu->isar.id_aa64pfr0 = t; + + /* Replicate the same data to the 32-bit id registers. */ + u = cpu->isar.id_isar5; + u = FIELD_DP32(u, ID_ISAR5, AES, 2); /* AES + PMULL */ + u = FIELD_DP32(u, ID_ISAR5, SHA1, 1); + u = FIELD_DP32(u, ID_ISAR5, SHA2, 1); + u = FIELD_DP32(u, ID_ISAR5, CRC32, 1); + u = FIELD_DP32(u, ID_ISAR5, RDM, 1); + u = FIELD_DP32(u, ID_ISAR5, VCMA, 1); + cpu->isar.id_isar5 = u; + + u = cpu->isar.id_isar6; + u = FIELD_DP32(u, ID_ISAR6, DP, 1); + cpu->isar.id_isar6 = u; + + /* + * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, + * so do not set MVFR1.FPHP. Strictly speaking this is not legal, + * but it is also not legal to enable SVE without support for FP16, + * and enabling SVE in system mode is more useful in the short term. */ - set_feature(&cpu->env, ARM_FEATURE_V8_SHA512); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA3); - set_feature(&cpu->env, ARM_FEATURE_V8_SM3); - set_feature(&cpu->env, ARM_FEATURE_V8_SM4); - set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS); - set_feature(&cpu->env, ARM_FEATURE_V8_RDM); - set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD); - set_feature(&cpu->env, ARM_FEATURE_V8_FP16); - set_feature(&cpu->env, ARM_FEATURE_V8_FCMA); - set_feature(&cpu->env, ARM_FEATURE_SVE); + +#ifdef CONFIG_USER_ONLY /* For usermode -cpu max we can use a larger and more efficient DCZ * blocksize since we don't have to follow what the hardware does. */ diff --git a/target/arm/helper.c b/target/arm/helper.c index e3946562aa..0ea95b0815 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -56,6 +56,8 @@ static void v8m_security_lookup(CPUARMState *env, uint32_t address, V8M_SAttributes *sattrs); #endif +static void switch_mode(CPUARMState *env, int mode); + static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) { int nregs; @@ -552,42 +554,6 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); } -static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate all (TLBIALL) */ - ARMCPU *cpu = arm_env_get_cpu(env); - - tlb_flush(CPU(cpu)); -} - -static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */ - ARMCPU *cpu = arm_env_get_cpu(env); - - tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); -} - -static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by ASID (TLBIASID) */ - ARMCPU *cpu = arm_env_get_cpu(env); - - tlb_flush(CPU(cpu)); -} - -static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */ - ARMCPU *cpu = arm_env_get_cpu(env); - - tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); -} - /* IS variants of TLB operations must affect all cores */ static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) @@ -621,6 +587,73 @@ static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); } +/* + * Non-IS variants of TLB operations are upgraded to + * IS versions if we are at NS EL1 and HCR_EL2.FB is set to + * force broadcast of these operations. + */ +static bool tlb_force_broadcast(CPUARMState *env) +{ + return (env->cp15.hcr_el2 & HCR_FB) && + arm_current_el(env) == 1 && arm_is_secure_below_el3(env); +} + +static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate all (TLBIALL) */ + ARMCPU *cpu = arm_env_get_cpu(env); + + if (tlb_force_broadcast(env)) { + tlbiall_is_write(env, NULL, value); + return; + } + + tlb_flush(CPU(cpu)); +} + +static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */ + ARMCPU *cpu = arm_env_get_cpu(env); + + if (tlb_force_broadcast(env)) { + tlbimva_is_write(env, NULL, value); + return; + } + + tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); +} + +static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by ASID (TLBIASID) */ + ARMCPU *cpu = arm_env_get_cpu(env); + + if (tlb_force_broadcast(env)) { + tlbiasid_is_write(env, NULL, value); + return; + } + + tlb_flush(CPU(cpu)); +} + +static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */ + ARMCPU *cpu = arm_env_get_cpu(env); + + if (tlb_force_broadcast(env)) { + tlbimvaa_is_write(env, NULL, value); + return; + } + + tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); +} + static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -1296,12 +1329,26 @@ static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri) CPUState *cs = ENV_GET_CPU(env); uint64_t ret = 0; - if (cs->interrupt_request & CPU_INTERRUPT_HARD) { - ret |= CPSR_I; + if (arm_hcr_el2_imo(env)) { + if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { + ret |= CPSR_I; + } + } else { + if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + ret |= CPSR_I; + } } - if (cs->interrupt_request & CPU_INTERRUPT_FIQ) { - ret |= CPSR_F; + + if (arm_hcr_el2_fmo(env)) { + if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) { + ret |= CPSR_F; + } + } else { + if (cs->interrupt_request & CPU_INTERRUPT_FIQ) { + ret |= CPSR_F; + } } + /* External aborts are not possible in QEMU so A bit is always clear */ return ret; } @@ -2270,13 +2317,15 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, * * The Non-secure TTBCR.EAE bit is set to 1 * * The implementation includes EL2, and the value of HCR.VM is 1 * + * (Note that HCR.DC makes HCR.VM behave as if it is 1.) + * * ATS1Hx always uses the 64bit format (not supported yet). */ format64 = arm_s1_regime_using_lpae_format(env, mmu_idx); if (arm_feature(env, ARM_FEATURE_EL2)) { if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) { - format64 |= env->cp15.hcr_el2 & HCR_VM; + format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC); } else { format64 |= arm_current_el(env) == 2; } @@ -2709,12 +2758,10 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* 64 bit accesses to the TTBRs can change the ASID and so we - * must flush the TLB. - */ - if (cpreg_field_is_64bit(ri)) { + /* If the ASID changes (with a 64-bit write), we must flush the TLB. */ + if (cpreg_field_is_64bit(ri) && + extract64(raw_read(env, ri) ^ value, 48, 16) != 0) { ARMCPU *cpu = arm_env_get_cpu(env); - tlb_flush(CPU(cpu)); } raw_write(env, ri, value); @@ -3083,22 +3130,6 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env, * Page D4-1736 (DDI0487A.b) */ -static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); - - if (arm_is_secure_below_el3(env)) { - tlb_flush_by_mmuidx(cs, - ARMMMUIdxBit_S1SE1 | - ARMMMUIdxBit_S1SE0); - } else { - tlb_flush_by_mmuidx(cs, - ARMMMUIdxBit_S12NSE1 | - ARMMMUIdxBit_S12NSE0); - } -} - static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3116,6 +3147,27 @@ static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + if (tlb_force_broadcast(env)) { + tlbi_aa64_vmalle1_write(env, NULL, value); + return; + } + + if (arm_is_secure_below_el3(env)) { + tlb_flush_by_mmuidx(cs, + ARMMMUIdxBit_S1SE1 | + ARMMMUIdxBit_S1SE0); + } else { + tlb_flush_by_mmuidx(cs, + ARMMMUIdxBit_S12NSE1 | + ARMMMUIdxBit_S12NSE0); + } +} + static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3205,29 +3257,6 @@ static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E3); } -static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by VA, EL1&0 (AArch64 version). - * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, - * since we don't support flush-for-specific-ASID-only or - * flush-last-level-only. - */ - ARMCPU *cpu = arm_env_get_cpu(env); - CPUState *cs = CPU(cpu); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - - if (arm_is_secure_below_el3(env)) { - tlb_flush_page_by_mmuidx(cs, pageaddr, - ARMMMUIdxBit_S1SE1 | - ARMMMUIdxBit_S1SE0); - } else { - tlb_flush_page_by_mmuidx(cs, pageaddr, - ARMMMUIdxBit_S12NSE1 | - ARMMMUIdxBit_S12NSE0); - } -} - static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3275,6 +3304,34 @@ static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by VA, EL1&0 (AArch64 version). + * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, + * since we don't support flush-for-specific-ASID-only or + * flush-last-level-only. + */ + ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = CPU(cpu); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + if (tlb_force_broadcast(env)) { + tlbi_aa64_vae1is_write(env, NULL, value); + return; + } + + if (arm_is_secure_below_el3(env)) { + tlb_flush_page_by_mmuidx(cs, pageaddr, + ARMMMUIdxBit_S1SE1 | + ARMMMUIdxBit_S1SE0); + } else { + tlb_flush_page_by_mmuidx(cs, pageaddr, + ARMMMUIdxBit_S12NSE1 | + ARMMMUIdxBit_S12NSE0); + } +} + static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3872,6 +3929,7 @@ static const ARMCPRegInfo el3_no_el2_v8_cp_reginfo[] = { static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = ENV_GET_CPU(env); uint64_t valid_mask = HCR_MASK; if (arm_feature(env, ARM_FEATURE_EL3)) { @@ -3890,6 +3948,28 @@ static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* Clear RES0 bits. */ value &= valid_mask; + /* + * VI and VF are kept in cs->interrupt_request. Modifying that + * requires that we have the iothread lock, which is done by + * marking the reginfo structs as ARM_CP_IO. + * Note that if a write to HCR pends a VIRQ or VFIQ it is never + * possible for it to be taken immediately, because VIRQ and + * VFIQ are masked unless running at EL0 or EL1, and HCR + * can only be written at EL2. + */ + g_assert(qemu_mutex_iothread_locked()); + if (value & HCR_VI) { + cs->interrupt_request |= CPU_INTERRUPT_VIRQ; + } else { + cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + } + if (value & HCR_VF) { + cs->interrupt_request |= CPU_INTERRUPT_VFIQ; + } else { + cs->interrupt_request &= ~CPU_INTERRUPT_VFIQ; + } + value &= ~(HCR_VI | HCR_VF); + /* These bits change the MMU setup: * HCR_VM enables stage 2 translation * HCR_PTW forbids certain page-table setups @@ -3917,16 +3997,32 @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri, hcr_write(env, NULL, value); } +static uint64_t hcr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* The VI and VF bits live in cs->interrupt_request */ + uint64_t ret = env->cp15.hcr_el2 & ~(HCR_VI | HCR_VF); + CPUState *cs = ENV_GET_CPU(env); + + if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { + ret |= HCR_VI; + } + if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) { + ret |= HCR_VF; + } + return ret; +} + static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_IO, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_write }, + .writefn = hcr_write, .readfn = hcr_read }, { .name = "HCR", .state = ARM_CP_STATE_AA32, - .type = ARM_CP_ALIAS, + .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_writelow }, + .writefn = hcr_writelow, .readfn = hcr_read }, { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, @@ -4163,7 +4259,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { static const ARMCPRegInfo el2_v8_cp_reginfo[] = { { .name = "HCR2", .state = ARM_CP_STATE_AA32, - .type = ARM_CP_ALIAS, + .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4, .access = PL2_RW, .fieldoffset = offsetofhigh32(CPUARMState, cp15.hcr_el2), @@ -4214,7 +4310,7 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.mvbar) }, { .name = "TTBR0_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 0, - .access = PL3_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0, + .access = PL3_RW, .resetvalue = 0, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[3]) }, { .name = "TCR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 2, @@ -4873,7 +4969,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = arm_env_get_cpu(env); - uint64_t pfr0 = cpu->id_aa64pfr0; + uint64_t pfr0 = cpu->isar.id_aa64pfr0; if (env->gicv3state) { pfr0 |= 1 << 24; @@ -4940,27 +5036,27 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar0 }, + .resetvalue = cpu->isar.id_isar0 }, { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar1 }, + .resetvalue = cpu->isar.id_isar1 }, { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar2 }, + .resetvalue = cpu->isar.id_isar2 }, { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar3 }, + .resetvalue = cpu->isar.id_isar3 }, { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar4 }, + .resetvalue = cpu->isar.id_isar4 }, { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar5 }, + .resetvalue = cpu->isar.id_isar5 }, { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, @@ -4968,7 +5064,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar6 }, + .resetvalue = cpu->isar.id_isar6 }, REGINFO_SENTINEL }; define_arm_cp_regs(cpu, v6_idregs); @@ -5039,7 +5135,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64pfr1}, + .resetvalue = cpu->isar.id_aa64pfr1}, { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -5100,11 +5196,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64isar0 }, + .resetvalue = cpu->isar.id_aa64isar0 }, { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64isar1 }, + .resetvalue = cpu->isar.id_aa64isar1 }, { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -5164,15 +5260,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr0 }, + .resetvalue = cpu->isar.mvfr0 }, { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr1 }, + .resetvalue = cpu->isar.mvfr1 }, { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr2 }, + .resetvalue = cpu->isar.mvfr2 }, { .name = "MVFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -5618,7 +5714,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, &sctlr); } - if (arm_feature(env, ARM_FEATURE_SVE)) { + if (cpu_isar_feature(aa64_sve, cpu)) { define_one_arm_cp_reg(cpu, &zcr_el1_reginfo); if (arm_feature(env, ARM_FEATURE_EL2)) { define_one_arm_cp_reg(cpu, &zcr_el2_reginfo); @@ -6208,7 +6304,17 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, mask |= CPSR_IL; val |= CPSR_IL; } + qemu_log_mask(LOG_GUEST_ERROR, + "Illegal AArch32 mode switch attempt from %s to %s\n", + aarch32_mode_name(env->uncached_cpsr), + aarch32_mode_name(val)); } else { + qemu_log_mask(CPU_LOG_INT, "%s %s to %s PC 0x%" PRIx32 "\n", + write_type == CPSRWriteExceptionReturn ? + "Exception return from AArch32" : + "AArch32 mode switch from", + aarch32_mode_name(env->uncached_cpsr), + aarch32_mode_name(val), env->regs[15]); switch_mode(env, val & CPSR_M); } } @@ -6306,7 +6412,7 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) return 0; } -void switch_mode(CPUARMState *env, int mode) +static void switch_mode(CPUARMState *env, int mode) { ARMCPU *cpu = arm_env_get_cpu(env); @@ -6328,7 +6434,7 @@ void aarch64_sync_64_to_32(CPUARMState *env) #else -void switch_mode(CPUARMState *env, int mode) +static void switch_mode(CPUARMState *env, int mode) { int old_mode; int i; @@ -8194,6 +8300,19 @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs) } if (cs->exception_index != EXCP_IRQ && cs->exception_index != EXCP_FIQ) { + if (!arm_feature(env, ARM_FEATURE_V8)) { + /* + * QEMU syndrome values are v8-style. v7 has the IL bit + * UNK/SBZP for "field not valid" cases, where v8 uses RES1. + * If this is a v7 CPU, squash the IL bit in those cases. + */ + if (cs->exception_index == EXCP_PREFETCH_ABORT || + (cs->exception_index == EXCP_DATA_ABORT && + !(env->exception.syndrome & ARM_EL_ISV)) || + syn_get_ec(env->exception.syndrome) == EC_UNCATEGORIZED) { + env->exception.syndrome &= ~ARM_EL_IL; + } + } env->cp15.esr_el[2] = env->exception.syndrome; } @@ -8228,7 +8347,7 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) uint32_t moe; /* If this is a debug exception we must update the DBGDSCR.MOE bits */ - switch (env->exception.syndrome >> ARM_EL_EC_SHIFT) { + switch (syn_get_ec(env->exception.syndrome)) { case EC_BREAKPOINT: case EC_BREAKPOINT_SAME_EL: moe = 1; @@ -8425,6 +8544,15 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) case EXCP_HVC: case EXCP_HYP_TRAP: case EXCP_SMC: + if (syn_get_ec(env->exception.syndrome) == EC_ADVSIMDFPACCESSTRAP) { + /* + * QEMU internal FP/SIMD syndromes from AArch32 include the + * TA and coproc fields which are only exposed if the exception + * is taken to AArch32 Hyp mode. Mask them out to get a valid + * AArch64 format syndrome. + */ + env->exception.syndrome &= ~MAKE_64BIT_MASK(0, 20); + } env->cp15.esr_el[new_el] = env->exception.syndrome; break; case EXCP_IRQ: @@ -8568,7 +8696,7 @@ void arm_cpu_do_interrupt(CPUState *cs) if (qemu_loglevel_mask(CPU_LOG_INT) && !excp_is_internal(cs->exception_index)) { qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%x/0x%" PRIx32 "\n", - env->exception.syndrome >> ARM_EL_EC_SHIFT, + syn_get_ec(env->exception.syndrome), env->exception.syndrome); } @@ -8665,7 +8793,8 @@ static inline bool regime_translation_disabled(CPUARMState *env, } if (mmu_idx == ARMMMUIdx_S2NS) { - return (env->cp15.hcr_el2 & HCR_VM) == 0; + /* HCR.DC means HCR.VM behaves as 1 */ + return (env->cp15.hcr_el2 & (HCR_DC | HCR_VM)) == 0; } if (env->cp15.hcr_el2 & HCR_TGE) { @@ -8675,6 +8804,12 @@ static inline bool regime_translation_disabled(CPUARMState *env, } } + if ((env->cp15.hcr_el2 & HCR_DC) && + (mmu_idx == ARMMMUIdx_S1NSE0 || mmu_idx == ARMMMUIdx_S1NSE1)) { + /* HCR.DC means SCTLR_EL1.M behaves as 0 */ + return true; + } + return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; } @@ -9026,9 +9161,20 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, hwaddr s2pa; int s2prot; int ret; + ARMCacheAttrs cacheattrs = {}; + ARMCacheAttrs *pcacheattrs = NULL; + + if (env->cp15.hcr_el2 & HCR_PTW) { + /* + * PTW means we must fault if this S1 walk touches S2 Device + * memory; otherwise we don't care about the attributes and can + * save the S2 translation the effort of computing them. + */ + pcacheattrs = &cacheattrs; + } ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_S2NS, &s2pa, - &txattrs, &s2prot, &s2size, fi, NULL); + &txattrs, &s2prot, &s2size, fi, pcacheattrs); if (ret) { assert(fi->type != ARMFault_None); fi->s2addr = addr; @@ -9036,6 +9182,14 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, fi->s1ptw = true; return ~0; } + if (pcacheattrs && (pcacheattrs->attrs & 0xf0) == 0) { + /* Access was to Device memory: generate Permission fault */ + fi->type = ARMFault_Permission; + fi->s2addr = addr; + fi->stage2 = true; + fi->s1ptw = true; + return ~0; + } addr = s2pa; } return addr; @@ -10655,6 +10809,16 @@ static bool get_phys_addr(CPUARMState *env, target_ulong address, /* Combine the S1 and S2 cache attributes, if needed */ if (!ret && cacheattrs != NULL) { + if (env->cp15.hcr_el2 & HCR_DC) { + /* + * HCR.DC forces the first stage attributes to + * Normal Non-Shareable, + * Inner Write-Back Read-Allocate Write-Allocate, + * Outer Write-Back Read-Allocate Write-Allocate. + */ + cacheattrs->attrs = 0xff; + cacheattrs->shareability = 0; + } *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); } @@ -11612,7 +11776,7 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) uint32_t changed; /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */ - if (!arm_feature(env, ARM_FEATURE_V8_FP16)) { + if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) { val &= ~FPCR_FZ16; } @@ -12671,13 +12835,15 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, uint32_t flags; if (is_a64(env)) { + ARMCPU *cpu = arm_env_get_cpu(env); + *pc = env->pc; flags = ARM_TBFLAG_AARCH64_STATE_MASK; /* Get control bits for tagged addresses */ flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT); flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT); - if (arm_feature(env, ARM_FEATURE_SVE)) { + if (cpu_isar_feature(aa64_sve, cpu)) { int sve_el = sve_exception_el(env, current_el); uint32_t zcr_len; @@ -12801,11 +12967,12 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el, bool el0_a64) { + ARMCPU *cpu = arm_env_get_cpu(env); int old_len, new_len; bool old_a64, new_a64; /* Nothing to do if no SVE. */ - if (!arm_feature(env, ARM_FEATURE_SVE)) { + if (!cpu_isar_feature(aa64_sve, cpu)) { return; } diff --git a/target/arm/internals.h b/target/arm/internals.h index a4fc709bcc..6c2bb2deeb 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -145,7 +145,6 @@ static inline int bank_number(int mode) g_assert_not_reached(); } -void switch_mode(CPUARMState *, int); void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); void arm_translate_init(void); @@ -279,14 +278,19 @@ enum arm_exception_class { #define ARM_EL_IL (1 << ARM_EL_IL_SHIFT) #define ARM_EL_ISV (1 << ARM_EL_ISV_SHIFT) +static inline uint32_t syn_get_ec(uint32_t syn) +{ + return syn >> ARM_EL_EC_SHIFT; +} + /* Utility functions for constructing various kinds of syndrome value. * Note that in general we follow the AArch64 syndrome values; in a * few cases the value in HSR for exceptions taken to AArch32 Hyp - * mode differs slightly, so if we ever implemented Hyp mode then the - * syndrome value would need some massaging on exception entry. - * (One example of this is that AArch64 defaults to IL bit set for - * exceptions which don't specifically indicate information about the - * trapping instruction, whereas AArch32 defaults to IL bit clear.) + * mode differs slightly, and we fix this up when populating HSR in + * arm_cpu_do_interrupt_aarch32_hyp(). + * The exception is FP/SIMD access traps -- these report extra information + * when taking an exception to AArch32. For those we include the extra coproc + * and TA fields, and mask them out when taking the exception to AArch64. */ static inline uint32_t syn_uncategorized(void) { @@ -386,9 +390,18 @@ static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm, static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_16bit) { + /* AArch32 FP trap or any AArch64 FP/SIMD trap: TA == 0 coproc == 0xa */ return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT) | (is_16bit ? 0 : ARM_EL_IL) - | (cv << 24) | (cond << 20); + | (cv << 24) | (cond << 20) | 0xa; +} + +static inline uint32_t syn_simd_access_trap(int cv, int cond, bool is_16bit) +{ + /* AArch32 SIMD trap: TA == 1 coproc == 0 */ + return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT) + | (is_16bit ? 0 : ARM_EL_IL) + | (cv << 24) | (cond << 20) | (1 << 5); } static inline uint32_t syn_sve_access_trap(void) @@ -840,4 +853,22 @@ static inline uint32_t v7m_sp_limit(CPUARMState *env) } } +/** + * aarch32_mode_name(): Return name of the AArch32 CPU mode + * @psr: Program Status Register indicating CPU mode + * + * Returns, for debug logging purposes, a printable representation + * of the AArch32 CPU mode ("svc", "usr", etc) as indicated by + * the low bits of the specified PSR. + */ +static inline const char *aarch32_mode_name(uint32_t psr) +{ + static const char cpu_mode_names[16][4] = { + "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt", + "???", "???", "hyp", "und", "???", "???", "???", "sys" + }; + + return cpu_mode_names[psr & 0xf]; +} + #endif diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 54ef5f711b..09a86e2820 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -34,6 +34,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { }; static bool cap_has_mp_state; +static bool cap_has_inject_serror_esr; static ARMHostCPUFeatures arm_host_cpu_features; @@ -48,6 +49,12 @@ int kvm_arm_vcpu_init(CPUState *cs) return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init); } +void kvm_arm_init_serror_injection(CPUState *cs) +{ + cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state, + KVM_CAP_ARM_INJECT_SERROR_ESR); +} + bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, int *fdarray, struct kvm_vcpu_init *init) @@ -522,6 +529,59 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) return 0; } +int kvm_put_vcpu_events(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + struct kvm_vcpu_events events; + int ret; + + if (!kvm_has_vcpu_events()) { + return 0; + } + + memset(&events, 0, sizeof(events)); + events.exception.serror_pending = env->serror.pending; + + /* Inject SError to guest with specified syndrome if host kernel + * supports it, otherwise inject SError without syndrome. + */ + if (cap_has_inject_serror_esr) { + events.exception.serror_has_esr = env->serror.has_esr; + events.exception.serror_esr = env->serror.esr; + } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); + if (ret) { + error_report("failed to put vcpu events"); + } + + return ret; +} + +int kvm_get_vcpu_events(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + struct kvm_vcpu_events events; + int ret; + + if (!kvm_has_vcpu_events()) { + return 0; + } + + memset(&events, 0, sizeof(events)); + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events); + if (ret) { + error_report("failed to get vcpu events"); + return ret; + } + + env->serror.pending = events.exception.serror_pending; + env->serror.has_esr = events.exception.serror_has_esr; + env->serror.esr = events.exception.serror_esr; + + return 0; +} + void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) { } diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index 4e91c11796..0f1e94c7b5 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -217,6 +217,9 @@ int kvm_arch_init_vcpu(CPUState *cs) } cpu->mp_affinity = mpidr & ARM32_AFFINITY_MASK; + /* Check whether userspace can specify guest syndrome value */ + kvm_arm_init_serror_injection(cs); + return kvm_arm_init_cpreg_list(cpu); } @@ -358,6 +361,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + ret = kvm_put_vcpu_events(cpu); + if (ret) { + return ret; + } + /* Note that we do not call write_cpustate_to_list() * here, so we are only writing the tuple list back to * KVM. This is safe because nothing can change the @@ -445,6 +453,11 @@ int kvm_arch_get_registers(CPUState *cs) } vfp_set_fpscr(env, fpscr); + ret = kvm_get_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_kvmstate_to_list(cpu)) { return EINVAL; } diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index e0b8246283..5de8ff0ac5 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -546,6 +546,9 @@ int kvm_arch_init_vcpu(CPUState *cs) kvm_arm_init_debug(cs); + /* Check whether user space can specify guest syndrome value */ + kvm_arm_init_serror_injection(cs); + return kvm_arm_init_cpreg_list(cpu); } @@ -727,6 +730,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + ret = kvm_put_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_list_to_kvmstate(cpu, level)) { return EINVAL; } @@ -863,6 +871,11 @@ int kvm_arch_get_registers(CPUState *cs) } vfp_set_fpcr(env, fpr); + ret = kvm_get_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_kvmstate_to_list(cpu)) { return EINVAL; } @@ -920,7 +933,7 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit) { - int hsr_ec = debug_exit->hsr >> ARM_EL_EC_SHIFT; + int hsr_ec = syn_get_ec(debug_exit->hsr); ARMCPU *cpu = ARM_CPU(cs); CPUClass *cc = CPU_GET_CLASS(cs); CPUARMState *env = &cpu->env; diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 5948e8b560..21c0129da2 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -121,6 +121,30 @@ bool write_kvmstate_to_list(ARMCPU *cpu); */ void kvm_arm_reset_vcpu(ARMCPU *cpu); +/** + * kvm_arm_init_serror_injection: + * @cs: CPUState + * + * Check whether KVM can set guest SError syndrome. + */ +void kvm_arm_init_serror_injection(CPUState *cs); + +/** + * kvm_get_vcpu_events: + * @cpu: ARMCPU + * + * Get VCPU related state from kvm. + */ +int kvm_get_vcpu_events(ARMCPU *cpu); + +/** + * kvm_put_vcpu_events: + * @cpu: ARMCPU + * + * Put VCPU related state to kvm. + */ +int kvm_put_vcpu_events(ARMCPU *cpu); + #ifdef CONFIG_KVM /** * kvm_arm_create_scratch_host_vcpu: diff --git a/target/arm/machine.c b/target/arm/machine.c index ff4ec22bf7..239fe4e84d 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -131,9 +131,8 @@ static const VMStateDescription vmstate_iwmmxt = { static bool sve_needed(void *opaque) { ARMCPU *cpu = opaque; - CPUARMState *env = &cpu->env; - return arm_feature(env, ARM_FEATURE_SVE); + return cpu_isar_feature(aa64_sve, cpu); } /* The first two words of each Zreg is stored in VFP state. */ @@ -172,6 +171,27 @@ static const VMStateDescription vmstate_sve = { }; #endif /* AARCH64 */ +static bool serror_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return env->serror.pending != 0; +} + +static const VMStateDescription vmstate_serror = { + .name = "cpu/serror", + .version_id = 1, + .minimum_version_id = 1, + .needed = serror_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(env.serror.pending, ARMCPU), + VMSTATE_UINT8(env.serror.has_esr, ARMCPU), + VMSTATE_UINT64(env.serror.esr, ARMCPU), + VMSTATE_END_OF_LIST() + } +}; + static bool m_needed(void *opaque) { ARMCPU *cpu = opaque; @@ -726,6 +746,7 @@ const VMStateDescription vmstate_arm_cpu = { #ifdef TARGET_AARCH64 &vmstate_sve, #endif + &vmstate_serror, NULL } }; diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index d915579712..90741f6331 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -42,7 +42,7 @@ void raise_exception(CPUARMState *env, uint32_t excp, * (see DDI0478C.a D1.10.4) */ target_el = 2; - if (syndrome >> ARM_EL_EC_SHIFT == EC_ADVSIMDFPACCESSTRAP) { + if (syn_get_ec(syndrome) == EC_ADVSIMDFPACCESSTRAP) { syndrome = syn_uncategorized(); } } diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index bb9c4d8ac7..88195ab949 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -174,7 +174,7 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f, cpu_fprintf(f, " FPCR=%08x FPSR=%08x\n", vfp_get_fpcr(env), vfp_get_fpsr(env)); - if (arm_feature(env, ARM_FEATURE_SVE) && sve_exception_el(env, el) == 0) { + if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) { int j, zcr_len = sve_zcr_len_for_el(env, el); for (i = 0; i <= FFR_PRED_NUM; i++) { @@ -1201,25 +1201,23 @@ static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, /* Store from vector register to memory */ static void do_vec_st(DisasContext *s, int srcidx, int element, - TCGv_i64 tcg_addr, int size) + TCGv_i64 tcg_addr, int size, TCGMemOp endian) { - TCGMemOp memop = s->be_data + size; TCGv_i64 tcg_tmp = tcg_temp_new_i64(); read_vec_element(s, tcg_tmp, srcidx, element, size); - tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size); tcg_temp_free_i64(tcg_tmp); } /* Load from memory to vector register */ static void do_vec_ld(DisasContext *s, int destidx, int element, - TCGv_i64 tcg_addr, int size) + TCGv_i64 tcg_addr, int size, TCGMemOp endian) { - TCGMemOp memop = s->be_data + size; TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size); write_vec_element(s, tcg_tmp, destidx, element, size); tcg_temp_free_i64(tcg_tmp); @@ -2328,7 +2326,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } if (rt2 == 31 && ((rt | rs) & 1) == 0 - && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + && dc_isar_feature(aa64_atomics, s)) { /* CASP / CASPL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; @@ -2350,7 +2348,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } if (rt2 == 31 && ((rt | rs) & 1) == 0 - && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + && dc_isar_feature(aa64_atomics, s)) { /* CASPA / CASPAL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; @@ -2361,7 +2359,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) case 0xb: /* CASL */ case 0xe: /* CASA */ case 0xf: /* CASAL */ - if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) { gen_compare_and_swap(s, rs, rt, rn, size); return; } @@ -2900,11 +2898,10 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, int rs = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int o3_opc = extract32(insn, 12, 4); - int feature = ARM_FEATURE_V8_ATOMICS; TCGv_i64 tcg_rn, tcg_rs; AtomicThreeOpFn *fn; - if (is_vector) { + if (is_vector || !dc_isar_feature(aa64_atomics, s)) { unallocated_encoding(s); return; } @@ -2940,10 +2937,6 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { - unallocated_encoding(s); - return; - } if (rn == 31) { gen_check_sp_alignment(s); @@ -3023,10 +3016,11 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) bool is_store = !extract32(insn, 22, 1); bool is_postidx = extract32(insn, 23, 1); bool is_q = extract32(insn, 30, 1); - TCGv_i64 tcg_addr, tcg_rn; + TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes; + TCGMemOp endian = s->be_data; - int ebytes = 1 << size; - int elements = (is_q ? 128 : 64) / (8 << size); + int ebytes; /* bytes per element */ + int elements; /* elements per vector */ int rpt; /* num iterations */ int selem; /* structure elements */ int r; @@ -3085,39 +3079,55 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) gen_check_sp_alignment(s); } + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + + /* Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. + */ + if (selem == 1 && endian == MO_LE) { + size = 3; + } + ebytes = 1 << size; + elements = (is_q ? 16 : 8) / ebytes; + tcg_rn = cpu_reg_sp(s, rn); tcg_addr = tcg_temp_new_i64(); tcg_gen_mov_i64(tcg_addr, tcg_rn); + tcg_ebytes = tcg_const_i64(ebytes); for (r = 0; r < rpt; r++) { int e; for (e = 0; e < elements; e++) { - int tt = (rt + r) % 32; int xs; for (xs = 0; xs < selem; xs++) { + int tt = (rt + r + xs) % 32; if (is_store) { - do_vec_st(s, tt, e, tcg_addr, size); + do_vec_st(s, tt, e, tcg_addr, size, endian); } else { - do_vec_ld(s, tt, e, tcg_addr, size); - - /* For non-quad operations, setting a slice of the low - * 64 bits of the register clears the high 64 bits (in - * the ARM ARM pseudocode this is implicit in the fact - * that 'rval' is a 64 bit wide variable). - * For quad operations, we might still need to zero the - * high bits of SVE. We optimize by noticing that we only - * need to do this the first time we touch a register. - */ - if (e == 0 && (r == 0 || xs == selem - 1)) { - clear_vec_high(s, is_q, tt); - } + do_vec_ld(s, tt, e, tcg_addr, size, endian); } - tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); - tt = (tt + 1) % 32; + tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes); } } } + if (!is_store) { + /* For non-quad operations, setting a slice of the low + * 64 bits of the register clears the high 64 bits (in + * the ARM ARM pseudocode this is implicit in the fact + * that 'rval' is a 64 bit wide variable). + * For quad operations, we might still need to zero the + * high bits of SVE. + */ + for (r = 0; r < rpt * selem; r++) { + int tt = (rt + r) % 32; + clear_vec_high(s, is_q, tt); + } + } + if (is_postidx) { int rm = extract32(insn, 16, 5); if (rm == 31) { @@ -3126,6 +3136,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); } } + tcg_temp_free_i64(tcg_ebytes); tcg_temp_free_i64(tcg_addr); } @@ -3168,7 +3179,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) bool replicate = false; int index = is_q << 3 | S << 2 | size; int ebytes, xs; - TCGv_i64 tcg_addr, tcg_rn; + TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes; switch (scale) { case 3: @@ -3221,49 +3232,28 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) tcg_rn = cpu_reg_sp(s, rn); tcg_addr = tcg_temp_new_i64(); tcg_gen_mov_i64(tcg_addr, tcg_rn); + tcg_ebytes = tcg_const_i64(ebytes); for (xs = 0; xs < selem; xs++) { if (replicate) { /* Load and replicate to all elements */ - uint64_t mulconst; TCGv_i64 tcg_tmp = tcg_temp_new_i64(); tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), s->be_data + scale); - switch (scale) { - case 0: - mulconst = 0x0101010101010101ULL; - break; - case 1: - mulconst = 0x0001000100010001ULL; - break; - case 2: - mulconst = 0x0000000100000001ULL; - break; - case 3: - mulconst = 0; - break; - default: - g_assert_not_reached(); - } - if (mulconst) { - tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst); - } - write_vec_element(s, tcg_tmp, rt, 0, MO_64); - if (is_q) { - write_vec_element(s, tcg_tmp, rt, 1, MO_64); - } + tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt), + (is_q + 1) * 8, vec_full_reg_size(s), + tcg_tmp); tcg_temp_free_i64(tcg_tmp); - clear_vec_high(s, is_q, rt); } else { /* Load/store one element per register */ if (is_load) { - do_vec_ld(s, rt, index, tcg_addr, scale); + do_vec_ld(s, rt, index, tcg_addr, scale, s->be_data); } else { - do_vec_st(s, rt, index, tcg_addr, scale); + do_vec_st(s, rt, index, tcg_addr, scale, s->be_data); } } - tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); + tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes); rt = (rt + 1) % 32; } @@ -3275,6 +3265,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); } } + tcg_temp_free_i64(tcg_ebytes); tcg_temp_free_i64(tcg_addr); } @@ -4574,7 +4565,7 @@ static void handle_crc32(DisasContext *s, TCGv_i64 tcg_acc, tcg_val; TCGv_i32 tcg_bytes; - if (!arm_dc_feature(s, ARM_FEATURE_CRC) + if (!dc_isar_feature(aa64_crc32, s) || (sf == 1 && sz != 3) || (sf == 0 && sz == 3)) { unallocated_encoding(s); @@ -4816,7 +4807,7 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn) break; case 3: size = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -4867,7 +4858,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn) break; case 3: size = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -4933,7 +4924,7 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) break; case 3: sz = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -5266,7 +5257,7 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) handle_fp_1src_double(s, opcode, rd, rn); break; case 3: - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -5481,7 +5472,7 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn) handle_fp_2src_double(s, opcode, rd, rn, rm); break; case 3: - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -5639,7 +5630,7 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn) handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); break; case 3: - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -5709,7 +5700,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn) break; case 3: sz = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -5934,7 +5925,7 @@ static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) case 1: /* float64 */ break; case 3: /* float16 */ - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -6064,7 +6055,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) break; case 0x6: /* 16-bit float, 32-bit int */ case 0xe: /* 16-bit float, 64-bit int */ - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -6091,7 +6082,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) case 1: /* float64 */ break; case 3: /* float16 */ - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -6528,7 +6519,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) */ is_min = extract32(size, 1, 1); is_fp = true; - if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!is_u && dc_isar_feature(aa64_fp16, s)) { size = 1; } else if (!is_u || !is_q || extract32(size, 0, 1)) { unallocated_encoding(s); @@ -6924,7 +6915,7 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { /* Check for FMOV (vector, immediate) - half-precision */ - if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) { + if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { unallocated_encoding(s); return; } @@ -7091,7 +7082,7 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) case 0x2f: /* FMINP */ /* FP op, size[0] is 32 or 64 bit*/ if (!u) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } else { @@ -7736,7 +7727,7 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, size = MO_32; } else if (immh & 2) { size = MO_16; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -7781,7 +7772,7 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, size = MO_32; } else if (immh & 0x2) { size = MO_16; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -8046,28 +8037,6 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) } } -/* CMTST : test is "if (X & Y != 0)". */ -static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_and_i32(d, a, b); - tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0); - tcg_gen_neg_i32(d, d); -} - -static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_and_i64(d, a, b); - tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0); - tcg_gen_neg_i64(d, d); -} - -static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_and_vec(vece, d, a, b); - tcg_gen_dupi_vec(vece, a, 0); - tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); -} - static void handle_3same_64(DisasContext *s, int opcode, bool u, TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) { @@ -8545,7 +8514,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, return; } - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); } @@ -8618,7 +8587,7 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, bool u = extract32(insn, 29, 1); TCGv_i32 ele1, ele2, ele3; TCGv_i64 res; - int feature; + bool feature; switch (u * 16 + opcode) { case 0x10: /* SQRDMLAH (vector) */ @@ -8627,13 +8596,13 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_RDM; + feature = dc_isar_feature(aa64_rdm, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -9407,191 +9376,10 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } } -static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar8i_i64(a, a, shift); - tcg_gen_vec_add8_i64(d, d, a); -} - -static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar16i_i64(a, a, shift); - tcg_gen_vec_add16_i64(d, d, a); -} - -static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_sari_i32(a, a, shift); - tcg_gen_add_i32(d, d, a); -} - -static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_sari_i64(a, a, shift); - tcg_gen_add_i64(d, d, a); -} - -static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - tcg_gen_sari_vec(vece, a, a, sh); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr8i_i64(a, a, shift); - tcg_gen_vec_add8_i64(d, d, a); -} - -static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr16i_i64(a, a, shift); - tcg_gen_vec_add16_i64(d, d, a); -} - -static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(a, a, shift); - tcg_gen_add_i32(d, d, a); -} - -static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_shri_i64(a, a, shift); - tcg_gen_add_i64(d, d, a); -} - -static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - tcg_gen_shri_vec(vece, a, a, sh); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_8, 0xff >> shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_16, 0xffff >> shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(a, a, shift); - tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); -} - -static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_shri_i64(a, a, shift); - tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); -} - -static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - uint64_t mask = (2ull << ((8 << vece) - 1)) - 1; - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec m = tcg_temp_new_vec_matching(d); - - tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh)); - tcg_gen_shri_vec(vece, t, a, sh); - tcg_gen_and_vec(vece, d, d, m); - tcg_gen_or_vec(vece, d, d, t); - - tcg_temp_free_vec(t); - tcg_temp_free_vec(m); -} - /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, int immh, int immb, int opcode, int rn, int rd) { - static const GVecGen2i ssra_op[4] = { - { .fni8 = gen_ssra8_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_8 }, - { .fni8 = gen_ssra16_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_16 }, - { .fni4 = gen_ssra32_i32, - .fniv = gen_ssra_vec, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_32 }, - { .fni8 = gen_ssra64_i64, - .fniv = gen_ssra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_64 }, - }; - static const GVecGen2i usra_op[4] = { - { .fni8 = gen_usra8_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_8, }, - { .fni8 = gen_usra16_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_16, }, - { .fni4 = gen_usra32_i32, - .fniv = gen_usra_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_32, }, - { .fni8 = gen_usra64_i64, - .fniv = gen_usra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_64, }, - }; - static const GVecGen2i sri_op[4] = { - { .fni8 = gen_shr8_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_8 }, - { .fni8 = gen_shr16_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_16 }, - { .fni4 = gen_shr32_ins_i32, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_32 }, - { .fni8 = gen_shr64_ins_i64, - .fniv = gen_shr_ins_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_64 }, - }; - int size = 32 - clz32(immh) - 1; int immhb = immh << 3 | immb; int shift = 2 * (8 << size) - immhb; @@ -9687,85 +9475,10 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, clear_vec_high(s, is_q, rd); } -static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_8, 0xff << shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shli_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_16, 0xffff << shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shli_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); -} - -static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); -} - -static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - uint64_t mask = (1ull << sh) - 1; - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec m = tcg_temp_new_vec_matching(d); - - tcg_gen_dupi_vec(vece, m, mask); - tcg_gen_shli_vec(vece, t, a, sh); - tcg_gen_and_vec(vece, d, d, m); - tcg_gen_or_vec(vece, d, d, t); - - tcg_temp_free_vec(t); - tcg_temp_free_vec(m); -} - /* SHL/SLI - Vector shift left */ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, int immh, int immb, int opcode, int rn, int rd) { - static const GVecGen2i shi_op[4] = { - { .fni8 = gen_shl8_ins_i64, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_8 }, - { .fni8 = gen_shl16_ins_i64, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_shl32_ins_i32, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_shl64_ins_i64, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; int size = 32 - clz32(immh) - 1; int immhb = immh << 3 | immb; int shift = immhb - (8 << size); @@ -9785,7 +9498,7 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, } if (insert) { - gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]); + gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]); } else { gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); } @@ -10362,7 +10075,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) return; } if (size == 3) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) { + if (!dc_isar_feature(aa64_pmull, s)) { unallocated_encoding(s); return; } @@ -10407,70 +10120,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) } } -static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) -{ - tcg_gen_xor_i64(rn, rn, rm); - tcg_gen_and_i64(rn, rn, rd); - tcg_gen_xor_i64(rd, rm, rn); -} - -static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) -{ - tcg_gen_xor_i64(rn, rn, rd); - tcg_gen_and_i64(rn, rn, rm); - tcg_gen_xor_i64(rd, rd, rn); -} - -static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) -{ - tcg_gen_xor_i64(rn, rn, rd); - tcg_gen_andc_i64(rn, rn, rm); - tcg_gen_xor_i64(rd, rd, rn); -} - -static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) -{ - tcg_gen_xor_vec(vece, rn, rn, rm); - tcg_gen_and_vec(vece, rn, rn, rd); - tcg_gen_xor_vec(vece, rd, rm, rn); -} - -static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) -{ - tcg_gen_xor_vec(vece, rn, rn, rd); - tcg_gen_and_vec(vece, rn, rn, rm); - tcg_gen_xor_vec(vece, rd, rd, rn); -} - -static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) -{ - tcg_gen_xor_vec(vece, rn, rn, rd); - tcg_gen_andc_vec(vece, rn, rn, rm); - tcg_gen_xor_vec(vece, rd, rd, rn); -} - /* Logic op (opcode == 3) subgroup of C3.6.16. */ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) { - static const GVecGen3 bsl_op = { - .fni8 = gen_bsl_i64, - .fniv = gen_bsl_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true - }; - static const GVecGen3 bit_op = { - .fni8 = gen_bit_i64, - .fniv = gen_bit_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true - }; - static const GVecGen3 bif_op = { - .fni8 = gen_bif_i64, - .fniv = gen_bif_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true - }; - int rd = extract32(insn, 0, 5); int rn = extract32(insn, 5, 5); int rm = extract32(insn, 16, 5); @@ -10742,131 +10394,9 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) } } -static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u8(a, a, b); - gen_helper_neon_add_u8(d, d, a); -} - -static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u16(a, a, b); - gen_helper_neon_add_u16(d, d, a); -} - -static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_mul_i32(a, a, b); - tcg_gen_add_i32(d, d, a); -} - -static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_mul_i64(a, a, b); - tcg_gen_add_i64(d, d, a); -} - -static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_mul_vec(vece, a, a, b); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u8(a, a, b); - gen_helper_neon_sub_u8(d, d, a); -} - -static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u16(a, a, b); - gen_helper_neon_sub_u16(d, d, a); -} - -static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_mul_i32(a, a, b); - tcg_gen_sub_i32(d, d, a); -} - -static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_mul_i64(a, a, b); - tcg_gen_sub_i64(d, d, a); -} - -static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_mul_vec(vece, a, a, b); - tcg_gen_sub_vec(vece, d, d, a); -} - /* Integer op subgroup of C3.6.16. */ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) { - static const GVecGen3 cmtst_op[4] = { - { .fni4 = gen_helper_neon_tst_u8, - .fniv = gen_cmtst_vec, - .vece = MO_8 }, - { .fni4 = gen_helper_neon_tst_u16, - .fniv = gen_cmtst_vec, - .vece = MO_16 }, - { .fni4 = gen_cmtst_i32, - .fniv = gen_cmtst_vec, - .vece = MO_32 }, - { .fni8 = gen_cmtst_i64, - .fniv = gen_cmtst_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .vece = MO_64 }, - }; - static const GVecGen3 mla_op[4] = { - { .fni4 = gen_mla8_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_8 }, - { .fni4 = gen_mla16_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_mla32_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_mla64_i64, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; - static const GVecGen3 mls_op[4] = { - { .fni4 = gen_mls8_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_8 }, - { .fni4 = gen_mls16_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_mls32_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_mls64_i64, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; - int is_q = extract32(insn, 30, 1); int u = extract32(insn, 29, 1); int size = extract32(insn, 22, 2); @@ -11226,7 +10756,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) TCGv_ptr fpst; bool pairwise = false; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -11414,7 +10944,8 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) int size = extract32(insn, 22, 2); bool u = extract32(insn, 29, 1); bool is_q = extract32(insn, 30, 1); - int feature, rot; + bool feature; + int rot; switch (u * 16 + opcode) { case 0x10: /* SQRDMLAH (vector) */ @@ -11423,7 +10954,7 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_RDM; + feature = dc_isar_feature(aa64_rdm, s); break; case 0x02: /* SDOT (vector) */ case 0x12: /* UDOT (vector) */ @@ -11431,7 +10962,7 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_DOTPROD; + feature = dc_isar_feature(aa64_dp, s); break; case 0x18: /* FCMLA, #0 */ case 0x19: /* FCMLA, #90 */ @@ -11440,18 +10971,18 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) case 0x1c: /* FCADD, #90 */ case 0x1e: /* FCADD, #270 */ if (size == 0 - || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) + || (size == 1 && !dc_isar_feature(aa64_fp16, s)) || (size == 3 && !is_q)) { unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_FCMA; + feature = dc_isar_feature(aa64_fcma, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -12320,7 +11851,7 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) bool need_fpst = true; int rmode; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -12665,14 +12196,14 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) break; case 0x1d: /* SQRDMLAH */ case 0x1f: /* SQRDMLSH */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (!dc_isar_feature(aa64_rdm, s)) { unallocated_encoding(s); return; } break; case 0x0e: /* SDOT */ case 0x1e: /* UDOT */ - if (size != MO_32 || !arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (size != MO_32 || !dc_isar_feature(aa64_dp, s)) { unallocated_encoding(s); return; } @@ -12681,7 +12212,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x13: /* FCMLA #90 */ case 0x15: /* FCMLA #180 */ case 0x17: /* FCMLA #270 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { + if (!dc_isar_feature(aa64_fcma, s)) { unallocated_encoding(s); return; } @@ -12737,7 +12268,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; } - if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -13208,8 +12739,7 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) TCGv_i32 tcg_decrypt; CryptoThreeOpIntFn *genfn; - if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) - || size != 0) { + if (!dc_isar_feature(aa64_aes, s) || size != 0) { unallocated_encoding(s); return; } @@ -13266,7 +12796,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) int rd = extract32(insn, 0, 5); CryptoThreeOpFn *genfn; TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - int feature = ARM_FEATURE_V8_SHA256; + bool feature; if (size != 0) { unallocated_encoding(s); @@ -13279,23 +12809,26 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) case 2: /* SHA1M */ case 3: /* SHA1SU0 */ genfn = NULL; - feature = ARM_FEATURE_V8_SHA1; + feature = dc_isar_feature(aa64_sha1, s); break; case 4: /* SHA256H */ genfn = gen_helper_crypto_sha256h; + feature = dc_isar_feature(aa64_sha256, s); break; case 5: /* SHA256H2 */ genfn = gen_helper_crypto_sha256h2; + feature = dc_isar_feature(aa64_sha256, s); break; case 6: /* SHA256SU1 */ genfn = gen_helper_crypto_sha256su1; + feature = dc_isar_feature(aa64_sha256, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13336,7 +12869,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); CryptoTwoOpFn *genfn; - int feature; + bool feature; TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; if (size != 0) { @@ -13346,15 +12879,15 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SHA1H */ - feature = ARM_FEATURE_V8_SHA1; + feature = dc_isar_feature(aa64_sha1, s); genfn = gen_helper_crypto_sha1h; break; case 1: /* SHA1SU1 */ - feature = ARM_FEATURE_V8_SHA1; + feature = dc_isar_feature(aa64_sha1, s); genfn = gen_helper_crypto_sha1su1; break; case 2: /* SHA256SU0 */ - feature = ARM_FEATURE_V8_SHA256; + feature = dc_isar_feature(aa64_sha256, s); genfn = gen_helper_crypto_sha256su0; break; default: @@ -13362,7 +12895,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13393,40 +12926,40 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - int feature; + bool feature; CryptoThreeOpFn *genfn; if (o == 0) { switch (opcode) { case 0: /* SHA512H */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512h; break; case 1: /* SHA512H2 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512h2; break; case 2: /* SHA512SU1 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512su1; break; case 3: /* RAX1 */ - feature = ARM_FEATURE_V8_SHA3; + feature = dc_isar_feature(aa64_sha3, s); genfn = NULL; break; } } else { switch (opcode) { case 0: /* SM3PARTW1 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); genfn = gen_helper_crypto_sm3partw1; break; case 1: /* SM3PARTW2 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); genfn = gen_helper_crypto_sm3partw2; break; case 2: /* SM4EKEY */ - feature = ARM_FEATURE_V8_SM4; + feature = dc_isar_feature(aa64_sm4, s); genfn = gen_helper_crypto_sm4ekey; break; default: @@ -13435,7 +12968,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) } } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13494,16 +13027,16 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; - int feature; + bool feature; CryptoTwoOpFn *genfn; switch (opcode) { case 0: /* SHA512SU0 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512su0; break; case 1: /* SM4E */ - feature = ARM_FEATURE_V8_SM4; + feature = dc_isar_feature(aa64_sm4, s); genfn = gen_helper_crypto_sm4e; break; default: @@ -13511,7 +13044,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13542,22 +13075,22 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) int ra = extract32(insn, 10, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - int feature; + bool feature; switch (op0) { case 0: /* EOR3 */ case 1: /* BCAX */ - feature = ARM_FEATURE_V8_SHA3; + feature = dc_isar_feature(aa64_sha3, s); break; case 2: /* SM3SS1 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13644,7 +13177,7 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn) TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; int pass; - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) { + if (!dc_isar_feature(aa64_sha3, s)) { unallocated_encoding(s); return; } @@ -13690,7 +13223,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; TCGv_i32 tcg_imm2, tcg_opcode; - if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) { + if (!dc_isar_feature(aa64_sm3, s)) { unallocated_encoding(s); return; } @@ -13798,7 +13331,7 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) unallocated_encoding(s); break; case 0x2: - if (!arm_dc_feature(s, ARM_FEATURE_SVE) || !disas_sve(s, insn)) { + if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) { unallocated_encoding(s); } break; @@ -13839,6 +13372,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, ARMCPU *arm_cpu = arm_env_get_cpu(env); int bound; + dc->isar = &arm_cpu->isar; dc->pc = dc->base.pc_first; dc->condjmp = 0; @@ -13902,7 +13436,6 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) { - tcg_clear_temp_count(); } static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) diff --git a/target/arm/translate.c b/target/arm/translate.c index 1b4bacb522..7c4675ffd8 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -42,7 +42,7 @@ #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5) /* currently all emulated v5 cores are also v5TE, so don't bother */ #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5) -#define ENABLE_ARCH_5J arm_dc_feature(s, ARM_FEATURE_JAZELLE) +#define ENABLE_ARCH_5J dc_isar_feature(jazelle, s) #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6) #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K) #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2) @@ -72,7 +72,7 @@ static TCGv_i64 cpu_F0d, cpu_F1d; #include "exec/gen-icount.h" -static const char *regnames[] = +static const char * const regnames[] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" }; @@ -1585,6 +1585,25 @@ neon_reg_offset (int reg, int n) return vfp_reg_offset(0, sreg); } +/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, + * where 0 is the least significant end of the register. + */ +static inline long +neon_element_offset(int reg, int element, TCGMemOp size) +{ + int element_size = 1 << size; + int ofs = element * element_size; +#ifdef HOST_WORDS_BIGENDIAN + /* Calculate the offset assuming fully little-endian, + * then XOR to account for the order of the 8-byte units. + */ + if (element_size < 8) { + ofs ^= 8 - element_size; + } +#endif + return neon_reg_offset(reg, 0) + ofs; +} + static TCGv_i32 neon_load_reg(int reg, int pass) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -1592,12 +1611,94 @@ static TCGv_i32 neon_load_reg(int reg, int pass) return tmp; } +static void neon_load_element(TCGv_i32 var, int reg, int ele, TCGMemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i32(var, cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i32(var, cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld_i32(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_load_element64(TCGv_i64 var, int reg, int ele, TCGMemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i64(var, cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i64(var, cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld32u_i64(var, cpu_env, offset); + break; + case MO_Q: + tcg_gen_ld_i64(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + static void neon_store_reg(int reg, int pass, TCGv_i32 var) { tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass)); tcg_temp_free_i32(var); } +static void neon_store_element(int reg, int ele, TCGMemOp size, TCGv_i32 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i32(var, cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i32(var, cpu_env, offset); + break; + case MO_32: + tcg_gen_st_i32(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_store_element64(int reg, int ele, TCGMemOp size, TCGv_i64 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i64(var, cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i64(var, cpu_env, offset); + break; + case MO_32: + tcg_gen_st32_i64(var, cpu_env, offset); + break; + case MO_64: + tcg_gen_st_i64(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + static inline void neon_load_reg64(TCGv_i64 var, int reg) { tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg)); @@ -2974,19 +3075,6 @@ static void gen_vfp_msr(TCGv_i32 tmp) tcg_temp_free_i32(tmp); } -static void gen_neon_dup_u8(TCGv_i32 var, int shift) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - if (shift) - tcg_gen_shri_i32(var, var, shift); - tcg_gen_ext8u_i32(var, var); - tcg_gen_shli_i32(tmp, var, 8); - tcg_gen_or_i32(var, var, tmp); - tcg_gen_shli_i32(tmp, var, 16); - tcg_gen_or_i32(var, var, tmp); - tcg_temp_free_i32(tmp); -} - static void gen_neon_dup_low16(TCGv_i32 var) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -3005,28 +3093,6 @@ static void gen_neon_dup_high16(TCGv_i32 var) tcg_temp_free_i32(tmp); } -static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size) -{ - /* Load a single Neon element and replicate into a 32 bit TCG reg */ - TCGv_i32 tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - gen_neon_dup_u8(tmp, 0); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - gen_neon_dup_low16(tmp); - break; - case 2: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: /* Avoid compiler warnings. */ - abort(); - } - return tmp; -} - static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm, uint32_t dp) { @@ -3432,17 +3498,10 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) tmp = load_reg(s, rd); if (insn & (1 << 23)) { /* VDUP */ - if (size == 0) { - gen_neon_dup_u8(tmp, 0); - } else if (size == 1) { - gen_neon_dup_low16(tmp); - } - for (n = 0; n <= pass * 2; n++) { - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rn, n, tmp2); - } - neon_store_reg(rn, n, tmp); + int vec_size = pass ? 16 : 8; + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rn, 0), + vec_size, vec_size, tmp); + tcg_temp_free_i32(tmp); } else { /* VMOV */ switch (size) { @@ -4907,17 +4966,17 @@ static struct { int nregs; int interleave; int spacing; -} neon_ls_element_type[11] = { - {4, 4, 1}, - {4, 4, 2}, +} const neon_ls_element_type[11] = { + {1, 4, 1}, + {1, 4, 2}, {4, 1, 1}, - {4, 2, 1}, - {3, 3, 1}, - {3, 3, 2}, + {2, 2, 2}, + {1, 3, 1}, + {1, 3, 2}, {3, 1, 1}, {1, 1, 1}, - {2, 2, 1}, - {2, 2, 2}, + {1, 2, 1}, + {1, 2, 2}, {2, 1, 1} }; @@ -4933,10 +4992,11 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) int stride; int size; int reg; - int pass; int load; - int shift; int n; + int vec_size; + int mmu_idx; + TCGMemOp endian; TCGv_i32 addr; TCGv_i32 tmp; TCGv_i32 tmp2; @@ -4948,7 +5008,7 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) */ if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } @@ -4958,6 +5018,8 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) rn = (insn >> 16) & 0xf; rm = insn & 0xf; load = (insn & (1 << 21)) != 0; + endian = s->be_data; + mmu_idx = get_mem_index(s); if ((insn & (1 << 23)) == 0) { /* Load store all elements. */ op = (insn >> 8) & 0xf; @@ -4982,104 +5044,44 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) nregs = neon_ls_element_type[op].nregs; interleave = neon_ls_element_type[op].interleave; spacing = neon_ls_element_type[op].spacing; - if (size == 3 && (interleave | spacing) != 1) + if (size == 3 && (interleave | spacing) != 1) { return 1; + } + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + /* Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. + */ + if (interleave == 1 && endian == MO_LE) { + size = 3; + } + tmp64 = tcg_temp_new_i64(); addr = tcg_temp_new_i32(); + tmp2 = tcg_const_i32(1 << size); load_reg_var(s, addr, rn); - stride = (1 << size) * interleave; for (reg = 0; reg < nregs; reg++) { - if (interleave > 2 || (interleave == 2 && nregs == 2)) { - load_reg_var(s, addr, rn); - tcg_gen_addi_i32(addr, addr, (1 << size) * reg); - } else if (interleave == 2 && nregs == 4 && reg == 2) { - load_reg_var(s, addr, rn); - tcg_gen_addi_i32(addr, addr, 1 << size); - } - if (size == 3) { - tmp64 = tcg_temp_new_i64(); - if (load) { - gen_aa32_ld64(s, tmp64, addr, get_mem_index(s)); - neon_store_reg64(tmp64, rd); - } else { - neon_load_reg64(tmp64, rd); - gen_aa32_st64(s, tmp64, addr, get_mem_index(s)); - } - tcg_temp_free_i64(tmp64); - tcg_gen_addi_i32(addr, addr, stride); - } else { - for (pass = 0; pass < 2; pass++) { - if (size == 2) { - if (load) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - neon_store_reg(rd, pass, tmp); - } else { - tmp = neon_load_reg(rd, pass); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - } - tcg_gen_addi_i32(addr, addr, stride); - } else if (size == 1) { - if (load) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - tmp2 = tcg_temp_new_i32(); - gen_aa32_ld16u(s, tmp2, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - tcg_gen_shli_i32(tmp2, tmp2, 16); - tcg_gen_or_i32(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); - neon_store_reg(rd, pass, tmp); - } else { - tmp = neon_load_reg(rd, pass); - tmp2 = tcg_temp_new_i32(); - tcg_gen_shri_i32(tmp2, tmp, 16); - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, stride); - gen_aa32_st16(s, tmp2, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp2); - tcg_gen_addi_i32(addr, addr, stride); - } - } else /* size == 0 */ { - if (load) { - tmp2 = NULL; - for (n = 0; n < 4; n++) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - if (n == 0) { - tmp2 = tmp; - } else { - tcg_gen_shli_i32(tmp, tmp, n * 8); - tcg_gen_or_i32(tmp2, tmp2, tmp); - tcg_temp_free_i32(tmp); - } - } - neon_store_reg(rd, pass, tmp2); - } else { - tmp2 = neon_load_reg(rd, pass); - for (n = 0; n < 4; n++) { - tmp = tcg_temp_new_i32(); - if (n == 0) { - tcg_gen_mov_i32(tmp, tmp2); - } else { - tcg_gen_shri_i32(tmp, tmp2, n * 8); - } - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, stride); - } - tcg_temp_free_i32(tmp2); - } + for (n = 0; n < 8 >> size; n++) { + int xs; + for (xs = 0; xs < interleave; xs++) { + int tt = rd + reg + spacing * xs; + + if (load) { + gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); + neon_store_element64(tt, n, size, tmp64); + } else { + neon_load_element64(tmp64, tt, n, size); + gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); } + tcg_gen_add_i32(addr, addr, tmp2); } } - rd += spacing; } tcg_temp_free_i32(addr); - stride = nregs * 8; + tcg_temp_free_i32(tmp2); + tcg_temp_free_i64(tmp64); + stride = nregs * interleave * 8; } else { size = (insn >> 10) & 3; if (size == 3) { @@ -5106,45 +5108,50 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) } addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); - if (nregs == 1) { - /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */ - tmp = gen_load_and_replicate(s, addr, size); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); - if (insn & (1 << 5)) { - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1)); - } - tcg_temp_free_i32(tmp); - } else { - /* VLD2/3/4 to all lanes: bit 5 indicates register stride */ - stride = (insn & (1 << 5)) ? 2 : 1; - for (reg = 0; reg < nregs; reg++) { - tmp = gen_load_and_replicate(s, addr, size); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, 1 << size); - rd += stride; + + /* VLD1 to all lanes: bit 5 indicates how many Dregs to write. + * VLD2/3/4 to all lanes: bit 5 indicates register stride. + */ + stride = (insn & (1 << 5)) ? 2 : 1; + vec_size = nregs == 1 ? stride * 8 : 8; + + tmp = tcg_temp_new_i32(); + for (reg = 0; reg < nregs; reg++) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + if ((rd & 1) && vec_size == 16) { + /* We cannot write 16 bytes at once because the + * destination is unaligned. + */ + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), + 8, 8, tmp); + tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0), + neon_reg_offset(rd, 0), 8, 8); + } else { + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), + vec_size, vec_size, tmp); } + tcg_gen_addi_i32(addr, addr, 1 << size); + rd += stride; } + tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); stride = (1 << size) * nregs; } else { /* Single element. */ int idx = (insn >> 4) & 0xf; - pass = (insn >> 7) & 1; + int reg_idx; switch (size) { case 0: - shift = ((insn >> 5) & 3) * 8; + reg_idx = (insn >> 5) & 7; stride = 1; break; case 1: - shift = ((insn >> 6) & 1) * 16; + reg_idx = (insn >> 6) & 3; stride = (insn & (1 << 5)) ? 2 : 1; break; case 2: - shift = 0; + reg_idx = (insn >> 7) & 1; stride = (insn & (1 << 6)) ? 2 : 1; break; default: @@ -5184,52 +5191,24 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) */ return 1; } + tmp = tcg_temp_new_i32(); addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); for (reg = 0; reg < nregs; reg++) { if (load) { - tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - break; - case 2: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: /* Avoid compiler warnings. */ - abort(); - } - if (size != 2) { - tmp2 = neon_load_reg(rd, pass); - tcg_gen_deposit_i32(tmp, tmp2, tmp, - shift, size ? 16 : 8); - tcg_temp_free_i32(tmp2); - } - neon_store_reg(rd, pass, tmp); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + neon_store_element(rd, reg_idx, size, tmp); } else { /* Store */ - tmp = neon_load_reg(rd, pass); - if (shift) - tcg_gen_shri_i32(tmp, tmp, shift); - switch (size) { - case 0: - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - break; - case 2: - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - break; - } - tcg_temp_free_i32(tmp); + neon_load_element(tmp, rd, reg_idx, size); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); } rd += stride; tcg_gen_addi_i32(addr, addr, 1 << size); } tcg_temp_free_i32(addr); + tcg_temp_free_i32(tmp); stride = nregs * (1 << size); } } @@ -5250,14 +5229,6 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) return 0; } -/* Bitwise select. dest = c ? t : f. Clobbers T and F. */ -static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c) -{ - tcg_gen_and_i32(t, t, c); - tcg_gen_andc_i32(f, f, c); - tcg_gen_or_i32(dest, t, f); -} - static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src) { switch (size) { @@ -5464,7 +5435,7 @@ static void gen_neon_narrow_op(int op, int u, int size, #define NEON_3R_VABA 15 #define NEON_3R_VADD_VSUB 16 #define NEON_3R_VTST_VCEQ 17 -#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */ +#define NEON_3R_VML 18 /* VMLA, VMLS */ #define NEON_3R_VMUL 19 #define NEON_3R_VPMAX 20 #define NEON_3R_VPMIN 21 @@ -5689,7 +5660,7 @@ static const uint8_t neon_2rm_sizes[] = { static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, int q, int rd, int rn, int rm) { - if (arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (dc_isar_feature(aa32_rdm, s)) { int opr_sz = (1 + q) * 8; tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), vfp_reg_offset(1, rn), @@ -5700,6 +5671,483 @@ static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, return 1; } +/* + * Expanders for VBitOps_VBIF, VBIT, VBSL. + */ +static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rm); + tcg_gen_and_i64(rn, rn, rd); + tcg_gen_xor_i64(rd, rm, rn); +} + +static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rd); + tcg_gen_and_i64(rn, rn, rm); + tcg_gen_xor_i64(rd, rd, rn); +} + +static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rd); + tcg_gen_andc_i64(rn, rn, rm); + tcg_gen_xor_i64(rd, rd, rn); +} + +static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rm); + tcg_gen_and_vec(vece, rn, rn, rd); + tcg_gen_xor_vec(vece, rd, rm, rn); +} + +static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rd); + tcg_gen_and_vec(vece, rn, rn, rm); + tcg_gen_xor_vec(vece, rd, rd, rn); +} + +static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rd); + tcg_gen_andc_vec(vece, rn, rn, rm); + tcg_gen_xor_vec(vece, rd, rd, rn); +} + +const GVecGen3 bsl_op = { + .fni8 = gen_bsl_i64, + .fniv = gen_bsl_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +const GVecGen3 bit_op = { + .fni8 = gen_bit_i64, + .fniv = gen_bit_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +const GVecGen3 bif_op = { + .fni8 = gen_bif_i64, + .fniv = gen_bif_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_sari_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_sari_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_sari_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +const GVecGen2i ssra_op[4] = { + { .fni8 = gen_ssra8_i64, + .fniv = gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_8 }, + { .fni8 = gen_ssra16_i64, + .fniv = gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_16 }, + { .fni4 = gen_ssra32_i32, + .fniv = gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_32 }, + { .fni8 = gen_ssra64_i64, + .fniv = gen_ssra_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_64 }, +}; + +static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_shri_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +const GVecGen2i usra_op[4] = { + { .fni8 = gen_usra8_i64, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_8, }, + { .fni8 = gen_usra16_i64, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_16, }, + { .fni4 = gen_usra32_i32, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_32, }, + { .fni8 = gen_usra64_i64, + .fniv = gen_usra_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_64, }, +}; + +static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); +} + +static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); +} + +static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + if (sh == 0) { + tcg_gen_mov_vec(d, a); + } else { + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); + tcg_gen_shri_vec(vece, t, a, sh); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); + + tcg_temp_free_vec(t); + tcg_temp_free_vec(m); + } +} + +const GVecGen2i sri_op[4] = { + { .fni8 = gen_shr8_ins_i64, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_8 }, + { .fni8 = gen_shr16_ins_i64, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_16 }, + { .fni4 = gen_shr32_ins_i32, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_32 }, + { .fni8 = gen_shr64_ins_i64, + .fniv = gen_shr_ins_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_64 }, +}; + +static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); +} + +static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); +} + +static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + if (sh == 0) { + tcg_gen_mov_vec(d, a); + } else { + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); + tcg_gen_shli_vec(vece, t, a, sh); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); + + tcg_temp_free_vec(t); + tcg_temp_free_vec(m); + } +} + +const GVecGen2i sli_op[4] = { + { .fni8 = gen_shl8_ins_i64, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_8 }, + { .fni8 = gen_shl16_ins_i64, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_16 }, + { .fni4 = gen_shl32_ins_i32, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_32 }, + { .fni8 = gen_shl64_ins_i64, + .fniv = gen_shl_ins_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_64 }, +}; + +static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_add_u8(d, d, a); +} + +static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_sub_u8(d, d, a); +} + +static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_add_u16(d, d, a); +} + +static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_sub_u16(d, d, a); +} + +static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_add_i32(d, d, a); +} + +static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_sub_i32(d, d, a); +} + +static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_add_i64(d, d, a); +} + +static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_sub_i64(d, d, a); +} + +static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_add_vec(vece, d, d, a); +} + +static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_sub_vec(vece, d, d, a); +} + +/* Note that while NEON does not support VMLA and VMLS as 64-bit ops, + * these tables are shared with AArch64 which does support them. + */ +const GVecGen3 mla_op[4] = { + { .fni4 = gen_mla8_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_8 }, + { .fni4 = gen_mla16_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_mla32_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_mla64_i64, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .vece = MO_64 }, +}; + +const GVecGen3 mls_op[4] = { + { .fni4 = gen_mls8_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_8 }, + { .fni4 = gen_mls16_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_mls32_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_mls64_i64, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .vece = MO_64 }, +}; + +/* CMTST : test is "if (X & Y != 0)". */ +static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_and_i32(d, a, b); + tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0); + tcg_gen_neg_i32(d, d); +} + +void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_and_i64(d, a, b); + tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0); + tcg_gen_neg_i64(d, d); +} + +static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_and_vec(vece, d, a, b); + tcg_gen_dupi_vec(vece, a, 0); + tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); +} + +const GVecGen3 cmtst_op[4] = { + { .fni4 = gen_helper_neon_tst_u8, + .fniv = gen_cmtst_vec, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_tst_u16, + .fniv = gen_cmtst_vec, + .vece = MO_16 }, + { .fni4 = gen_cmtst_i32, + .fniv = gen_cmtst_vec, + .vece = MO_32 }, + { .fni8 = gen_cmtst_i64, + .fniv = gen_cmtst_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, +}; + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -5709,14 +6157,15 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) { int op; int q; - int rd, rn, rm; + int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; int size; int shift; int pass; int count; int pairwise; int u; - uint32_t imm, mask; + int vec_size; + uint32_t imm; TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; TCGv_ptr ptr1, ptr2, ptr3; TCGv_i64 tmp64; @@ -5727,7 +6176,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) */ if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } @@ -5739,6 +6188,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) VFP_DREG_N(rn, insn); VFP_DREG_M(rm, insn); size = (insn >> 20) & 3; + vec_size = q ? 16 : 8; + rd_ofs = neon_reg_offset(rd, 0); + rn_ofs = neon_reg_offset(rn, 0); + rm_ofs = neon_reg_offset(rm, 0); + if ((insn & (1 << 23)) == 0) { /* Three register same length. */ op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); @@ -5763,7 +6217,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } if (!u) { /* SHA-1 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) { + if (!dc_isar_feature(aa32_sha1, s)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -5773,7 +6227,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4); tcg_temp_free_i32(tmp4); } else { /* SHA-256 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) { + if (!dc_isar_feature(aa32_sha2, s) || size == 3) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -5829,8 +6283,100 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) q, rd, rn, rm); } return 1; + + case NEON_3R_LOGIC: /* Logic ops. */ + switch ((u << 2) | size) { + case 0: /* VAND */ + tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 1: /* VBIC */ + tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 2: + if (rn == rm) { + /* VMOV */ + tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size); + } else { + /* VORR */ + tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + break; + case 3: /* VORN */ + tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 4: /* VEOR */ + tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 5: /* VBSL */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bsl_op); + break; + case 6: /* VBIT */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bit_op); + break; + case 7: /* VBIF */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bif_op); + break; + } + return 0; + + case NEON_3R_VADD_VSUB: + if (u) { + tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { + tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + return 0; + + case NEON_3R_VMUL: /* VMUL */ + if (u) { + /* Polynomial case allows only P8 and is handled below. */ + if (size != 0) { + return 1; + } + } else { + tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + return 0; + } + break; + + case NEON_3R_VML: /* VMLA, VMLS */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, + u ? &mls_op[size] : &mla_op[size]); + return 0; + + case NEON_3R_VTST_VCEQ: + if (u) { /* VCEQ */ + tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { /* VTST */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &cmtst_op[size]); + } + return 0; + + case NEON_3R_VCGT: + tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size, + rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return 0; + + case NEON_3R_VCGE: + tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size, + rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return 0; } - if (size == 3 && op != NEON_3R_LOGIC) { + + if (size == 3) { /* 64-bit element instructions. */ for (pass = 0; pass < (q ? 2 : 1); pass++) { neon_load_reg64(cpu_V0, rn + pass); @@ -5886,13 +6432,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) cpu_V1, cpu_V0); } break; - case NEON_3R_VADD_VSUB: - if (u) { - tcg_gen_sub_i64(CPU_V001); - } else { - tcg_gen_add_i64(CPU_V001); - } - break; default: abort(); } @@ -5942,12 +6481,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } break; - case NEON_3R_VMUL: - if (u && (size != 0)) { - /* UNDEF on invalid size for polynomial subcase */ - return 1; - } - break; case NEON_3R_VFM_VQRDMLSH: if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) { return 1; @@ -5988,52 +6521,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VRHADD: GEN_NEON_INTEGER_OP(rhadd); break; - case NEON_3R_LOGIC: /* Logic ops. */ - switch ((u << 2) | size) { - case 0: /* VAND */ - tcg_gen_and_i32(tmp, tmp, tmp2); - break; - case 1: /* BIC */ - tcg_gen_andc_i32(tmp, tmp, tmp2); - break; - case 2: /* VORR */ - tcg_gen_or_i32(tmp, tmp, tmp2); - break; - case 3: /* VORN */ - tcg_gen_orc_i32(tmp, tmp, tmp2); - break; - case 4: /* VEOR */ - tcg_gen_xor_i32(tmp, tmp, tmp2); - break; - case 5: /* VBSL */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp, tmp2, tmp3); - tcg_temp_free_i32(tmp3); - break; - case 6: /* VBIT */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp, tmp3, tmp2); - tcg_temp_free_i32(tmp3); - break; - case 7: /* VBIF */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp3, tmp, tmp2); - tcg_temp_free_i32(tmp3); - break; - } - break; case NEON_3R_VHSUB: GEN_NEON_INTEGER_OP(hsub); break; case NEON_3R_VQSUB: GEN_NEON_INTEGER_OP_ENV(qsub); break; - case NEON_3R_VCGT: - GEN_NEON_INTEGER_OP(cgt); - break; - case NEON_3R_VCGE: - GEN_NEON_INTEGER_OP(cge); - break; case NEON_3R_VSHL: GEN_NEON_INTEGER_OP(shl); break; @@ -6061,61 +6554,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = neon_load_reg(rd, pass); gen_neon_add(size, tmp, tmp2); break; - case NEON_3R_VADD_VSUB: - if (!u) { /* VADD */ - gen_neon_add(size, tmp, tmp2); - } else { /* VSUB */ - switch (size) { - case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break; - default: abort(); - } - } - break; - case NEON_3R_VTST_VCEQ: - if (!u) { /* VTST */ - switch (size) { - case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break; - default: abort(); - } - } else { /* VCEQ */ - switch (size) { - case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; - default: abort(); - } - } - break; - case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */ - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tmp2); - tmp2 = neon_load_reg(rd, pass); - if (u) { /* VMLS */ - gen_neon_rsb(size, tmp, tmp2); - } else { /* VMLA */ - gen_neon_add(size, tmp, tmp2); - } - break; case NEON_3R_VMUL: - if (u) { /* polynomial */ - gen_helper_neon_mul_p8(tmp, tmp, tmp2); - } else { /* Integer */ - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: abort(); - } - } + /* VMUL.P8; other cases already eliminated. */ + gen_helper_neon_mul_p8(tmp, tmp, tmp2); break; case NEON_3R_VPMAX: GEN_NEON_INTEGER_OP(pmax); @@ -6297,8 +6738,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) size--; } shift = (insn >> 16) & ((1 << (3 + size)) - 1); - /* To avoid excessive duplication of ops we implement shift - by immediate using the variable shift operations. */ if (op < 8) { /* Shift by immediate: VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ @@ -6310,43 +6749,99 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } /* Right shifts are encoded as N - shift, where N is the element size in bits. */ - if (op <= 4) + if (op <= 4) { shift = shift - (1 << (size + 3)); + } + + switch (op) { + case 0: /* VSHR */ + /* Right shift comes here negative. */ + shift = -shift; + /* Shifts larger than the element size are architecturally + * valid. Unsigned results in all zeros; signed results + * in all sign bits. + */ + if (!u) { + tcg_gen_gvec_sari(size, rd_ofs, rm_ofs, + MIN(shift, (8 << size) - 1), + vec_size, vec_size); + } else if (shift >= 8 << size) { + tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0); + } else { + tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift, + vec_size, vec_size); + } + return 0; + + case 1: /* VSRA */ + /* Right shift comes here negative. */ + shift = -shift; + /* Shifts larger than the element size are architecturally + * valid. Unsigned results in all zeros; signed results + * in all sign bits. + */ + if (!u) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + MIN(shift, (8 << size) - 1), + &ssra_op[size]); + } else if (shift >= 8 << size) { + /* rd += 0 */ + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + shift, &usra_op[size]); + } + return 0; + + case 4: /* VSRI */ + if (!u) { + return 1; + } + /* Right shift comes here negative. */ + shift = -shift; + /* Shift out of range leaves destination unchanged. */ + if (shift < 8 << size) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + shift, &sri_op[size]); + } + return 0; + + case 5: /* VSHL, VSLI */ + if (u) { /* VSLI */ + /* Shift out of range leaves destination unchanged. */ + if (shift < 8 << size) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, + vec_size, shift, &sli_op[size]); + } + } else { /* VSHL */ + /* Shifts larger than the element size are + * architecturally valid and results in zero. + */ + if (shift >= 8 << size) { + tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0); + } else { + tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift, + vec_size, vec_size); + } + } + return 0; + } + if (size == 3) { count = q + 1; } else { count = q ? 4: 2; } - switch (size) { - case 0: - imm = (uint8_t) shift; - imm |= imm << 8; - imm |= imm << 16; - break; - case 1: - imm = (uint16_t) shift; - imm |= imm << 16; - break; - case 2: - case 3: - imm = shift; - break; - default: - abort(); - } + + /* To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. + */ + imm = dup_const(size, shift); for (pass = 0; pass < count; pass++) { if (size == 3) { neon_load_reg64(cpu_V0, rm + pass); tcg_gen_movi_i64(cpu_V1, imm); switch (op) { - case 0: /* VSHR */ - case 1: /* VSRA */ - if (u) - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - else - gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1); - break; case 2: /* VRSHR */ case 3: /* VRSRA */ if (u) @@ -6354,10 +6849,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) else gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); break; - case 4: /* VSRI */ - case 5: /* VSHL, VSLI */ - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - break; case 6: /* VQSHLU */ gen_helper_neon_qshlu_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1); @@ -6371,26 +6862,13 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) cpu_V0, cpu_V1); } break; + default: + g_assert_not_reached(); } - if (op == 1 || op == 3) { + if (op == 3) { /* Accumulate. */ neon_load_reg64(cpu_V1, rd + pass); tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1); - } else if (op == 4 || (op == 5 && u)) { - /* Insert */ - neon_load_reg64(cpu_V1, rd + pass); - uint64_t mask; - if (shift < -63 || shift > 63) { - mask = 0; - } else { - if (op == 4) { - mask = 0xffffffffffffffffull >> -shift; - } else { - mask = 0xffffffffffffffffull << shift; - } - } - tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask); - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); } neon_store_reg64(cpu_V0, rd + pass); } else { /* size < 3 */ @@ -6399,23 +6877,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, imm); switch (op) { - case 0: /* VSHR */ - case 1: /* VSRA */ - GEN_NEON_INTEGER_OP(shl); - break; case 2: /* VRSHR */ case 3: /* VRSRA */ GEN_NEON_INTEGER_OP(rshl); break; - case 4: /* VSRI */ - case 5: /* VSHL, VSLI */ - switch (size) { - case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break; - default: abort(); - } - break; case 6: /* VQSHLU */ switch (size) { case 0: @@ -6437,50 +6902,16 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case 7: /* VQSHL */ GEN_NEON_INTEGER_OP_ENV(qshl); break; + default: + g_assert_not_reached(); } tcg_temp_free_i32(tmp2); - if (op == 1 || op == 3) { + if (op == 3) { /* Accumulate. */ tmp2 = neon_load_reg(rd, pass); gen_neon_add(size, tmp, tmp2); tcg_temp_free_i32(tmp2); - } else if (op == 4 || (op == 5 && u)) { - /* Insert */ - switch (size) { - case 0: - if (op == 4) - mask = 0xff >> -shift; - else - mask = (uint8_t)(0xff << shift); - mask |= mask << 8; - mask |= mask << 16; - break; - case 1: - if (op == 4) - mask = 0xffff >> -shift; - else - mask = (uint16_t)(0xffff << shift); - mask |= mask << 16; - break; - case 2: - if (shift < -31 || shift > 31) { - mask = 0; - } else { - if (op == 4) - mask = 0xffffffffu >> -shift; - else - mask = 0xffffffffu << shift; - } - break; - default: - abort(); - } - tmp2 = neon_load_reg(rd, pass); - tcg_gen_andi_i32(tmp, tmp, mask); - tcg_gen_andi_i32(tmp2, tmp2, ~mask); - tcg_gen_or_i32(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); } neon_store_reg(rd, pass, tmp); } @@ -6629,7 +7060,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } } else { /* (insn & 0x00380080) == 0 */ - int invert; + int invert, reg_ofs, vec_size; + if (q && (rd & 1)) { return 1; } @@ -6669,8 +7101,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; case 14: imm |= (imm << 8) | (imm << 16) | (imm << 24); - if (invert) + if (invert) { imm = ~imm; + } break; case 15: if (invert) { @@ -6680,36 +7113,45 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); break; } - if (invert) + if (invert) { imm = ~imm; + } - for (pass = 0; pass < (q ? 4 : 2); pass++) { - if (op & 1 && op < 12) { - tmp = neon_load_reg(rd, pass); - if (invert) { - /* The immediate value has already been inverted, so - BIC becomes AND. */ - tcg_gen_andi_i32(tmp, tmp, imm); - } else { - tcg_gen_ori_i32(tmp, tmp, imm); - } + reg_ofs = neon_reg_offset(rd, 0); + vec_size = q ? 16 : 8; + + if (op & 1 && op < 12) { + if (invert) { + /* The immediate value has already been inverted, + * so BIC becomes AND. + */ + tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm, + vec_size, vec_size); } else { - /* VMOV, VMVN. */ - tmp = tcg_temp_new_i32(); - if (op == 14 && invert) { - int n; - uint32_t val; - val = 0; - for (n = 0; n < 4; n++) { - if (imm & (1 << (n + (pass & 1) * 4))) - val |= 0xff << (n * 8); - } - tcg_gen_movi_i32(tmp, val); - } else { - tcg_gen_movi_i32(tmp, imm); - } + tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm, + vec_size, vec_size); + } + } else { + /* VMOV, VMVN. */ + if (op == 14 && invert) { + TCGv_i64 t64 = tcg_temp_new_i64(); + + for (pass = 0; pass <= q; ++pass) { + uint64_t val = 0; + int n; + + for (n = 0; n < 8; n++) { + if (imm & (1 << (n + pass * 8))) { + val |= 0xffull << (n * 8); + } + } + tcg_gen_movi_i64(t64, val); + neon_store_reg64(t64, rd + pass); + } + tcg_temp_free_i64(t64); + } else { + tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm); } - neon_store_reg(rd, pass, tmp); } } } else { /* (insn & 0x00800010 == 0x00800000) */ @@ -6768,7 +7210,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) if (op == 14 && size == 2) { TCGv_i64 tcg_rn, tcg_rm, tcg_rd; - if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) { + if (!dc_isar_feature(aa32_pmull, s)) { return 1; } tcg_rn = tcg_temp_new_i64(); @@ -7085,7 +7527,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) { NeonGenThreeOpEnvFn *fn; - if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (!dc_isar_feature(aa32_rdm, s)) { return 1; } if (u && ((rd | rn) & 1)) { @@ -7359,8 +7801,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; } case NEON_2RM_AESE: case NEON_2RM_AESMC: - if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) - || ((rm | rd) & 1)) { + if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7381,8 +7822,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp3); break; case NEON_2RM_SHA1H: - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1) - || ((rm | rd) & 1)) { + if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7399,10 +7839,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */ if (q) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) { + if (!dc_isar_feature(aa32_sha2, s)) { return 1; } - } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) { + } else if (!dc_isar_feature(aa32_sha1, s)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7415,6 +7855,14 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_ptr(ptr1); tcg_temp_free_ptr(ptr2); break; + + case NEON_2RM_VMVN: + tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size); + break; + case NEON_2RM_VNEG: + tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size); + break; + default: elementwise: for (pass = 0; pass < (q ? 4 : 2); pass++) { @@ -7455,9 +7903,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_2RM_VCNT: gen_helper_neon_cnt_u8(tmp, tmp); break; - case NEON_2RM_VMVN: - tcg_gen_not_i32(tmp, tmp); - break; case NEON_2RM_VQABS: switch (size) { case 0: @@ -7530,11 +7975,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) default: abort(); } break; - case NEON_2RM_VNEG: - tmp2 = tcg_const_i32(0); - gen_neon_rsb(size, tmp, tmp2); - tcg_temp_free_i32(tmp2); - break; case NEON_2RM_VCGT0_F: { TCGv_ptr fpstatus = get_fpstatus_ptr(1); @@ -7757,28 +8197,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp); } else if ((insn & 0x380) == 0) { /* VDUP */ + int element; + TCGMemOp size; + if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { return 1; } - if (insn & (1 << 19)) { - tmp = neon_load_reg(rm, 1); - } else { - tmp = neon_load_reg(rm, 0); - } if (insn & (1 << 16)) { - gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8); + size = MO_8; + element = (insn >> 17) & 7; } else if (insn & (1 << 17)) { - if ((insn >> 18) & 1) - gen_neon_dup_high16(tmp); - else - gen_neon_dup_low16(tmp); + size = MO_16; + element = (insn >> 18) & 3; + } else { + size = MO_32; + element = (insn >> 19) & 1; } - for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rd, pass, tmp2); - } - tcg_temp_free_i32(tmp); + tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0), + neon_element_offset(rm, element, size), + q ? 16 : 8, q ? 16 : 8); } else { return 1; } @@ -7813,8 +8250,8 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */ int size = extract32(insn, 20, 1); data = extract32(insn, 23, 2); /* rot */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { + if (!dc_isar_feature(aa32_vcma, s) + || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { return 1; } fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; @@ -7822,15 +8259,15 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */ int size = extract32(insn, 20, 1); data = extract32(insn, 24, 1); /* rot */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { + if (!dc_isar_feature(aa32_vcma, s) + || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { return 1; } fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; } else if ((insn & 0xfeb00f00) == 0xfc200d00) { /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */ bool u = extract32(insn, 4, 1); - if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (!dc_isar_feature(aa32_dp, s)) { return 1; } fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; @@ -7840,7 +8277,7 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } if (!s->vfp_enabled) { @@ -7892,11 +8329,11 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) int size = extract32(insn, 23, 1); int index; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { + if (!dc_isar_feature(aa32_vcma, s)) { return 1; } if (size == 0) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa32_fp16_arith, s)) { return 1; } /* For fp16, rm is just Vm, and index is M. */ @@ -7913,7 +8350,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) } else if ((insn & 0xffb00f00) == 0xfe200d00) { /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */ int u = extract32(insn, 4, 1); - if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (!dc_isar_feature(aa32_dp, s)) { return 1; } fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; @@ -7926,7 +8363,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } if (!s->vfp_enabled) { @@ -8889,8 +9326,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED. * Bits 8, 10 and 11 should be zero. */ - if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 || - (c & 0xd) != 0) { + if (!dc_isar_feature(aa32_crc32, s) || op1 == 0x3 || (c & 0xd) != 0) { goto illegal_op; } @@ -9758,7 +10194,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) case 1: case 3: /* SDIV, UDIV */ - if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) { + if (!dc_isar_feature(arm_div, s)) { goto illegal_op; } if (((insn >> 5) & 7) || (rd != 15)) { @@ -10785,7 +11221,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) case 0x28: case 0x29: case 0x2a: - if (!arm_dc_feature(s, ARM_FEATURE_CRC)) { + if (!dc_isar_feature(aa32_crc32, s)) { goto illegal_op; } break; @@ -10966,7 +11402,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) tmp2 = load_reg(s, rm); if ((op & 0x50) == 0x10) { /* sdiv, udiv */ - if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) { + if (!dc_isar_feature(thumb_div, s)) { goto illegal_op; } if (op & 0x20) @@ -12586,6 +13022,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) CPUARMState *env = cs->env_ptr; ARMCPU *cpu = arm_env_get_cpu(env); + dc->isar = &cpu->isar; dc->pc = dc->base.pc_first; dc->condjmp = 0; @@ -12703,7 +13140,6 @@ static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu) tcg_gen_movi_i32(tmp, 0); store_cpu_field(tmp, condexec_bits); } - tcg_clear_temp_count(); } static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) @@ -13092,11 +13528,6 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) translator_loop(ops, &dc.base, cpu, tb); } -static const char *cpu_mode_names[16] = { - "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt", - "???", "???", "hyp", "und", "???", "???", "???", "sys" -}; - void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -13162,7 +13593,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, psr & CPSR_V ? 'V' : '-', psr & CPSR_T ? 'T' : 'A', ns_status, - cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26); + aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26); } if (flags & CPU_DUMP_FPU) { diff --git a/target/arm/translate.h b/target/arm/translate.h index c1b65f3efb..1550aa8bc7 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -7,6 +7,7 @@ /* internal defines */ typedef struct DisasContext { DisasContextBase base; + const ARMISARegisters *isar; target_ulong pc; target_ulong page_start; @@ -190,4 +191,24 @@ static inline TCGv_i32 get_ahp_flag(void) return ret; } + +/* Vector operations shared between ARM and AArch64. */ +extern const GVecGen3 bsl_op; +extern const GVecGen3 bit_op; +extern const GVecGen3 bif_op; +extern const GVecGen3 mla_op[4]; +extern const GVecGen3 mls_op[4]; +extern const GVecGen3 cmtst_op[4]; +extern const GVecGen2i ssra_op[4]; +extern const GVecGen2i usra_op[4]; +extern const GVecGen2i sri_op[4]; +extern const GVecGen2i sli_op[4]; +void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); + +/* + * Forward to the isar_feature_* tests given a DisasContext pointer. + */ +#define dc_isar_feature(name, ctx) \ + ({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); }) + #endif /* TARGET_ARM_TRANSLATE_H */