diff --git a/hw/arm/virt.c b/hw/arm/virt.c index a4537af400..b334c82eda 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1404,7 +1404,7 @@ static void machvirt_init(MachineState *machine) "secure-memory", &error_abort); } - object_property_set_bool(cpuobj, true, "realized", NULL); + object_property_set_bool(cpuobj, true, "realized", &error_fatal); object_unref(cpuobj); } fdt_add_timer_nodes(vms); diff --git a/hw/display/pl110.c b/hw/display/pl110.c index 8c7dcc6f0a..cf68457fd1 100644 --- a/hw/display/pl110.c +++ b/hw/display/pl110.c @@ -12,6 +12,7 @@ #include "ui/console.h" #include "framebuffer.h" #include "ui/pixel_ops.h" +#include "qemu/timer.h" #include "qemu/log.h" #define PL110_CR_EN 0x001 @@ -19,6 +20,8 @@ #define PL110_CR_BEBO 0x200 #define PL110_CR_BEPO 0x400 #define PL110_CR_PWR 0x800 +#define PL110_IE_NB 0x004 +#define PL110_IE_VC 0x008 enum pl110_bppmode { @@ -50,6 +53,7 @@ typedef struct PL110State { MemoryRegion iomem; MemoryRegionSection fbsection; QemuConsole *con; + QEMUTimer *vblank_timer; int version; uint32_t timing[4]; @@ -320,7 +324,24 @@ static void pl110_resize(PL110State *s, int width, int height) /* Update interrupts. */ static void pl110_update(PL110State *s) { - /* TODO: Implement interrupts. */ + /* Raise IRQ if enabled and any status bit is 1 */ + if (s->int_status & s->int_mask) { + qemu_irq_raise(s->irq); + } else { + qemu_irq_lower(s->irq); + } +} + +static void pl110_vblank_interrupt(void *opaque) +{ + PL110State *s = opaque; + + /* Fire the vertical compare and next base IRQs and re-arm */ + s->int_status |= (PL110_IE_NB | PL110_IE_VC); + timer_mod(s->vblank_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + NANOSECONDS_PER_SECOND / 60); + pl110_update(s); } static uint64_t pl110_read(void *opaque, hwaddr offset, @@ -429,6 +450,11 @@ static void pl110_write(void *opaque, hwaddr offset, s->bpp = (val >> 1) & 7; if (pl110_enabled(s)) { qemu_console_resize(s->con, s->cols, s->rows); + timer_mod(s->vblank_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + NANOSECONDS_PER_SECOND / 60); + } else { + timer_del(s->vblank_timer); } break; case 10: /* LCDICR */ @@ -474,6 +500,8 @@ static void pl110_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->iomem, OBJECT(s), &pl110_ops, s, "pl110", 0x1000); sysbus_init_mmio(sbd, &s->iomem); sysbus_init_irq(sbd, &s->irq); + s->vblank_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + pl110_vblank_interrupt, s); qdev_init_gpio_in(dev, pl110_mux_ctrl_set, 1); s->con = graphic_console_init(dev, 0, &pl110_gfx_ops, s); } diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index d701e49ff9..724bc9fa61 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -93,6 +93,7 @@ void gic_update(GICState *s) best_irq = 1023; for (irq = 0; irq < s->num_irq; irq++) { if (GIC_TEST_ENABLED(irq, cm) && gic_test_pending(s, irq, cm) && + (!GIC_TEST_ACTIVE(irq, cm)) && (irq < GIC_INTERNAL || GIC_TARGET(irq) & cm)) { if (GIC_GET_PRIORITY(irq, cpu) < best_prio) { best_prio = GIC_GET_PRIORITY(irq, cpu); @@ -255,7 +256,8 @@ static int gic_get_group_priority(GICState *s, int cpu, int irq) if (gic_has_groups(s) && !(s->cpu_ctlr[cpu] & GICC_CTLR_CBPR) && GIC_TEST_GROUP(irq, (1 << cpu))) { - bpr = s->abpr[cpu]; + bpr = s->abpr[cpu] - 1; + assert(bpr >= 0); } else { bpr = s->bpr[cpu]; } @@ -503,6 +505,11 @@ static void gic_set_cpu_control(GICState *s, int cpu, uint32_t value, static uint8_t gic_get_running_priority(GICState *s, int cpu, MemTxAttrs attrs) { + if ((s->revision != REV_11MPCORE) && (s->running_priority[cpu] > 0xff)) { + /* Idle 
priority */ + return 0xff; + } + if (s->security_extn && !attrs.secure) { if (s->running_priority[cpu] & 0x80) { /* Running priority in upper half of range: return the Non-secure @@ -1205,8 +1212,13 @@ static MemTxResult gic_cpu_read(GICState *s, int cpu, int offset, break; case 0x08: /* Binary Point */ if (s->security_extn && !attrs.secure) { - /* BPR is banked. Non-secure copy stored in ABPR. */ - *data = s->abpr[cpu]; + if (s->cpu_ctlr[cpu] & GICC_CTLR_CBPR) { + /* NS view of BPR when CBPR is 1 */ + *data = MIN(s->bpr[cpu] + 1, 7); + } else { + /* BPR is banked. Non-secure copy stored in ABPR. */ + *data = s->abpr[cpu]; + } } else { *data = s->bpr[cpu]; } @@ -1279,7 +1291,12 @@ static MemTxResult gic_cpu_write(GICState *s, int cpu, int offset, break; case 0x08: /* Binary Point */ if (s->security_extn && !attrs.secure) { - s->abpr[cpu] = MAX(value & 0x7, GIC_MIN_ABPR); + if (s->cpu_ctlr[cpu] & GICC_CTLR_CBPR) { + /* WI when CBPR is 1 */ + return MEMTX_OK; + } else { + s->abpr[cpu] = MAX(value & 0x7, GIC_MIN_ABPR); + } } else { s->bpr[cpu] = MAX(value & 0x7, GIC_MIN_BPR); } diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index 4fb48f62ba..9506f9b69f 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -595,19 +595,16 @@ static void imx_eth_do_tx(IMXFECState *s, uint32_t index) static void imx_eth_enable_rx(IMXFECState *s, bool flush) { IMXFECBufDesc bd; - bool rx_ring_full; imx_fec_read_bd(&bd, s->rx_descriptor); - rx_ring_full = !(bd.flags & ENET_BD_E); + s->regs[ENET_RDAR] = (bd.flags & ENET_BD_E) ? ENET_RDAR_RDAR : 0; - if (rx_ring_full) { + if (!s->regs[ENET_RDAR]) { FEC_PRINTF("RX buffer full\n"); } else if (flush) { qemu_flush_queued_packets(qemu_get_queue(s->nic)); } - - s->regs[ENET_RDAR] = rx_ring_full ? 0 : ENET_RDAR_RDAR; } static void imx_eth_reset(DeviceState *d) @@ -866,7 +863,6 @@ static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value, case ENET_RDAR: if (s->regs[ENET_ECR] & ENET_ECR_ETHEREN) { if (!s->regs[index]) { - s->regs[index] = ENET_RDAR_RDAR; imx_eth_enable_rx(s, true); } } else { diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index f9264d3be5..fac7fa5c72 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -1388,6 +1388,7 @@ static void sdhci_sysbus_realize(DeviceState *dev, Error ** errp) } if (s->dma_mr) { + s->dma_as = &s->sysbus_dma_as; address_space_init(s->dma_as, s->dma_mr, "sdhci-dma"); } else { /* use system_memory() if property "dma" not set */ diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c index 85c5d0cb92..8af36ca3d4 100644 --- a/hw/ssi/xilinx_spips.c +++ b/hw/ssi/xilinx_spips.c @@ -210,6 +210,9 @@ #define SNOOP_NONE 0xEE #define SNOOP_STRIPING 0 +#define MIN_NUM_BUSSES 1 +#define MAX_NUM_BUSSES 2 + static inline int num_effective_busses(XilinxSPIPS *s) { return (s->regs[R_LQSPI_CFG] & LQSPI_CFG_SEP_BUS && @@ -573,7 +576,7 @@ static void xilinx_spips_flush_txfifo(XilinxSPIPS *s) for (;;) { int i; uint8_t tx = 0; - uint8_t tx_rx[num_effective_busses(s)]; + uint8_t tx_rx[MAX_NUM_BUSSES] = { 0 }; uint8_t dummy_cycles = 0; uint8_t addr_length; @@ -1221,6 +1224,19 @@ static void xilinx_spips_realize(DeviceState *dev, Error **errp) DB_PRINT_L(0, "realized spips\n"); + if (s->num_busses > MAX_NUM_BUSSES) { + error_setg(errp, + "requested number of SPI busses %u exceeds maximum %d", + s->num_busses, MAX_NUM_BUSSES); + return; + } + if (s->num_busses < MIN_NUM_BUSSES) { + error_setg(errp, + "requested number of SPI busses %u is below minimum %d", + s->num_busses, MIN_NUM_BUSSES); + return; + } + s->spi = g_new(SSIBus *, s->num_busses); 
for (i = 0; i < s->num_busses; ++i) { char bus_name[16]; diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h index cb37182536..1cf70f8c23 100644 --- a/include/hw/sd/sdhci.h +++ b/include/hw/sd/sdhci.h @@ -41,6 +41,7 @@ typedef struct SDHCIState { /*< public >*/ SDBus sdbus; MemoryRegion iomem; + AddressSpace sysbus_dma_as; AddressSpace *dma_as; MemoryRegion *dma_mr; diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 88b55df5ae..8c3889433c 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -905,6 +905,9 @@ extern const VMStateInfo vmstate_info_qtailq; #define VMSTATE_UINT32_ARRAY(_f, _s, _n) \ VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0) +#define VMSTATE_UINT32_SUB_ARRAY(_f, _s, _start, _num) \ + VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint32, uint32_t) + #define VMSTATE_UINT32_2DARRAY(_f, _s, _n1, _n2) \ VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, 0) @@ -914,6 +917,9 @@ extern const VMStateInfo vmstate_info_qtailq; #define VMSTATE_UINT64_ARRAY(_f, _s, _n) \ VMSTATE_UINT64_ARRAY_V(_f, _s, _n, 0) +#define VMSTATE_UINT64_SUB_ARRAY(_f, _s, _start, _num) \ + VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint64, uint64_t) + #define VMSTATE_UINT64_2DARRAY(_f, _s, _n1, _n2) \ VMSTATE_UINT64_2DARRAY_V(_f, _s, _n1, _n2, 0) @@ -932,9 +938,6 @@ extern const VMStateInfo vmstate_info_qtailq; #define VMSTATE_INT32_ARRAY(_f, _s, _n) \ VMSTATE_INT32_ARRAY_V(_f, _s, _n, 0) -#define VMSTATE_UINT32_SUB_ARRAY(_f, _s, _start, _num) \ - VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint32, uint32_t) - #define VMSTATE_INT64_ARRAY_V(_f, _s, _n, _v) \ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int64, int64_t) diff --git a/linux-user/signal.c b/linux-user/signal.c index f85f0dd780..5321f9e795 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -1487,12 +1487,13 @@ static int target_setup_sigframe(struct target_rt_sigframe *sf, } for (i = 0; i < 32; i++) { + uint64_t *q = aa64_vfp_qreg(env, i); #ifdef TARGET_WORDS_BIGENDIAN - __put_user(env->vfp.regs[i * 2], &aux->fpsimd.vregs[i * 2 + 1]); - __put_user(env->vfp.regs[i * 2 + 1], &aux->fpsimd.vregs[i * 2]); + __put_user(q[0], &aux->fpsimd.vregs[i * 2 + 1]); + __put_user(q[1], &aux->fpsimd.vregs[i * 2]); #else - __put_user(env->vfp.regs[i * 2], &aux->fpsimd.vregs[i * 2]); - __put_user(env->vfp.regs[i * 2 + 1], &aux->fpsimd.vregs[i * 2 + 1]); + __put_user(q[0], &aux->fpsimd.vregs[i * 2]); + __put_user(q[1], &aux->fpsimd.vregs[i * 2 + 1]); #endif } __put_user(vfp_get_fpsr(env), &aux->fpsimd.fpsr); @@ -1539,12 +1540,13 @@ static int target_restore_sigframe(CPUARMState *env, } for (i = 0; i < 32; i++) { + uint64_t *q = aa64_vfp_qreg(env, i); #ifdef TARGET_WORDS_BIGENDIAN - __get_user(env->vfp.regs[i * 2], &aux->fpsimd.vregs[i * 2 + 1]); - __get_user(env->vfp.regs[i * 2 + 1], &aux->fpsimd.vregs[i * 2]); + __get_user(q[0], &aux->fpsimd.vregs[i * 2 + 1]); + __get_user(q[1], &aux->fpsimd.vregs[i * 2]); #else - __get_user(env->vfp.regs[i * 2], &aux->fpsimd.vregs[i * 2]); - __get_user(env->vfp.regs[i * 2 + 1], &aux->fpsimd.vregs[i * 2 + 1]); + __get_user(q[0], &aux->fpsimd.vregs[i * 2]); + __get_user(q[1], &aux->fpsimd.vregs[i * 2 + 1]); #endif } __get_user(fpsr, &aux->fpsimd.fpsr); @@ -1903,7 +1905,7 @@ static abi_ulong *setup_sigframe_v2_vfp(abi_ulong *regspace, CPUARMState *env) __put_user(TARGET_VFP_MAGIC, &vfpframe->magic); __put_user(sizeof(*vfpframe), &vfpframe->size); for (i = 0; i < 32; i++) { - __put_user(float64_val(env->vfp.regs[i]), &vfpframe->ufp.fpregs[i]); + 
__put_user(*aa32_vfp_dreg(env, i), &vfpframe->ufp.fpregs[i]); } __put_user(vfp_get_fpscr(env), &vfpframe->ufp.fpscr); __put_user(env->vfp.xregs[ARM_VFP_FPEXC], &vfpframe->ufp_exc.fpexc); @@ -2210,7 +2212,7 @@ static abi_ulong *restore_sigframe_v2_vfp(CPUARMState *env, abi_ulong *regspace) return 0; } for (i = 0; i < 32; i++) { - __get_user(float64_val(env->vfp.regs[i]), &vfpframe->ufp.fpregs[i]); + __get_user(*aa32_vfp_dreg(env, i), &vfpframe->ufp.fpregs[i]); } __get_user(fpscr, &vfpframe->ufp.fpscr); vfp_set_fpscr(env, fpscr); diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c index 9e5b2fb31c..26a2c09868 100644 --- a/target/arm/arch_dump.c +++ b/target/arm/arch_dump.c @@ -99,8 +99,10 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f, aarch64_note_init(¬e, s, "CORE", 5, NT_PRFPREG, sizeof(note.vfp)); - for (i = 0; i < 64; ++i) { - note.vfp.vregs[i] = cpu_to_dump64(s, float64_val(env->vfp.regs[i])); + for (i = 0; i < 32; ++i) { + uint64_t *q = aa64_vfp_qreg(env, i); + note.vfp.vregs[2*i + 0] = cpu_to_dump64(s, q[0]); + note.vfp.vregs[2*i + 1] = cpu_to_dump64(s, q[1]); } if (s->dump_info.d_endian == ELFDATA2MSB) { @@ -229,7 +231,7 @@ static int arm_write_elf32_vfp(WriteCoreDumpFunction f, CPUARMState *env, arm_note_init(¬e, s, "LINUX", 6, NT_ARM_VFP, sizeof(note.vfp)); for (i = 0; i < 32; ++i) { - note.vfp.vregs[i] = cpu_to_dump64(s, float64_val(env->vfp.regs[i])); + note.vfp.vregs[i] = cpu_to_dump64(s, *aa32_vfp_dreg(env, i)); } note.vfp.fpscr = cpu_to_dump32(s, vfp_get_fpscr(env)); diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 96316700dd..d2bb59eded 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -492,7 +492,7 @@ typedef struct CPUARMState { * the two execution states, and means we do not need to explicitly * map these registers when changing states. */ - float64 regs[64]; + uint64_t regs[64]; uint32_t xregs[16]; /* We store these fpcsr fields separately for convenience. */ @@ -1340,6 +1340,7 @@ enum arm_features { ARM_FEATURE_VBAR, /* has cp15 VBAR */ ARM_FEATURE_M_SECURITY, /* M profile Security Extension */ ARM_FEATURE_JAZELLE, /* has (trivial) Jazelle implementation */ + ARM_FEATURE_SVE, /* has Scalable Vector Extension */ }; static inline int arm_feature(CPUARMState *env, int feature) @@ -2666,71 +2667,6 @@ static inline bool bswap_code(bool sctlr_b) #endif } -/* Return the exception level to which FP-disabled exceptions should - * be taken, or 0 if FP is enabled. - */ -static inline int fp_exception_el(CPUARMState *env) -{ - int fpen; - int cur_el = arm_current_el(env); - - /* CPACR and the CPTR registers don't exist before v6, so FP is - * always accessible - */ - if (!arm_feature(env, ARM_FEATURE_V6)) { - return 0; - } - - /* The CPACR controls traps to EL1, or PL1 if we're 32 bit: - * 0, 2 : trap EL0 and EL1/PL1 accesses - * 1 : trap only EL0 accesses - * 3 : trap no accesses - */ - fpen = extract32(env->cp15.cpacr_el1, 20, 2); - switch (fpen) { - case 0: - case 2: - if (cur_el == 0 || cur_el == 1) { - /* Trap to PL1, which might be EL1 or EL3 */ - if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { - return 3; - } - return 1; - } - if (cur_el == 3 && !is_a64(env)) { - /* Secure PL1 running at EL3 */ - return 3; - } - break; - case 1: - if (cur_el == 0) { - return 1; - } - break; - case 3: - break; - } - - /* For the CPTR registers we don't need to guard with an ARM_FEATURE - * check because zero bits in the registers mean "don't trap". 
- */ - - /* CPTR_EL2 : present in v7VE or v8 */ - if (cur_el <= 2 && extract32(env->cp15.cptr_el[2], 10, 1) - && !arm_is_secure_below_el3(env)) { - /* Trap FP ops at EL2, NS-EL1 or NS-EL0 to EL2 */ - return 2; - } - - /* CPTR_EL3 : present in v8 */ - if (extract32(env->cp15.cptr_el[3], 10, 1)) { - /* Trap all FP ops to EL3 */ - return 3; - } - - return 0; -} - #ifdef CONFIG_USER_ONLY static inline bool arm_cpu_bswap_data(CPUARMState *env) { @@ -2777,66 +2713,8 @@ static inline uint32_t arm_regime_tbi1(CPUARMState *env, ARMMMUIdx mmu_idx) } #endif -static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) -{ - ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false)); - if (is_a64(env)) { - *pc = env->pc; - *flags = ARM_TBFLAG_AARCH64_STATE_MASK; - /* Get control bits for tagged addresses */ - *flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT); - *flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT); - } else { - *pc = env->regs[15]; - *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT) - | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT) - | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT) - | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT) - | (arm_sctlr_b(env) << ARM_TBFLAG_SCTLR_B_SHIFT); - if (!(access_secure_reg(env))) { - *flags |= ARM_TBFLAG_NS_MASK; - } - if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30) - || arm_el_is_aa64(env, 1)) { - *flags |= ARM_TBFLAG_VFPEN_MASK; - } - *flags |= (extract32(env->cp15.c15_cpar, 0, 2) - << ARM_TBFLAG_XSCALE_CPAR_SHIFT); - } - - *flags |= (arm_to_core_mmu_idx(mmu_idx) << ARM_TBFLAG_MMUIDX_SHIFT); - - /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine - * states defined in the ARM ARM for software singlestep: - * SS_ACTIVE PSTATE.SS State - * 0 x Inactive (the TB flag for SS is always 0) - * 1 0 Active-pending - * 1 1 Active-not-pending - */ - if (arm_singlestep_active(env)) { - *flags |= ARM_TBFLAG_SS_ACTIVE_MASK; - if (is_a64(env)) { - if (env->pstate & PSTATE_SS) { - *flags |= ARM_TBFLAG_PSTATE_SS_MASK; - } - } else { - if (env->uncached_cpsr & PSTATE_SS) { - *flags |= ARM_TBFLAG_PSTATE_SS_MASK; - } - } - } - if (arm_cpu_data_is_big_endian(env)) { - *flags |= ARM_TBFLAG_BE_DATA_MASK; - } - *flags |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT; - - if (arm_v7m_is_handler_mode(env)) { - *flags |= ARM_TBFLAG_HANDLER_MASK; - } - - *cs_base = 0; -} +void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, + target_ulong *cs_base, uint32_t *flags); enum { QEMU_PSCI_CONDUIT_DISABLED = 0, @@ -2885,4 +2763,31 @@ static inline void *arm_get_el_change_hook_opaque(ARMCPU *cpu) return cpu->el_change_hook_opaque; } +/** + * aa32_vfp_dreg: + * Return a pointer to the Dn register within env in 32-bit mode. + */ +static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno) +{ + return &env->vfp.regs[regno]; +} + +/** + * aa32_vfp_qreg: + * Return a pointer to the Qn register within env in 32-bit mode. + */ +static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno) +{ + return &env->vfp.regs[2 * regno]; +} + +/** + * aa64_vfp_qreg: + * Return a pointer to the Qn register within env in 64-bit mode. 
+ */ +static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) +{ + return &env->vfp.regs[2 * regno]; +} + #endif diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c index 3b6df3f41a..9ca0bdead7 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/crypto_helper.c @@ -30,20 +30,14 @@ union CRYPTO_STATE { #define CR_ST_WORD(state, i) (state.words[i]) #endif -void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm, - uint32_t decrypt) +void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) { static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; - - union CRYPTO_STATE rk = { .l = { - float64_val(env->vfp.regs[rm]), - float64_val(env->vfp.regs[rm + 1]) - } }; - union CRYPTO_STATE st = { .l = { - float64_val(env->vfp.regs[rd]), - float64_val(env->vfp.regs[rd + 1]) - } }; + uint64_t *rd = vd; + uint64_t *rm = vm; + union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; + union CRYPTO_STATE st = { .l = { rd[0], rd[1] } }; int i; assert(decrypt < 2); @@ -57,12 +51,11 @@ void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm, CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])]; } - env->vfp.regs[rd] = make_float64(st.l[0]); - env->vfp.regs[rd + 1] = make_float64(st.l[1]); + rd[0] = st.l[0]; + rd[1] = st.l[1]; } -void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, - uint32_t decrypt) +void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) { static uint32_t const mc[][256] = { { /* MixColumns lookup table */ @@ -197,10 +190,10 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5, 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, } }; - union CRYPTO_STATE st = { .l = { - float64_val(env->vfp.regs[rm]), - float64_val(env->vfp.regs[rm + 1]) - } }; + + uint64_t *rd = vd; + uint64_t *rm = vm; + union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; int i; assert(decrypt < 2); @@ -213,8 +206,8 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24); } - env->vfp.regs[rd] = make_float64(st.l[0]); - env->vfp.regs[rd + 1] = make_float64(st.l[1]); + rd[0] = st.l[0]; + rd[1] = st.l[1]; } /* @@ -236,21 +229,14 @@ static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) return (x & y) | ((x | y) & z); } -void HELPER(crypto_sha1_3reg)(CPUARMState *env, uint32_t rd, uint32_t rn, - uint32_t rm, uint32_t op) +void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op) { - union CRYPTO_STATE d = { .l = { - float64_val(env->vfp.regs[rd]), - float64_val(env->vfp.regs[rd + 1]) - } }; - union CRYPTO_STATE n = { .l = { - float64_val(env->vfp.regs[rn]), - float64_val(env->vfp.regs[rn + 1]) - } }; - union CRYPTO_STATE m = { .l = { - float64_val(env->vfp.regs[rm]), - float64_val(env->vfp.regs[rm + 1]) - } }; + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; if (op == 3) { /* sha1su0 */ d.l[0] ^= d.l[1] ^ m.l[0]; @@ -284,42 +270,37 @@ void HELPER(crypto_sha1_3reg)(CPUARMState *env, uint32_t rd, uint32_t rn, CR_ST_WORD(d, 0) = t; } } - env->vfp.regs[rd] = make_float64(d.l[0]); - env->vfp.regs[rd + 1] = make_float64(d.l[1]); + rd[0] = d.l[0]; + rd[1] = d.l[1]; } -void HELPER(crypto_sha1h)(CPUARMState *env, uint32_t rd, uint32_t rm) +void 
HELPER(crypto_sha1h)(void *vd, void *vm) { - union CRYPTO_STATE m = { .l = { - float64_val(env->vfp.regs[rm]), - float64_val(env->vfp.regs[rm + 1]) - } }; + uint64_t *rd = vd; + uint64_t *rm = vm; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; - env->vfp.regs[rd] = make_float64(m.l[0]); - env->vfp.regs[rd + 1] = make_float64(m.l[1]); + rd[0] = m.l[0]; + rd[1] = m.l[1]; } -void HELPER(crypto_sha1su1)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(crypto_sha1su1)(void *vd, void *vm) { - union CRYPTO_STATE d = { .l = { - float64_val(env->vfp.regs[rd]), - float64_val(env->vfp.regs[rd + 1]) - } }; - union CRYPTO_STATE m = { .l = { - float64_val(env->vfp.regs[rm]), - float64_val(env->vfp.regs[rm + 1]) - } }; + uint64_t *rd = vd; + uint64_t *rm = vm; + union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); - env->vfp.regs[rd] = make_float64(d.l[0]); - env->vfp.regs[rd + 1] = make_float64(d.l[1]); + rd[0] = d.l[0]; + rd[1] = d.l[1]; } /* @@ -347,21 +328,14 @@ static uint32_t s1(uint32_t x) return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); } -void HELPER(crypto_sha256h)(CPUARMState *env, uint32_t rd, uint32_t rn, - uint32_t rm) +void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) { - union CRYPTO_STATE d = { .l = { - float64_val(env->vfp.regs[rd]), - float64_val(env->vfp.regs[rd + 1]) - } }; - union CRYPTO_STATE n = { .l = { - float64_val(env->vfp.regs[rn]), - float64_val(env->vfp.regs[rn + 1]) - } }; - union CRYPTO_STATE m = { .l = { - float64_val(env->vfp.regs[rm]), - float64_val(env->vfp.regs[rm + 1]) - } }; + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; int i; for (i = 0; i < 4; i++) { @@ -383,25 +357,18 @@ void HELPER(crypto_sha256h)(CPUARMState *env, uint32_t rd, uint32_t rn, CR_ST_WORD(d, 0) = t; } - env->vfp.regs[rd] = make_float64(d.l[0]); - env->vfp.regs[rd + 1] = make_float64(d.l[1]); + rd[0] = d.l[0]; + rd[1] = d.l[1]; } -void HELPER(crypto_sha256h2)(CPUARMState *env, uint32_t rd, uint32_t rn, - uint32_t rm) +void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) { - union CRYPTO_STATE d = { .l = { - float64_val(env->vfp.regs[rd]), - float64_val(env->vfp.regs[rd + 1]) - } }; - union CRYPTO_STATE n = { .l = { - float64_val(env->vfp.regs[rn]), - float64_val(env->vfp.regs[rn + 1]) - } }; - union CRYPTO_STATE m = { .l = { - float64_val(env->vfp.regs[rm]), - float64_val(env->vfp.regs[rm + 1]) - } }; + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; int i; for (i = 0; i < 4; i++) { @@ -415,51 +382,40 @@ void HELPER(crypto_sha256h2)(CPUARMState *env, uint32_t rd, uint32_t rn, CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; } - env->vfp.regs[rd] = make_float64(d.l[0]); - env->vfp.regs[rd + 1] = make_float64(d.l[1]); + rd[0] = d.l[0]; + rd[1] = d.l[1]; } -void HELPER(crypto_sha256su0)(CPUARMState *env, uint32_t rd, uint32_t rm) 
+void HELPER(crypto_sha256su0)(void *vd, void *vm) { - union CRYPTO_STATE d = { .l = { - float64_val(env->vfp.regs[rd]), - float64_val(env->vfp.regs[rd + 1]) - } }; - union CRYPTO_STATE m = { .l = { - float64_val(env->vfp.regs[rm]), - float64_val(env->vfp.regs[rm + 1]) - } }; + uint64_t *rd = vd; + uint64_t *rm = vm; + union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); - env->vfp.regs[rd] = make_float64(d.l[0]); - env->vfp.regs[rd + 1] = make_float64(d.l[1]); + rd[0] = d.l[0]; + rd[1] = d.l[1]; } -void HELPER(crypto_sha256su1)(CPUARMState *env, uint32_t rd, uint32_t rn, - uint32_t rm) +void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) { - union CRYPTO_STATE d = { .l = { - float64_val(env->vfp.regs[rd]), - float64_val(env->vfp.regs[rd + 1]) - } }; - union CRYPTO_STATE n = { .l = { - float64_val(env->vfp.regs[rn]), - float64_val(env->vfp.regs[rn + 1]) - } }; - union CRYPTO_STATE m = { .l = { - float64_val(env->vfp.regs[rm]), - float64_val(env->vfp.regs[rm + 1]) - } }; + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); - env->vfp.regs[rd] = make_float64(d.l[0]); - env->vfp.regs[rd + 1] = make_float64(d.l[1]); + rd[0] = d.l[0]; + rd[1] = d.l[1]; } diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 3e00a9ead1..06fd321fae 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -153,13 +153,14 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices, if (index < 16 * numregs) { /* Convert index (a byte offset into the virtual table * which is a series of 128-bit vectors concatenated) - * into the correct vfp.regs[] element plus a bit offset + * into the correct register element plus a bit offset * into that element, bearing in mind that the table * can wrap around from V31 to V0. */ int elt = (rn * 2 + (index >> 3)) % 64; int bitidx = (index & 7) * 8; - uint64_t val = extract64(env->vfp.regs[elt], bitidx, 8); + uint64_t *q = aa64_vfp_qreg(env, elt >> 1); + uint64_t val = extract64(q[elt & 1], bitidx, 8); result = deposit64(result, shift, 8, val); } diff --git a/target/arm/helper.c b/target/arm/helper.c index c83c901a86..bfce09643b 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -64,15 +64,16 @@ static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) /* VFP data registers are always little-endian. */ nregs = arm_feature(env, ARM_FEATURE_VFP3) ? 32 : 16; if (reg < nregs) { - stfq_le_p(buf, env->vfp.regs[reg]); + stq_le_p(buf, *aa32_vfp_dreg(env, reg)); return 8; } if (arm_feature(env, ARM_FEATURE_NEON)) { /* Aliases for Q regs. 
*/ nregs += 16; if (reg < nregs) { - stfq_le_p(buf, env->vfp.regs[(reg - 32) * 2]); - stfq_le_p(buf + 8, env->vfp.regs[(reg - 32) * 2 + 1]); + uint64_t *q = aa32_vfp_qreg(env, reg - 32); + stq_le_p(buf, q[0]); + stq_le_p(buf + 8, q[1]); return 16; } } @@ -90,14 +91,15 @@ static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) nregs = arm_feature(env, ARM_FEATURE_VFP3) ? 32 : 16; if (reg < nregs) { - env->vfp.regs[reg] = ldfq_le_p(buf); + *aa32_vfp_dreg(env, reg) = ldq_le_p(buf); return 8; } if (arm_feature(env, ARM_FEATURE_NEON)) { nregs += 16; if (reg < nregs) { - env->vfp.regs[(reg - 32) * 2] = ldfq_le_p(buf); - env->vfp.regs[(reg - 32) * 2 + 1] = ldfq_le_p(buf + 8); + uint64_t *q = aa32_vfp_qreg(env, reg - 32); + q[0] = ldq_le_p(buf); + q[1] = ldq_le_p(buf + 8); return 16; } } @@ -114,9 +116,12 @@ static int aarch64_fpu_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) switch (reg) { case 0 ... 31: /* 128 bit FP register */ - stfq_le_p(buf, env->vfp.regs[reg * 2]); - stfq_le_p(buf + 8, env->vfp.regs[reg * 2 + 1]); - return 16; + { + uint64_t *q = aa64_vfp_qreg(env, reg); + stq_le_p(buf, q[0]); + stq_le_p(buf + 8, q[1]); + return 16; + } case 32: /* FPSR */ stl_p(buf, vfp_get_fpsr(env)); @@ -135,9 +140,12 @@ static int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) switch (reg) { case 0 ... 31: /* 128 bit FP register */ - env->vfp.regs[reg * 2] = ldfq_le_p(buf); - env->vfp.regs[reg * 2 + 1] = ldfq_le_p(buf + 8); - return 16; + { + uint64_t *q = aa64_vfp_qreg(env, reg); + q[0] = ldq_le_p(buf); + q[1] = ldq_le_p(buf + 8); + return 16; + } case 32: /* FPSR */ vfp_set_fpsr(env, ldl_p(buf)); @@ -8360,7 +8368,7 @@ static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, MemTxAttrs attrs = {}; MemTxResult result = MEMTX_OK; AddressSpace *as; - uint32_t data; + uint64_t data; attrs.secure = is_secure; as = arm_addressspace(cs, attrs); @@ -11613,3 +11621,133 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) /* Linux crc32c converts the output to one's complement. */ return crc32c(acc, buf, bytes) ^ 0xffffffff; } + +/* Return the exception level to which FP-disabled exceptions should + * be taken, or 0 if FP is enabled. + */ +static inline int fp_exception_el(CPUARMState *env) +{ +#ifndef CONFIG_USER_ONLY + int fpen; + int cur_el = arm_current_el(env); + + /* CPACR and the CPTR registers don't exist before v6, so FP is + * always accessible + */ + if (!arm_feature(env, ARM_FEATURE_V6)) { + return 0; + } + + /* The CPACR controls traps to EL1, or PL1 if we're 32 bit: + * 0, 2 : trap EL0 and EL1/PL1 accesses + * 1 : trap only EL0 accesses + * 3 : trap no accesses + */ + fpen = extract32(env->cp15.cpacr_el1, 20, 2); + switch (fpen) { + case 0: + case 2: + if (cur_el == 0 || cur_el == 1) { + /* Trap to PL1, which might be EL1 or EL3 */ + if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { + return 3; + } + return 1; + } + if (cur_el == 3 && !is_a64(env)) { + /* Secure PL1 running at EL3 */ + return 3; + } + break; + case 1: + if (cur_el == 0) { + return 1; + } + break; + case 3: + break; + } + + /* For the CPTR registers we don't need to guard with an ARM_FEATURE + * check because zero bits in the registers mean "don't trap". 
+ */ + + /* CPTR_EL2 : present in v7VE or v8 */ + if (cur_el <= 2 && extract32(env->cp15.cptr_el[2], 10, 1) + && !arm_is_secure_below_el3(env)) { + /* Trap FP ops at EL2, NS-EL1 or NS-EL0 to EL2 */ + return 2; + } + + /* CPTR_EL3 : present in v8 */ + if (extract32(env->cp15.cptr_el[3], 10, 1)) { + /* Trap all FP ops to EL3 */ + return 3; + } +#endif + return 0; +} + +void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, + target_ulong *cs_base, uint32_t *pflags) +{ + ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false)); + uint32_t flags; + + if (is_a64(env)) { + *pc = env->pc; + flags = ARM_TBFLAG_AARCH64_STATE_MASK; + /* Get control bits for tagged addresses */ + flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT); + flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT); + } else { + *pc = env->regs[15]; + flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT) + | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT) + | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT) + | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT) + | (arm_sctlr_b(env) << ARM_TBFLAG_SCTLR_B_SHIFT); + if (!(access_secure_reg(env))) { + flags |= ARM_TBFLAG_NS_MASK; + } + if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30) + || arm_el_is_aa64(env, 1)) { + flags |= ARM_TBFLAG_VFPEN_MASK; + } + flags |= (extract32(env->cp15.c15_cpar, 0, 2) + << ARM_TBFLAG_XSCALE_CPAR_SHIFT); + } + + flags |= (arm_to_core_mmu_idx(mmu_idx) << ARM_TBFLAG_MMUIDX_SHIFT); + + /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine + * states defined in the ARM ARM for software singlestep: + * SS_ACTIVE PSTATE.SS State + * 0 x Inactive (the TB flag for SS is always 0) + * 1 0 Active-pending + * 1 1 Active-not-pending + */ + if (arm_singlestep_active(env)) { + flags |= ARM_TBFLAG_SS_ACTIVE_MASK; + if (is_a64(env)) { + if (env->pstate & PSTATE_SS) { + flags |= ARM_TBFLAG_PSTATE_SS_MASK; + } + } else { + if (env->uncached_cpsr & PSTATE_SS) { + flags |= ARM_TBFLAG_PSTATE_SS_MASK; + } + } + } + if (arm_cpu_data_is_big_endian(env)) { + flags |= ARM_TBFLAG_BE_DATA_MASK; + } + flags |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT; + + if (arm_v7m_is_handler_mode(env)) { + flags |= ARM_TBFLAG_HANDLER_MASK; + } + + *pflags = flags; + *cs_base = 0; +} diff --git a/target/arm/helper.h b/target/arm/helper.h index 066729e8ad..5dec2e6262 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -188,7 +188,7 @@ DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_2(recpe_u32, i32, i32, ptr) DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr) -DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32) +DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i32, i32, i32, ptr, i32) DEF_HELPER_3(shl_cc, i32, env, i32, i32) DEF_HELPER_3(shr_cc, i32, env, i32, i32) @@ -511,28 +511,28 @@ DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32) DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32) DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32) -DEF_HELPER_3(neon_unzip8, void, env, i32, i32) -DEF_HELPER_3(neon_unzip16, void, env, i32, i32) -DEF_HELPER_3(neon_qunzip8, void, env, i32, i32) -DEF_HELPER_3(neon_qunzip16, void, env, i32, i32) -DEF_HELPER_3(neon_qunzip32, void, env, i32, i32) -DEF_HELPER_3(neon_zip8, void, env, i32, i32) -DEF_HELPER_3(neon_zip16, void, env, i32, i32) -DEF_HELPER_3(neon_qzip8, void, env, i32, i32) -DEF_HELPER_3(neon_qzip16, void, env, i32, i32) -DEF_HELPER_3(neon_qzip32, void, env, i32, i32) 
+DEF_HELPER_FLAGS_2(neon_unzip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_unzip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qunzip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qunzip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qunzip32, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_zip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_zip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32) -DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32) +DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_5(crypto_sha1_3reg, void, env, i32, i32, i32, i32) -DEF_HELPER_3(crypto_sha1h, void, env, i32, i32) -DEF_HELPER_3(crypto_sha1su1, void, env, i32, i32) +DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_2(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_4(crypto_sha256h, void, env, i32, i32, i32) -DEF_HELPER_4(crypto_sha256h2, void, env, i32, i32, i32) -DEF_HELPER_3(crypto_sha256su0, void, env, i32, i32) -DEF_HELPER_4(crypto_sha256su1, void, env, i32, i32, i32) +DEF_HELPER_FLAGS_3(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index f925a21481..f77c9c494b 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -358,7 +358,7 @@ int kvm_arch_put_registers(CPUState *cs, int level) /* VFP registers */ r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP; for (i = 0; i < 32; i++) { - r.addr = (uintptr_t)(&env->vfp.regs[i]); + r.addr = (uintptr_t)aa32_vfp_dreg(env, i); ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r); if (ret) { return ret; @@ -445,7 +445,7 @@ int kvm_arch_get_registers(CPUState *cs) /* VFP registers */ r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP; for (i = 0; i < 32; i++) { - r.addr = (uintptr_t)(&env->vfp.regs[i]); + r.addr = (uintptr_t)aa32_vfp_dreg(env, i); ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); if (ret) { return ret; diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 6554c30007..ac728494a4 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -696,21 +696,16 @@ int kvm_arch_put_registers(CPUState *cs, int level) } } - /* Advanced SIMD and FP registers - * We map Qn = regs[2n+1]:regs[2n] - */ + /* Advanced SIMD and FP registers. 
*/ for (i = 0; i < 32; i++) { - int rd = i << 1; - uint64_t fp_val[2]; + uint64_t *q = aa64_vfp_qreg(env, i); #ifdef HOST_WORDS_BIGENDIAN - fp_val[0] = env->vfp.regs[rd + 1]; - fp_val[1] = env->vfp.regs[rd]; + uint64_t fp_val[2] = { q[1], q[0] }; + reg.addr = (uintptr_t)fp_val; #else - fp_val[1] = env->vfp.regs[rd + 1]; - fp_val[0] = env->vfp.regs[rd]; + reg.addr = (uintptr_t)q; #endif reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]); - reg.addr = (uintptr_t)(&fp_val); ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); if (ret) { return ret; @@ -837,24 +832,18 @@ int kvm_arch_get_registers(CPUState *cs) env->spsr = env->banked_spsr[i]; } - /* Advanced SIMD and FP registers - * We map Qn = regs[2n+1]:regs[2n] - */ + /* Advanced SIMD and FP registers */ for (i = 0; i < 32; i++) { - uint64_t fp_val[2]; + uint64_t *q = aa64_vfp_qreg(env, i); reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]); - reg.addr = (uintptr_t)(&fp_val); + reg.addr = (uintptr_t)q; ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); if (ret) { return ret; } else { - int rd = i << 1; #ifdef HOST_WORDS_BIGENDIAN - env->vfp.regs[rd + 1] = fp_val[0]; - env->vfp.regs[rd] = fp_val[1]; -#else - env->vfp.regs[rd + 1] = fp_val[1]; - env->vfp.regs[rd] = fp_val[0]; + uint64_t t; + t = q[0], q[0] = q[1], q[1] = t; #endif } } diff --git a/target/arm/machine.c b/target/arm/machine.c index 176274629c..a85c2430d3 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -50,7 +50,7 @@ static const VMStateDescription vmstate_vfp = { .minimum_version_id = 3, .needed = vfp_needed, .fields = (VMStateField[]) { - VMSTATE_FLOAT64_ARRAY(env.vfp.regs, ARMCPU, 64), + VMSTATE_UINT64_ARRAY(env.vfp.regs, ARMCPU, 64), /* The xregs array is a little awkward because element 1 (FPSCR) * requires a specific accessor, so we have to split it up in * the vmstate: diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c index ebdf7c9b10..689491cad3 100644 --- a/target/arm/neon_helper.c +++ b/target/arm/neon_helper.c @@ -2027,12 +2027,12 @@ uint64_t HELPER(neon_acgt_f64)(uint64_t a, uint64_t b, void *fpstp) #define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1)) -void HELPER(neon_qunzip8)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(neon_qunzip8)(void *vd, void *vm) { - uint64_t zm0 = float64_val(env->vfp.regs[rm]); - uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); - uint64_t zd0 = float64_val(env->vfp.regs[rd]); - uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t *rd = vd, *rm = vm; + uint64_t zd0 = rd[0], zd1 = rd[1]; + uint64_t zm0 = rm[0], zm1 = rm[1]; + uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8) | (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24) | (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40) @@ -2049,18 +2049,19 @@ void HELPER(neon_qunzip8)(CPUARMState *env, uint32_t rd, uint32_t rm) | (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24) | (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40) | (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56); - env->vfp.regs[rm] = make_float64(m0); - env->vfp.regs[rm + 1] = make_float64(m1); - env->vfp.regs[rd] = make_float64(d0); - env->vfp.regs[rd + 1] = make_float64(d1); + + rm[0] = m0; + rm[1] = m1; + rd[0] = d0; + rd[1] = d1; } -void HELPER(neon_qunzip16)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(neon_qunzip16)(void *vd, void *vm) { - uint64_t zm0 = float64_val(env->vfp.regs[rm]); - uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); - uint64_t zd0 = float64_val(env->vfp.regs[rd]); - uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + 
uint64_t *rd = vd, *rm = vm; + uint64_t zd0 = rd[0], zd1 = rd[1]; + uint64_t zm0 = rm[0], zm1 = rm[1]; + uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zd0, 2, 16) << 16) | (ELEM(zd1, 0, 16) << 32) | (ELEM(zd1, 2, 16) << 48); uint64_t d1 = ELEM(zm0, 0, 16) | (ELEM(zm0, 2, 16) << 16) @@ -2069,32 +2070,35 @@ void HELPER(neon_qunzip16)(CPUARMState *env, uint32_t rd, uint32_t rm) | (ELEM(zd1, 1, 16) << 32) | (ELEM(zd1, 3, 16) << 48); uint64_t m1 = ELEM(zm0, 1, 16) | (ELEM(zm0, 3, 16) << 16) | (ELEM(zm1, 1, 16) << 32) | (ELEM(zm1, 3, 16) << 48); - env->vfp.regs[rm] = make_float64(m0); - env->vfp.regs[rm + 1] = make_float64(m1); - env->vfp.regs[rd] = make_float64(d0); - env->vfp.regs[rd + 1] = make_float64(d1); + + rm[0] = m0; + rm[1] = m1; + rd[0] = d0; + rd[1] = d1; } -void HELPER(neon_qunzip32)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(neon_qunzip32)(void *vd, void *vm) { - uint64_t zm0 = float64_val(env->vfp.regs[rm]); - uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); - uint64_t zd0 = float64_val(env->vfp.regs[rd]); - uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t *rd = vd, *rm = vm; + uint64_t zd0 = rd[0], zd1 = rd[1]; + uint64_t zm0 = rm[0], zm1 = rm[1]; + uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zd1, 0, 32) << 32); uint64_t d1 = ELEM(zm0, 0, 32) | (ELEM(zm1, 0, 32) << 32); uint64_t m0 = ELEM(zd0, 1, 32) | (ELEM(zd1, 1, 32) << 32); uint64_t m1 = ELEM(zm0, 1, 32) | (ELEM(zm1, 1, 32) << 32); - env->vfp.regs[rm] = make_float64(m0); - env->vfp.regs[rm + 1] = make_float64(m1); - env->vfp.regs[rd] = make_float64(d0); - env->vfp.regs[rd + 1] = make_float64(d1); + + rm[0] = m0; + rm[1] = m1; + rd[0] = d0; + rd[1] = d1; } -void HELPER(neon_unzip8)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(neon_unzip8)(void *vd, void *vm) { - uint64_t zm = float64_val(env->vfp.regs[rm]); - uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t *rd = vd, *rm = vm; + uint64_t zd = rd[0], zm = rm[0]; + uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zd, 2, 8) << 8) | (ELEM(zd, 4, 8) << 16) | (ELEM(zd, 6, 8) << 24) | (ELEM(zm, 0, 8) << 32) | (ELEM(zm, 2, 8) << 40) @@ -2103,28 +2107,31 @@ void HELPER(neon_unzip8)(CPUARMState *env, uint32_t rd, uint32_t rm) | (ELEM(zd, 5, 8) << 16) | (ELEM(zd, 7, 8) << 24) | (ELEM(zm, 1, 8) << 32) | (ELEM(zm, 3, 8) << 40) | (ELEM(zm, 5, 8) << 48) | (ELEM(zm, 7, 8) << 56); - env->vfp.regs[rm] = make_float64(m0); - env->vfp.regs[rd] = make_float64(d0); + + rm[0] = m0; + rd[0] = d0; } -void HELPER(neon_unzip16)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(neon_unzip16)(void *vd, void *vm) { - uint64_t zm = float64_val(env->vfp.regs[rm]); - uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t *rd = vd, *rm = vm; + uint64_t zd = rd[0], zm = rm[0]; + uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zd, 2, 16) << 16) | (ELEM(zm, 0, 16) << 32) | (ELEM(zm, 2, 16) << 48); uint64_t m0 = ELEM(zd, 1, 16) | (ELEM(zd, 3, 16) << 16) | (ELEM(zm, 1, 16) << 32) | (ELEM(zm, 3, 16) << 48); - env->vfp.regs[rm] = make_float64(m0); - env->vfp.regs[rd] = make_float64(d0); + + rm[0] = m0; + rd[0] = d0; } -void HELPER(neon_qzip8)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(neon_qzip8)(void *vd, void *vm) { - uint64_t zm0 = float64_val(env->vfp.regs[rm]); - uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); - uint64_t zd0 = float64_val(env->vfp.regs[rd]); - uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t *rd = vd, *rm = vm; + uint64_t zd0 = rd[0], zd1 = rd[1]; + uint64_t zm0 = rm[0], zm1 = rm[1]; + uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zm0, 0, 8) << 8) | 
(ELEM(zd0, 1, 8) << 16) | (ELEM(zm0, 1, 8) << 24) | (ELEM(zd0, 2, 8) << 32) | (ELEM(zm0, 2, 8) << 40) @@ -2141,18 +2148,19 @@ void HELPER(neon_qzip8)(CPUARMState *env, uint32_t rd, uint32_t rm) | (ELEM(zd1, 5, 8) << 16) | (ELEM(zm1, 5, 8) << 24) | (ELEM(zd1, 6, 8) << 32) | (ELEM(zm1, 6, 8) << 40) | (ELEM(zd1, 7, 8) << 48) | (ELEM(zm1, 7, 8) << 56); - env->vfp.regs[rm] = make_float64(m0); - env->vfp.regs[rm + 1] = make_float64(m1); - env->vfp.regs[rd] = make_float64(d0); - env->vfp.regs[rd + 1] = make_float64(d1); + + rm[0] = m0; + rm[1] = m1; + rd[0] = d0; + rd[1] = d1; } -void HELPER(neon_qzip16)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(neon_qzip16)(void *vd, void *vm) { - uint64_t zm0 = float64_val(env->vfp.regs[rm]); - uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); - uint64_t zd0 = float64_val(env->vfp.regs[rd]); - uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t *rd = vd, *rm = vm; + uint64_t zd0 = rd[0], zd1 = rd[1]; + uint64_t zm0 = rm[0], zm1 = rm[1]; + uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zm0, 0, 16) << 16) | (ELEM(zd0, 1, 16) << 32) | (ELEM(zm0, 1, 16) << 48); uint64_t d1 = ELEM(zd0, 2, 16) | (ELEM(zm0, 2, 16) << 16) @@ -2161,32 +2169,35 @@ void HELPER(neon_qzip16)(CPUARMState *env, uint32_t rd, uint32_t rm) | (ELEM(zd1, 1, 16) << 32) | (ELEM(zm1, 1, 16) << 48); uint64_t m1 = ELEM(zd1, 2, 16) | (ELEM(zm1, 2, 16) << 16) | (ELEM(zd1, 3, 16) << 32) | (ELEM(zm1, 3, 16) << 48); - env->vfp.regs[rm] = make_float64(m0); - env->vfp.regs[rm + 1] = make_float64(m1); - env->vfp.regs[rd] = make_float64(d0); - env->vfp.regs[rd + 1] = make_float64(d1); + + rm[0] = m0; + rm[1] = m1; + rd[0] = d0; + rd[1] = d1; } -void HELPER(neon_qzip32)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(neon_qzip32)(void *vd, void *vm) { - uint64_t zm0 = float64_val(env->vfp.regs[rm]); - uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); - uint64_t zd0 = float64_val(env->vfp.regs[rd]); - uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t *rd = vd, *rm = vm; + uint64_t zd0 = rd[0], zd1 = rd[1]; + uint64_t zm0 = rm[0], zm1 = rm[1]; + uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zm0, 0, 32) << 32); uint64_t d1 = ELEM(zd0, 1, 32) | (ELEM(zm0, 1, 32) << 32); uint64_t m0 = ELEM(zd1, 0, 32) | (ELEM(zm1, 0, 32) << 32); uint64_t m1 = ELEM(zd1, 1, 32) | (ELEM(zm1, 1, 32) << 32); - env->vfp.regs[rm] = make_float64(m0); - env->vfp.regs[rm + 1] = make_float64(m1); - env->vfp.regs[rd] = make_float64(d0); - env->vfp.regs[rd + 1] = make_float64(d1); + + rm[0] = m0; + rm[1] = m1; + rd[0] = d0; + rd[1] = d1; } -void HELPER(neon_zip8)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(neon_zip8)(void *vd, void *vm) { - uint64_t zm = float64_val(env->vfp.regs[rm]); - uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t *rd = vd, *rm = vm; + uint64_t zd = rd[0], zm = rm[0]; + uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zm, 0, 8) << 8) | (ELEM(zd, 1, 8) << 16) | (ELEM(zm, 1, 8) << 24) | (ELEM(zd, 2, 8) << 32) | (ELEM(zm, 2, 8) << 40) @@ -2195,20 +2206,23 @@ void HELPER(neon_zip8)(CPUARMState *env, uint32_t rd, uint32_t rm) | (ELEM(zd, 5, 8) << 16) | (ELEM(zm, 5, 8) << 24) | (ELEM(zd, 6, 8) << 32) | (ELEM(zm, 6, 8) << 40) | (ELEM(zd, 7, 8) << 48) | (ELEM(zm, 7, 8) << 56); - env->vfp.regs[rm] = make_float64(m0); - env->vfp.regs[rd] = make_float64(d0); + + rm[0] = m0; + rd[0] = d0; } -void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm) +void HELPER(neon_zip16)(void *vd, void *vm) { - uint64_t zm = float64_val(env->vfp.regs[rm]); - uint64_t zd = 
float64_val(env->vfp.regs[rd]); + uint64_t *rd = vd, *rm = vm; + uint64_t zd = rd[0], zm = rm[0]; + uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zm, 0, 16) << 16) | (ELEM(zd, 1, 16) << 32) | (ELEM(zm, 1, 16) << 48); uint64_t m0 = ELEM(zd, 2, 16) | (ELEM(zm, 2, 16) << 16) | (ELEM(zd, 3, 16) << 32) | (ELEM(zm, 3, 16) << 48); - env->vfp.regs[rm] = make_float64(m0); - env->vfp.regs[rd] = make_float64(d0); + + rm[0] = m0; + rd[0] = d0; } /* Helper function for 64 bit polynomial multiply case: diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index 712c5c55b6..a937e76710 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -54,20 +54,17 @@ static int exception_target_el(CPUARMState *env) return target_el; } -uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def, - uint32_t rn, uint32_t maxindex) +uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def, void *vn, + uint32_t maxindex) { - uint32_t val; - uint32_t tmp; - int index; - int shift; - uint64_t *table; - table = (uint64_t *)&env->vfp.regs[rn]; + uint32_t val, shift; + uint64_t *table = vn; + val = 0; for (shift = 0; shift < 32; shift += 8) { - index = (ireg >> shift) & 0xff; + uint32_t index = (ireg >> shift) & 0xff; if (index < maxindex) { - tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff; + uint32_t tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff; val |= tmp << shift; } else { val |= def & (0xff << shift); diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 70c1e08a36..eed64c73e5 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -80,8 +80,9 @@ typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64); -typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32); -typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); +typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr); +typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); +typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); /* initialize TCG globals. */ void a64_translate_init(void) @@ -163,15 +164,12 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f, if (flags & CPU_DUMP_FPU) { int numvfpregs = 32; - for (i = 0; i < numvfpregs; i += 2) { - uint64_t vlo = float64_val(env->vfp.regs[i * 2]); - uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]); - cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ", - i, vhi, vlo); - vlo = float64_val(env->vfp.regs[(i + 1) * 2]); - vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]); - cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n", - i + 1, vhi, vlo); + for (i = 0; i < numvfpregs; i++) { + uint64_t *q = aa64_vfp_qreg(env, i); + uint64_t vlo = q[0]; + uint64_t vhi = q[1]; + cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "%c", + i, vhi, vlo, (i & 1 ? '\n' : ' ')); } cpu_fprintf(f, "FPCR: %08x FPSR: %08x\n", vfp_get_fpcr(env), vfp_get_fpsr(env)); @@ -535,6 +533,21 @@ static inline int vec_reg_offset(DisasContext *s, int regno, return offs; } +/* Return the offset info CPUARMState of the "whole" vector register Qn. */ +static inline int vec_full_reg_offset(DisasContext *s, int regno) +{ + assert_fp_access_checked(s); + return offsetof(CPUARMState, vfp.regs[regno * 2]); +} + +/* Return a newly allocated pointer to the vector register. 
*/ +static TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno) +{ + TCGv_ptr ret = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(ret, cpu_env, vec_full_reg_offset(s, regno)); + return ret; +} + /* Return the offset into CPUARMState of a slice (from * the least significant end) of FP register Qn (ie * Dn, Sn, Hn or Bn). @@ -542,19 +555,13 @@ static inline int vec_reg_offset(DisasContext *s, int regno, */ static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size) { - int offs = offsetof(CPUARMState, vfp.regs[regno * 2]); -#ifdef HOST_WORDS_BIGENDIAN - offs += (8 - (1 << size)); -#endif - assert_fp_access_checked(s); - return offs; + return vec_reg_offset(s, regno, 0, size); } /* Offset of the high half of the 128 bit vector Qn */ static inline int fp_reg_hi_offset(DisasContext *s, int regno) { - assert_fp_access_checked(s); - return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]); + return vec_reg_offset(s, regno, 1, MO_64); } /* Convenience accessors for reading and writing single and double @@ -10949,8 +10956,9 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); int decrypt; - TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt; - CryptoThreeOpEnvFn *genfn; + TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; + TCGv_i32 tcg_decrypt; + CryptoThreeOpIntFn *genfn; if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) || size != 0) { @@ -10984,18 +10992,14 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) return; } - /* Note that we convert the Vx register indexes into the - * index within the vfp.regs[] array, so we can share the - * helper with the AArch32 instructions. - */ - tcg_rd_regno = tcg_const_i32(rd << 1); - tcg_rn_regno = tcg_const_i32(rn << 1); + tcg_rd_ptr = vec_full_reg_ptr(s, rd); + tcg_rn_ptr = vec_full_reg_ptr(s, rn); tcg_decrypt = tcg_const_i32(decrypt); - genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt); + genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt); - tcg_temp_free_i32(tcg_rd_regno); - tcg_temp_free_i32(tcg_rn_regno); + tcg_temp_free_ptr(tcg_rd_ptr); + tcg_temp_free_ptr(tcg_rn_ptr); tcg_temp_free_i32(tcg_decrypt); } @@ -11012,8 +11016,8 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoThreeOpEnvFn *genfn; - TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno; + CryptoThreeOpFn *genfn; + TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; int feature = ARM_FEATURE_V8_SHA256; if (size != 0) { @@ -11052,23 +11056,23 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) return; } - tcg_rd_regno = tcg_const_i32(rd << 1); - tcg_rn_regno = tcg_const_i32(rn << 1); - tcg_rm_regno = tcg_const_i32(rm << 1); + tcg_rd_ptr = vec_full_reg_ptr(s, rd); + tcg_rn_ptr = vec_full_reg_ptr(s, rn); + tcg_rm_ptr = vec_full_reg_ptr(s, rm); if (genfn) { - genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno); + genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); } else { TCGv_i32 tcg_opcode = tcg_const_i32(opcode); - gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno, - tcg_rn_regno, tcg_rm_regno, tcg_opcode); + gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr, + tcg_rm_ptr, tcg_opcode); tcg_temp_free_i32(tcg_opcode); } - tcg_temp_free_i32(tcg_rd_regno); - tcg_temp_free_i32(tcg_rn_regno); - tcg_temp_free_i32(tcg_rm_regno); + tcg_temp_free_ptr(tcg_rd_ptr); + tcg_temp_free_ptr(tcg_rn_ptr); + tcg_temp_free_ptr(tcg_rm_ptr); } /* Crypto two-reg SHA @@ -11083,9 +11087,9 @@ static void 
disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoTwoOpEnvFn *genfn; + CryptoTwoOpFn *genfn; int feature; - TCGv_i32 tcg_rd_regno, tcg_rn_regno; + TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; if (size != 0) { unallocated_encoding(s); @@ -11119,13 +11123,13 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) return; } - tcg_rd_regno = tcg_const_i32(rd << 1); - tcg_rn_regno = tcg_const_i32(rn << 1); + tcg_rd_ptr = vec_full_reg_ptr(s, rd); + tcg_rn_ptr = vec_full_reg_ptr(s, rn); - genfn(cpu_env, tcg_rd_regno, tcg_rn_regno); + genfn(tcg_rd_ptr, tcg_rn_ptr); - tcg_temp_free_i32(tcg_rd_regno); - tcg_temp_free_i32(tcg_rn_regno); + tcg_temp_free_ptr(tcg_rd_ptr); + tcg_temp_free_ptr(tcg_rn_ptr); } /* C3.6 Data processing - SIMD, inc Crypto diff --git a/target/arm/translate.c b/target/arm/translate.c index 781be1e219..55826b7e5a 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -1515,14 +1515,16 @@ static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr) static inline long vfp_reg_offset (int dp, int reg) { - if (dp) + if (dp) { return offsetof(CPUARMState, vfp.regs[reg]); - else if (reg & 1) { - return offsetof(CPUARMState, vfp.regs[reg >> 1]) - + offsetof(CPU_DoubleU, l.upper); } else { - return offsetof(CPUARMState, vfp.regs[reg >> 1]) - + offsetof(CPU_DoubleU, l.lower); + long ofs = offsetof(CPUARMState, vfp.regs[reg >> 1]); + if (reg & 1) { + ofs += offsetof(CPU_DoubleU, l.upper); + } else { + ofs += offsetof(CPU_DoubleU, l.lower); + } + return ofs; } } @@ -1559,6 +1561,13 @@ static inline void neon_store_reg64(TCGv_i64 var, int reg) tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg)); } +static TCGv_ptr vfp_reg_ptr(bool dp, int reg) +{ + TCGv_ptr ret = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg)); + return ret; +} + #define tcg_gen_ld_f32 tcg_gen_ld_i32 #define tcg_gen_ld_f64 tcg_gen_ld_i64 #define tcg_gen_st_f32 tcg_gen_st_i32 @@ -4680,22 +4689,23 @@ static inline TCGv_i32 neon_get_scalar(int size, int reg) static int gen_neon_unzip(int rd, int rm, int size, int q) { - TCGv_i32 tmp, tmp2; + TCGv_ptr pd, pm; + if (!q && size == 2) { return 1; } - tmp = tcg_const_i32(rd); - tmp2 = tcg_const_i32(rm); + pd = vfp_reg_ptr(true, rd); + pm = vfp_reg_ptr(true, rm); if (q) { switch (size) { case 0: - gen_helper_neon_qunzip8(cpu_env, tmp, tmp2); + gen_helper_neon_qunzip8(pd, pm); break; case 1: - gen_helper_neon_qunzip16(cpu_env, tmp, tmp2); + gen_helper_neon_qunzip16(pd, pm); break; case 2: - gen_helper_neon_qunzip32(cpu_env, tmp, tmp2); + gen_helper_neon_qunzip32(pd, pm); break; default: abort(); @@ -4703,38 +4713,39 @@ static int gen_neon_unzip(int rd, int rm, int size, int q) } else { switch (size) { case 0: - gen_helper_neon_unzip8(cpu_env, tmp, tmp2); + gen_helper_neon_unzip8(pd, pm); break; case 1: - gen_helper_neon_unzip16(cpu_env, tmp, tmp2); + gen_helper_neon_unzip16(pd, pm); break; default: abort(); } } - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(tmp2); + tcg_temp_free_ptr(pd); + tcg_temp_free_ptr(pm); return 0; } static int gen_neon_zip(int rd, int rm, int size, int q) { - TCGv_i32 tmp, tmp2; + TCGv_ptr pd, pm; + if (!q && size == 2) { return 1; } - tmp = tcg_const_i32(rd); - tmp2 = tcg_const_i32(rm); + pd = vfp_reg_ptr(true, rd); + pm = vfp_reg_ptr(true, rm); if (q) { switch (size) { case 0: - gen_helper_neon_qzip8(cpu_env, tmp, tmp2); + gen_helper_neon_qzip8(pd, pm); break; case 1: - 
@@ -4680,22 +4689,23 @@ static inline TCGv_i32 neon_get_scalar(int size, int reg)

 static int gen_neon_unzip(int rd, int rm, int size, int q)
 {
-    TCGv_i32 tmp, tmp2;
+    TCGv_ptr pd, pm;
+
     if (!q && size == 2) {
         return 1;
     }
-    tmp = tcg_const_i32(rd);
-    tmp2 = tcg_const_i32(rm);
+    pd = vfp_reg_ptr(true, rd);
+    pm = vfp_reg_ptr(true, rm);
     if (q) {
         switch (size) {
         case 0:
-            gen_helper_neon_qunzip8(cpu_env, tmp, tmp2);
+            gen_helper_neon_qunzip8(pd, pm);
             break;
         case 1:
-            gen_helper_neon_qunzip16(cpu_env, tmp, tmp2);
+            gen_helper_neon_qunzip16(pd, pm);
             break;
         case 2:
-            gen_helper_neon_qunzip32(cpu_env, tmp, tmp2);
+            gen_helper_neon_qunzip32(pd, pm);
             break;
         default:
             abort();
@@ -4703,38 +4713,39 @@ static int gen_neon_unzip(int rd, int rm, int size, int q)
     } else {
         switch (size) {
         case 0:
-            gen_helper_neon_unzip8(cpu_env, tmp, tmp2);
+            gen_helper_neon_unzip8(pd, pm);
             break;
         case 1:
-            gen_helper_neon_unzip16(cpu_env, tmp, tmp2);
+            gen_helper_neon_unzip16(pd, pm);
             break;
         default:
             abort();
         }
     }
-    tcg_temp_free_i32(tmp);
-    tcg_temp_free_i32(tmp2);
+    tcg_temp_free_ptr(pd);
+    tcg_temp_free_ptr(pm);
     return 0;
 }

 static int gen_neon_zip(int rd, int rm, int size, int q)
 {
-    TCGv_i32 tmp, tmp2;
+    TCGv_ptr pd, pm;
+
     if (!q && size == 2) {
         return 1;
     }
-    tmp = tcg_const_i32(rd);
-    tmp2 = tcg_const_i32(rm);
+    pd = vfp_reg_ptr(true, rd);
+    pm = vfp_reg_ptr(true, rm);
     if (q) {
         switch (size) {
         case 0:
-            gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
+            gen_helper_neon_qzip8(pd, pm);
             break;
         case 1:
-            gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
+            gen_helper_neon_qzip16(pd, pm);
             break;
         case 2:
-            gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
+            gen_helper_neon_qzip32(pd, pm);
             break;
         default:
             abort();
@@ -4742,17 +4753,17 @@ static int gen_neon_zip(int rd, int rm, int size, int q)
     } else {
         switch (size) {
         case 0:
-            gen_helper_neon_zip8(cpu_env, tmp, tmp2);
+            gen_helper_neon_zip8(pd, pm);
             break;
         case 1:
-            gen_helper_neon_zip16(cpu_env, tmp, tmp2);
+            gen_helper_neon_zip16(pd, pm);
             break;
         default:
             abort();
         }
     }
-    tcg_temp_free_i32(tmp);
-    tcg_temp_free_i32(tmp2);
+    tcg_temp_free_ptr(pd);
+    tcg_temp_free_ptr(pm);
     return 0;
 }
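For the pd/pm calls above to type-check, the zip/unzip helpers must now be declared with two pointer arguments instead of (env, i32, i32). Those declarations are not shown in this diff; the following is a sketch of the presumed helper.h shape, with TCG_CALL_NO_RWG an assumption about the flags (one Q and one non-Q example shown):

    /* Presumed helper.h declarations for the pointer-based helpers. */
    DEF_HELPER_FLAGS_2(neon_qunzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
    DEF_HELPER_FLAGS_2(neon_zip8, TCG_CALL_NO_RWG, void, ptr, ptr)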
@@ -5597,6 +5608,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
     int u;
     uint32_t imm, mask;
     TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
+    TCGv_ptr ptr1, ptr2, ptr3;
     TCGv_i64 tmp64;

     /* FIXME: this access check should not take precedence over UNDEF
@@ -5643,34 +5655,34 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
                 return 1;
             }
-            tmp = tcg_const_i32(rd);
-            tmp2 = tcg_const_i32(rn);
-            tmp3 = tcg_const_i32(rm);
+            ptr1 = vfp_reg_ptr(true, rd);
+            ptr2 = vfp_reg_ptr(true, rn);
+            ptr3 = vfp_reg_ptr(true, rm);
             tmp4 = tcg_const_i32(size);
-            gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
+            gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
             tcg_temp_free_i32(tmp4);
         } else { /* SHA-256 */
             if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) {
                 return 1;
             }
-            tmp = tcg_const_i32(rd);
-            tmp2 = tcg_const_i32(rn);
-            tmp3 = tcg_const_i32(rm);
+            ptr1 = vfp_reg_ptr(true, rd);
+            ptr2 = vfp_reg_ptr(true, rn);
+            ptr3 = vfp_reg_ptr(true, rm);
             switch (size) {
             case 0:
-                gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
+                gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
                 break;
             case 1:
-                gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
+                gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
                 break;
             case 2:
-                gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
+                gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
                 break;
             }
         }
-        tcg_temp_free_i32(tmp);
-        tcg_temp_free_i32(tmp2);
-        tcg_temp_free_i32(tmp3);
+        tcg_temp_free_ptr(ptr1);
+        tcg_temp_free_ptr(ptr2);
+        tcg_temp_free_ptr(ptr3);
         return 0;
     }
     if (size == 3 && op != NEON_3R_LOGIC) {
@@ -7159,8 +7171,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     || ((rm | rd) & 1)) {
                     return 1;
                 }
-                tmp = tcg_const_i32(rd);
-                tmp2 = tcg_const_i32(rm);
+                ptr1 = vfp_reg_ptr(true, rd);
+                ptr2 = vfp_reg_ptr(true, rm);

                 /* Bit 6 is the lowest opcode bit; it distinguishes between
                  * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
@@ -7168,12 +7180,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 tmp3 = tcg_const_i32(extract32(insn, 6, 1));

                 if (op == NEON_2RM_AESE) {
-                    gen_helper_crypto_aese(cpu_env, tmp, tmp2, tmp3);
+                    gen_helper_crypto_aese(ptr1, ptr2, tmp3);
                 } else {
-                    gen_helper_crypto_aesmc(cpu_env, tmp, tmp2, tmp3);
+                    gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
                 }
-                tcg_temp_free_i32(tmp);
-                tcg_temp_free_i32(tmp2);
+                tcg_temp_free_ptr(ptr1);
+                tcg_temp_free_ptr(ptr2);
                 tcg_temp_free_i32(tmp3);
                 break;
             case NEON_2RM_SHA1H:
@@ -7181,13 +7193,13 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     || ((rm | rd) & 1)) {
                     return 1;
                 }
-                tmp = tcg_const_i32(rd);
-                tmp2 = tcg_const_i32(rm);
+                ptr1 = vfp_reg_ptr(true, rd);
+                ptr2 = vfp_reg_ptr(true, rm);

-                gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
+                gen_helper_crypto_sha1h(ptr1, ptr2);

-                tcg_temp_free_i32(tmp);
-                tcg_temp_free_i32(tmp2);
+                tcg_temp_free_ptr(ptr1);
+                tcg_temp_free_ptr(ptr2);
                 break;
             case NEON_2RM_SHA1SU1:
                 if ((rm | rd) & 1) {
@@ -7201,15 +7213,15 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
                     return 1;
                 }
-                tmp = tcg_const_i32(rd);
-                tmp2 = tcg_const_i32(rm);
+                ptr1 = vfp_reg_ptr(true, rd);
+                ptr2 = vfp_reg_ptr(true, rm);
                 if (q) {
-                    gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
+                    gen_helper_crypto_sha256su0(ptr1, ptr2);
                 } else {
-                    gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
+                    gen_helper_crypto_sha1su1(ptr1, ptr2);
                 }
-                tcg_temp_free_i32(tmp);
-                tcg_temp_free_i32(tmp2);
+                tcg_temp_free_ptr(ptr1);
+                tcg_temp_free_ptr(ptr2);
                 break;
             default:
             elementwise:
@@ -7534,9 +7546,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     tcg_gen_movi_i32(tmp, 0);
                 }
                 tmp2 = neon_load_reg(rm, 0);
-                tmp4 = tcg_const_i32(rn);
+                ptr1 = vfp_reg_ptr(true, rn);
                 tmp5 = tcg_const_i32(n);
-                gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5);
+                gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
                 tcg_temp_free_i32(tmp);
                 if (insn & (1 << 6)) {
                     tmp = neon_load_reg(rd, 1);
@@ -7545,9 +7557,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     tcg_gen_movi_i32(tmp, 0);
                 }
                 tmp3 = neon_load_reg(rm, 1);
-                gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5);
+                gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
                 tcg_temp_free_i32(tmp5);
-                tcg_temp_free_i32(tmp4);
+                tcg_temp_free_ptr(ptr1);
                 neon_store_reg(rd, 0, tmp2);
                 neon_store_reg(rd, 1, tmp3);
                 tcg_temp_free_i32(tmp);
@@ -12562,7 +12574,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
             numvfpregs += 16;
         }
         for (i = 0; i < numvfpregs; i++) {
-            uint64_t v = float64_val(env->vfp.regs[i]);
+            uint64_t v = *aa32_vfp_dreg(env, i);
             cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
                         i * 2, (uint32_t)v,
                         i * 2 + 1, (uint32_t)(v >> 32),
diff --git a/target/arm/translate.h b/target/arm/translate.h
index cd7313ace7..3f4df91e5e 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -108,7 +108,7 @@ static inline int default_exception_el(DisasContext *s)
            ? 3 : MAX(1, s->current_el);
 }

-static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
+static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
 {
     /* We don't need to save all of the syndrome so we mask and shift
      * out unneeded bits to help the sleb128 encoder do a better job.