From 870f0051b4ada9a361f7454f833432ae8c06c095 Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Fri, 28 Aug 2020 10:02:43 +0100 Subject: [PATCH 01/35] hw/arm/sbsa-ref: fix typo breaking PCIe IRQs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixing a typo in a previous patch that translated an "i" to a 1 and therefore breaking the allocation of PCIe interrupts. This was discovered when virtio-net-pci devices ceased to function correctly. Cc: qemu-stable@nongnu.org Fixes: 48ba18e6d3f3 ("hw/arm/sbsa-ref: Simplify by moving the gic in the machine state") Signed-off-by: Graeme Gregory Reviewed-by: Philippe Mathieu-Daudé Message-id: 20200821083853.356490-1-graeme@nuviainc.com Signed-off-by: Peter Maydell --- hw/arm/sbsa-ref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index f030a416fd..2a7d9a61fc 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -554,7 +554,7 @@ static void create_pcie(SBSAMachineState *sms) for (i = 0; i < GPEX_NUM_IRQS; i++) { sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, - qdev_get_gpio_in(sms->gic, irq + 1)); + qdev_get_gpio_in(sms->gic, irq + i)); gpex_set_irq_num(GPEX_HOST(dev), i, irq + i); } From bb80ae077ebcc7e0cdc0f7e2ec663896b6103da1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:43 +0100 Subject: [PATCH 02/35] hw/clock: Remove unused clock_init*() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clock_init*() inlined funtions are simple wrappers around clock_set*() and are not used. Remove them in favor of clock_set*(). Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20200806123858.30058-2-f4bug@amsat.org Signed-off-by: Peter Maydell --- include/hw/clock.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/include/hw/clock.h b/include/hw/clock.h index f822a94220..468fed0996 100644 --- a/include/hw/clock.h +++ b/include/hw/clock.h @@ -209,17 +209,4 @@ static inline bool clock_is_enabled(const Clock *clk) return clock_get(clk) != 0; } -static inline void clock_init(Clock *clk, uint64_t value) -{ - clock_set(clk, value); -} -static inline void clock_init_hz(Clock *clk, uint64_t value) -{ - clock_set_hz(clk, value); -} -static inline void clock_init_ns(Clock *clk, uint64_t value) -{ - clock_set_ns(clk, value); -} - #endif /* QEMU_HW_CLOCK_H */ From 15aa2876d9c7181c58305430f726461a2f24cb00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:44 +0100 Subject: [PATCH 03/35] hw/clock: Let clock_set() return boolean value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let clock_set() return a boolean value whether the clock has been updated or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20200806123858.30058-3-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/core/clock.c | 7 ++++++- include/hw/clock.h | 12 +++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/hw/core/clock.c b/hw/core/clock.c index 3c0daf7d4c..7066282f7b 100644 --- a/hw/core/clock.c +++ b/hw/core/clock.c @@ -34,11 +34,16 @@ void clock_clear_callback(Clock *clk) clock_set_callback(clk, NULL, NULL); } -void clock_set(Clock *clk, uint64_t period) +bool clock_set(Clock *clk, uint64_t period) { + if (clk->period == period) { + return false; + } trace_clock_set(CLOCK_PATH(clk), CLOCK_PERIOD_TO_NS(clk->period), CLOCK_PERIOD_TO_NS(period)); clk->period = period; + + return true; } static void clock_propagate_period(Clock *clk, bool call_callbacks) diff --git a/include/hw/clock.h b/include/hw/clock.h index 468fed0996..d85af45c96 100644 --- a/include/hw/clock.h +++ b/include/hw/clock.h @@ -127,17 +127,19 @@ void clock_set_source(Clock *clk, Clock *src); * @value: the clock's value, 0 means unclocked * * Set the local cached period value of @clk to @value. + * + * @return: true if the clock is changed. */ -void clock_set(Clock *clk, uint64_t value); +bool clock_set(Clock *clk, uint64_t value); -static inline void clock_set_hz(Clock *clk, unsigned hz) +static inline bool clock_set_hz(Clock *clk, unsigned hz) { - clock_set(clk, CLOCK_PERIOD_FROM_HZ(hz)); + return clock_set(clk, CLOCK_PERIOD_FROM_HZ(hz)); } -static inline void clock_set_ns(Clock *clk, unsigned ns) +static inline bool clock_set_ns(Clock *clk, unsigned ns) { - clock_set(clk, CLOCK_PERIOD_FROM_NS(ns)); + return clock_set(clk, CLOCK_PERIOD_FROM_NS(ns)); } /** From 96250eab904261b31d9d1ac3abbdb36737635ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:44 +0100 Subject: [PATCH 04/35] hw/clock: Only propagate clock changes if the clock is changed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid propagating the clock change when the clock does not change. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20200806123858.30058-4-f4bug@amsat.org Signed-off-by: Peter Maydell --- include/hw/clock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/hw/clock.h b/include/hw/clock.h index d85af45c96..9ecd78b2c3 100644 --- a/include/hw/clock.h +++ b/include/hw/clock.h @@ -165,8 +165,9 @@ void clock_propagate(Clock *clk); */ static inline void clock_update(Clock *clk, uint64_t value) { - clock_set(clk, value); - clock_propagate(clk); + if (clock_set(clk, value)) { + clock_propagate(clk); + } } static inline void clock_update_hz(Clock *clk, unsigned hz) From 79ed6fd65026fbdc71c5f2a7a42a80aac0063d94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:44 +0100 Subject: [PATCH 05/35] hw/arm/musicpal: Use AddressSpace for DMA transfers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow the device to execute the DMA transfers in a different AddressSpace. We keep using the system_memory address space, but via the proper dma_memory_access() API. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20200814125533.4047-1-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/musicpal.c | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c index c3b9780f35..f2f4fc0264 100644 --- a/hw/arm/musicpal.c +++ b/hw/arm/musicpal.c @@ -30,6 +30,7 @@ #include "hw/audio/wm8750.h" #include "sysemu/block-backend.h" #include "sysemu/runstate.h" +#include "sysemu/dma.h" #include "exec/address-spaces.h" #include "ui/pixel_ops.h" #include "qemu/cutils.h" @@ -163,6 +164,8 @@ typedef struct mv88w8618_eth_state { MemoryRegion iomem; qemu_irq irq; + MemoryRegion *dma_mr; + AddressSpace dma_as; uint32_t smir; uint32_t icr; uint32_t imr; @@ -176,19 +179,21 @@ typedef struct mv88w8618_eth_state { NICConf conf; } mv88w8618_eth_state; -static void eth_rx_desc_put(uint32_t addr, mv88w8618_rx_desc *desc) +static void eth_rx_desc_put(AddressSpace *dma_as, uint32_t addr, + mv88w8618_rx_desc *desc) { cpu_to_le32s(&desc->cmdstat); cpu_to_le16s(&desc->bytes); cpu_to_le16s(&desc->buffer_size); cpu_to_le32s(&desc->buffer); cpu_to_le32s(&desc->next); - cpu_physical_memory_write(addr, desc, sizeof(*desc)); + dma_memory_write(dma_as, addr, desc, sizeof(*desc)); } -static void eth_rx_desc_get(uint32_t addr, mv88w8618_rx_desc *desc) +static void eth_rx_desc_get(AddressSpace *dma_as, uint32_t addr, + mv88w8618_rx_desc *desc) { - cpu_physical_memory_read(addr, desc, sizeof(*desc)); + dma_memory_read(dma_as, addr, desc, sizeof(*desc)); le32_to_cpus(&desc->cmdstat); le16_to_cpus(&desc->bytes); le16_to_cpus(&desc->buffer_size); @@ -209,9 +214,9 @@ static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size) continue; } do { - eth_rx_desc_get(desc_addr, &desc); + eth_rx_desc_get(&s->dma_as, desc_addr, &desc); if ((desc.cmdstat & MP_ETH_RX_OWN) && desc.buffer_size >= size) { - cpu_physical_memory_write(desc.buffer + s->vlan_header, + dma_memory_write(&s->dma_as, desc.buffer + s->vlan_header, buf, size); desc.bytes = size + s->vlan_header; desc.cmdstat &= ~MP_ETH_RX_OWN; @@ -221,7 +226,7 @@ static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size) if (s->icr & s->imr) { qemu_irq_raise(s->irq); } - eth_rx_desc_put(desc_addr, &desc); + eth_rx_desc_put(&s->dma_as, desc_addr, &desc); return size; } desc_addr = desc.next; @@ -230,19 +235,21 @@ static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size) return size; } -static void eth_tx_desc_put(uint32_t addr, mv88w8618_tx_desc *desc) +static void eth_tx_desc_put(AddressSpace *dma_as, uint32_t addr, + mv88w8618_tx_desc *desc) { cpu_to_le32s(&desc->cmdstat); cpu_to_le16s(&desc->res); cpu_to_le16s(&desc->bytes); cpu_to_le32s(&desc->buffer); cpu_to_le32s(&desc->next); - cpu_physical_memory_write(addr, desc, sizeof(*desc)); + dma_memory_write(dma_as, addr, desc, sizeof(*desc)); } -static void eth_tx_desc_get(uint32_t addr, mv88w8618_tx_desc *desc) +static void eth_tx_desc_get(AddressSpace *dma_as, uint32_t addr, + mv88w8618_tx_desc *desc) { - cpu_physical_memory_read(addr, desc, sizeof(*desc)); + dma_memory_read(dma_as, addr, desc, sizeof(*desc)); le32_to_cpus(&desc->cmdstat); le16_to_cpus(&desc->res); le16_to_cpus(&desc->bytes); @@ -259,17 +266,17 @@ static void eth_send(mv88w8618_eth_state *s, int queue_index) int len; do { - eth_tx_desc_get(desc_addr, &desc); + eth_tx_desc_get(&s->dma_as, desc_addr, &desc); next_desc = desc.next; if (desc.cmdstat & MP_ETH_TX_OWN) { len = desc.bytes; if (len < 2048) { - cpu_physical_memory_read(desc.buffer, buf, len); + dma_memory_read(&s->dma_as, desc.buffer, buf, len); qemu_send_packet(qemu_get_queue(s->nic), buf, len); } desc.cmdstat &= ~MP_ETH_TX_OWN; s->icr |= 1 << (MP_ETH_IRQ_TXLO_BIT - queue_index); - eth_tx_desc_put(desc_addr, &desc); + eth_tx_desc_put(&s->dma_as, desc_addr, &desc); } desc_addr = next_desc; } while (desc_addr != s->tx_queue[queue_index]); @@ -405,6 +412,12 @@ static void mv88w8618_eth_realize(DeviceState *dev, Error **errp) { mv88w8618_eth_state *s = MV88W8618_ETH(dev); + if (!s->dma_mr) { + error_setg(errp, TYPE_MV88W8618_ETH " 'dma-memory' link not set"); + return; + } + + address_space_init(&s->dma_as, s->dma_mr, "emac-dma"); s->nic = qemu_new_nic(&net_mv88w8618_info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s); } @@ -428,6 +441,8 @@ static const VMStateDescription mv88w8618_eth_vmsd = { static Property mv88w8618_eth_properties[] = { DEFINE_NIC_PROPERTIES(mv88w8618_eth_state, conf), + DEFINE_PROP_LINK("dma-memory", mv88w8618_eth_state, dma_mr, + TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_END_OF_LIST(), }; @@ -1653,6 +1668,8 @@ static void musicpal_init(MachineState *machine) qemu_check_nic_model(&nd_table[0], "mv88w8618"); dev = qdev_new(TYPE_MV88W8618_ETH); qdev_set_nic_properties(dev, &nd_table[0]); + object_property_set_link(OBJECT(dev), "dma-memory", + OBJECT(get_system_memory()), &error_fatal); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, MP_ETH_BASE); sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[MP_ETH_IRQ]); From 0e5aac18bc31dbdfab51f9784240d0c31a4c5579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:44 +0100 Subject: [PATCH 06/35] target/arm: Clarify HCR_EL2 ARMCPRegInfo type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit ce4afed839 ("target/arm: Implement AArch32 HCR and HCR2") the HCR_EL2 register has been changed from type NO_RAW (no underlying state and does not support raw access for state saving/loading) to type CONST (TCG can assume the value to be constant), removing the read/write accessors. We forgot to remove the previous type ARM_CP_NO_RAW. This is not really a problem since the field is overwritten. However it makes code review confuse, so remove it. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Edgar E. Iglesias Reviewed-by: Richard Henderson Message-id: 20200812111223.7787-1-f4bug@amsat.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 1 - 1 file changed, 1 deletion(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index 6b4f0eb533..44d666627a 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5105,7 +5105,6 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, { .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH, - .type = ARM_CP_NO_RAW, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, From dbf8c32178291169e111a6a9fd7ae17af4a3039d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:44 +0100 Subject: [PATCH 07/35] target/arm: Pass the entire mte descriptor to mte_check_fail We need more information than just the mmu_idx in order to create the proper exception syndrome. Only change the function signature so far. Signed-off-by: Richard Henderson Message-id: 20200813200816.3037186-2-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/mte_helper.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 104752041f..a40454588d 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -514,9 +514,10 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) } /* Record a tag check failure. */ -static void mte_check_fail(CPUARMState *env, int mmu_idx, +static void mte_check_fail(CPUARMState *env, uint32_t desc, uint64_t dirty_ptr, uintptr_t ra) { + int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); int el, reg_el, tcf, select; uint64_t sctlr; @@ -639,8 +640,7 @@ uint64_t mte_check1(CPUARMState *env, uint32_t desc, } if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) { - int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); - mte_check_fail(env, mmu_idx, ptr, ra); + mte_check_fail(env, desc, ptr, ra); } return useronly_clean_ptr(ptr); @@ -810,7 +810,7 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, fail_ofs = tag_first + n * TAG_GRANULE - ptr; fail_ofs = ROUND_UP(fail_ofs, esize); - mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra); + mte_check_fail(env, desc, ptr + fail_ofs, ra); } done: @@ -922,7 +922,7 @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) fail: /* Locate the first nibble that differs. */ i = ctz64(mem_tag ^ ptr_tag) >> 4; - mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra); + mte_check_fail(env, desc, align_ptr + i * TAG_GRANULE, ra); done: return useronly_clean_ptr(ptr); From 9a4670be7f0734d27bf4058db3becf83cd0cc9d5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:45 +0100 Subject: [PATCH 08/35] target/arm: Fill in the WnR syndrome bit in mte_check_fail According to AArch64.TagCheckFault, none of the other ISS values are provided, so we do not need to go so far as merge_syn_data_abort. But we were missing the WnR bit. Tested-by: Andrey Konovalov Reported-by: Andrey Konovalov Signed-off-by: Richard Henderson Message-id: 20200813200816.3037186-3-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/mte_helper.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index a40454588d..891306f5b0 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -519,7 +519,7 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc, { int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); - int el, reg_el, tcf, select; + int el, reg_el, tcf, select, is_write, syn; uint64_t sctlr; reg_el = regime_el(env, arm_mmu_idx); @@ -547,9 +547,10 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc, */ cpu_restore_state(env_cpu(env), ra, true); env->exception.vaddress = dirty_ptr; - raise_exception(env, EXCP_DATA_ABORT, - syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11), - exception_target_el(env)); + + is_write = FIELD_EX32(desc, MTEDESC, WRITE); + syn = syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, is_write, 0x11); + raise_exception(env, EXCP_DATA_ABORT, syn, exception_target_el(env)); /* noreturn, but fall through to the assert anyway */ case 0: From b3aec952bf11ef54376db96dd673c6a34753b697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:45 +0100 Subject: [PATCH 09/35] hw/sd/allwinner-sdhost: Use AddressSpace for DMA transfers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow the device to execute the DMA transfers in a different AddressSpace. The A10 and H3 SoC keep using the system_memory address space, but via the proper dma_memory_access() API. Signed-off-by: Philippe Mathieu-Daudé Tested-by: Niek Linnenbank Reviewed-by: Niek Linnenbank Message-id: 20200814110057.307-1-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/allwinner-a10.c | 2 ++ hw/arm/allwinner-h3.c | 2 ++ hw/sd/allwinner-sdhost.c | 37 ++++++++++++++++++++++++++------ include/hw/sd/allwinner-sdhost.h | 6 ++++++ 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c index e258463747..d404f31e02 100644 --- a/hw/arm/allwinner-a10.c +++ b/hw/arm/allwinner-a10.c @@ -155,6 +155,8 @@ static void aw_a10_realize(DeviceState *dev, Error **errp) } /* SD/MMC */ + object_property_set_link(OBJECT(&s->mmc0), "dma-memory", + OBJECT(get_system_memory()), &error_fatal); sysbus_realize(SYS_BUS_DEVICE(&s->mmc0), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(&s->mmc0), 0, AW_A10_MMC0_BASE); sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc0), 0, qdev_get_gpio_in(dev, 32)); diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c index ff92ded82c..43a8d3dc48 100644 --- a/hw/arm/allwinner-h3.c +++ b/hw/arm/allwinner-h3.c @@ -349,6 +349,8 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->sid), 0, s->memmap[AW_H3_SID]); /* SD/MMC */ + object_property_set_link(OBJECT(&s->mmc0), "dma-memory", + OBJECT(get_system_memory()), &error_fatal); sysbus_realize(SYS_BUS_DEVICE(&s->mmc0), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(&s->mmc0), 0, s->memmap[AW_H3_MMC0]); sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc0), 0, diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c index f9eb92c09e..e82afb75eb 100644 --- a/hw/sd/allwinner-sdhost.c +++ b/hw/sd/allwinner-sdhost.c @@ -21,7 +21,10 @@ #include "qemu/log.h" #include "qemu/module.h" #include "qemu/units.h" +#include "qapi/error.h" #include "sysemu/blockdev.h" +#include "sysemu/dma.h" +#include "hw/qdev-properties.h" #include "hw/irq.h" #include "hw/sd/allwinner-sdhost.h" #include "migration/vmstate.h" @@ -306,7 +309,7 @@ static uint32_t allwinner_sdhost_process_desc(AwSdHostState *s, uint8_t buf[1024]; /* Read descriptor */ - cpu_physical_memory_read(desc_addr, desc, sizeof(*desc)); + dma_memory_read(&s->dma_as, desc_addr, desc, sizeof(*desc)); if (desc->size == 0) { desc->size = klass->max_desc_size; } else if (desc->size > klass->max_desc_size) { @@ -331,22 +334,24 @@ static uint32_t allwinner_sdhost_process_desc(AwSdHostState *s, /* Write to SD bus */ if (is_write) { - cpu_physical_memory_read((desc->addr & DESC_SIZE_MASK) + num_done, - buf, buf_bytes); + dma_memory_read(&s->dma_as, + (desc->addr & DESC_SIZE_MASK) + num_done, + buf, buf_bytes); sdbus_write_data(&s->sdbus, buf, buf_bytes); /* Read from SD bus */ } else { sdbus_read_data(&s->sdbus, buf, buf_bytes); - cpu_physical_memory_write((desc->addr & DESC_SIZE_MASK) + num_done, - buf, buf_bytes); + dma_memory_write(&s->dma_as, + (desc->addr & DESC_SIZE_MASK) + num_done, + buf, buf_bytes); } num_done += buf_bytes; } /* Clear hold flag and flush descriptor */ desc->status &= ~DESC_STATUS_HOLD; - cpu_physical_memory_write(desc_addr, desc, sizeof(*desc)); + dma_memory_write(&s->dma_as, desc_addr, desc, sizeof(*desc)); return num_done; } @@ -721,6 +726,12 @@ static const VMStateDescription vmstate_allwinner_sdhost = { } }; +static Property allwinner_sdhost_properties[] = { + DEFINE_PROP_LINK("dma-memory", AwSdHostState, dma_mr, + TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_END_OF_LIST(), +}; + static void allwinner_sdhost_init(Object *obj) { AwSdHostState *s = AW_SDHOST(obj); @@ -734,6 +745,18 @@ static void allwinner_sdhost_init(Object *obj) sysbus_init_irq(SYS_BUS_DEVICE(s), &s->irq); } +static void allwinner_sdhost_realize(DeviceState *dev, Error **errp) +{ + AwSdHostState *s = AW_SDHOST(dev); + + if (!s->dma_mr) { + error_setg(errp, TYPE_AW_SDHOST " 'dma-memory' link not set"); + return; + } + + address_space_init(&s->dma_as, s->dma_mr, "sdhost-dma"); +} + static void allwinner_sdhost_reset(DeviceState *dev) { AwSdHostState *s = AW_SDHOST(dev); @@ -792,6 +815,8 @@ static void allwinner_sdhost_class_init(ObjectClass *klass, void *data) dc->reset = allwinner_sdhost_reset; dc->vmsd = &vmstate_allwinner_sdhost; + dc->realize = allwinner_sdhost_realize; + device_class_set_props(dc, allwinner_sdhost_properties); } static void allwinner_sdhost_sun4i_class_init(ObjectClass *klass, void *data) diff --git a/include/hw/sd/allwinner-sdhost.h b/include/hw/sd/allwinner-sdhost.h index d94606a853..839732ebf3 100644 --- a/include/hw/sd/allwinner-sdhost.h +++ b/include/hw/sd/allwinner-sdhost.h @@ -71,6 +71,12 @@ typedef struct AwSdHostState { /** Interrupt output signal to notify CPU */ qemu_irq irq; + /** Memory region where DMA transfers are done */ + MemoryRegion *dma_mr; + + /** Address space used internally for DMA transfers */ + AddressSpace dma_as; + /** Number of bytes left in current DMA transfer */ uint32_t transfer_cnt; From 4757cb857934a5e2dcafe1f41e95233f5c0a878d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:45 +0100 Subject: [PATCH 10/35] hw/net/allwinner-sun8i-emac: Use AddressSpace for DMA transfers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow the device to execute the DMA transfers in a different AddressSpace. The H3 SoC keeps using the system_memory address space, but via the proper dma_memory_access() API. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Reviewed-by: Niek Linnenbank Tested-by: Niek Linnenbank Message-id: 20200814122907.27732-1-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/allwinner-h3.c | 2 ++ hw/net/allwinner-sun8i-emac.c | 46 +++++++++++++++++---------- include/hw/net/allwinner-sun8i-emac.h | 6 ++++ 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c index 43a8d3dc48..76ac51a3ef 100644 --- a/hw/arm/allwinner-h3.c +++ b/hw/arm/allwinner-h3.c @@ -365,6 +365,8 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp) qemu_check_nic_model(&nd_table[0], TYPE_AW_SUN8I_EMAC); qdev_set_nic_properties(DEVICE(&s->emac), &nd_table[0]); } + object_property_set_link(OBJECT(&s->emac), "dma-memory", + OBJECT(get_system_memory()), &error_fatal); sysbus_realize(SYS_BUS_DEVICE(&s->emac), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(&s->emac), 0, s->memmap[AW_H3_EMAC]); sysbus_connect_irq(SYS_BUS_DEVICE(&s->emac), 0, diff --git a/hw/net/allwinner-sun8i-emac.c b/hw/net/allwinner-sun8i-emac.c index 28637ff4c1..38d328587e 100644 --- a/hw/net/allwinner-sun8i-emac.c +++ b/hw/net/allwinner-sun8i-emac.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qapi/error.h" #include "hw/sysbus.h" #include "migration/vmstate.h" #include "net/net.h" @@ -29,6 +30,7 @@ #include "net/checksum.h" #include "qemu/module.h" #include "exec/cpu-common.h" +#include "sysemu/dma.h" #include "hw/net/allwinner-sun8i-emac.h" /* EMAC register offsets */ @@ -337,12 +339,13 @@ static void allwinner_sun8i_emac_update_irq(AwSun8iEmacState *s) qemu_set_irq(s->irq, (s->int_sta & s->int_en) != 0); } -static uint32_t allwinner_sun8i_emac_next_desc(FrameDescriptor *desc, +static uint32_t allwinner_sun8i_emac_next_desc(AwSun8iEmacState *s, + FrameDescriptor *desc, size_t min_size) { uint32_t paddr = desc->next; - cpu_physical_memory_read(paddr, desc, sizeof(*desc)); + dma_memory_read(&s->dma_as, paddr, desc, sizeof(*desc)); if ((desc->status & DESC_STATUS_CTL) && (desc->status2 & DESC_STATUS2_BUF_SIZE_MASK) >= min_size) { @@ -352,7 +355,8 @@ static uint32_t allwinner_sun8i_emac_next_desc(FrameDescriptor *desc, } } -static uint32_t allwinner_sun8i_emac_get_desc(FrameDescriptor *desc, +static uint32_t allwinner_sun8i_emac_get_desc(AwSun8iEmacState *s, + FrameDescriptor *desc, uint32_t start_addr, size_t min_size) { @@ -360,7 +364,7 @@ static uint32_t allwinner_sun8i_emac_get_desc(FrameDescriptor *desc, /* Note that the list is a cycle. Last entry points back to the head. */ while (desc_addr != 0) { - cpu_physical_memory_read(desc_addr, desc, sizeof(*desc)); + dma_memory_read(&s->dma_as, desc_addr, desc, sizeof(*desc)); if ((desc->status & DESC_STATUS_CTL) && (desc->status2 & DESC_STATUS2_BUF_SIZE_MASK) >= min_size) { @@ -379,20 +383,21 @@ static uint32_t allwinner_sun8i_emac_rx_desc(AwSun8iEmacState *s, FrameDescriptor *desc, size_t min_size) { - return allwinner_sun8i_emac_get_desc(desc, s->rx_desc_curr, min_size); + return allwinner_sun8i_emac_get_desc(s, desc, s->rx_desc_curr, min_size); } static uint32_t allwinner_sun8i_emac_tx_desc(AwSun8iEmacState *s, FrameDescriptor *desc, size_t min_size) { - return allwinner_sun8i_emac_get_desc(desc, s->tx_desc_head, min_size); + return allwinner_sun8i_emac_get_desc(s, desc, s->tx_desc_head, min_size); } -static void allwinner_sun8i_emac_flush_desc(FrameDescriptor *desc, +static void allwinner_sun8i_emac_flush_desc(AwSun8iEmacState *s, + FrameDescriptor *desc, uint32_t phys_addr) { - cpu_physical_memory_write(phys_addr, desc, sizeof(*desc)); + dma_memory_write(&s->dma_as, phys_addr, desc, sizeof(*desc)); } static bool allwinner_sun8i_emac_can_receive(NetClientState *nc) @@ -450,8 +455,8 @@ static ssize_t allwinner_sun8i_emac_receive(NetClientState *nc, << RX_DESC_STATUS_FRM_LEN_SHIFT; } - cpu_physical_memory_write(desc.addr, buf, desc_bytes); - allwinner_sun8i_emac_flush_desc(&desc, s->rx_desc_curr); + dma_memory_write(&s->dma_as, desc.addr, buf, desc_bytes); + allwinner_sun8i_emac_flush_desc(s, &desc, s->rx_desc_curr); trace_allwinner_sun8i_emac_receive(s->rx_desc_curr, desc.addr, desc_bytes); @@ -465,7 +470,7 @@ static ssize_t allwinner_sun8i_emac_receive(NetClientState *nc, bytes_left -= desc_bytes; /* Move to the next descriptor */ - s->rx_desc_curr = allwinner_sun8i_emac_next_desc(&desc, 64); + s->rx_desc_curr = allwinner_sun8i_emac_next_desc(s, &desc, 64); if (!s->rx_desc_curr) { /* Not enough buffer space available */ s->int_sta |= INT_STA_RX_BUF_UA; @@ -501,10 +506,10 @@ static void allwinner_sun8i_emac_transmit(AwSun8iEmacState *s) desc.status |= TX_DESC_STATUS_LENGTH_ERR; break; } - cpu_physical_memory_read(desc.addr, packet_buf + packet_bytes, bytes); + dma_memory_read(&s->dma_as, desc.addr, packet_buf + packet_bytes, bytes); packet_bytes += bytes; desc.status &= ~DESC_STATUS_CTL; - allwinner_sun8i_emac_flush_desc(&desc, s->tx_desc_curr); + allwinner_sun8i_emac_flush_desc(s, &desc, s->tx_desc_curr); /* After the last descriptor, send the packet */ if (desc.status2 & TX_DESC_STATUS2_LAST_DESC) { @@ -519,7 +524,7 @@ static void allwinner_sun8i_emac_transmit(AwSun8iEmacState *s) packet_bytes = 0; transmitted++; } - s->tx_desc_curr = allwinner_sun8i_emac_next_desc(&desc, 0); + s->tx_desc_curr = allwinner_sun8i_emac_next_desc(s, &desc, 0); } /* Raise transmit completed interrupt */ @@ -623,7 +628,7 @@ static uint64_t allwinner_sun8i_emac_read(void *opaque, hwaddr offset, break; case REG_TX_CUR_BUF: /* Transmit Current Buffer */ if (s->tx_desc_curr != 0) { - cpu_physical_memory_read(s->tx_desc_curr, &desc, sizeof(desc)); + dma_memory_read(&s->dma_as, s->tx_desc_curr, &desc, sizeof(desc)); value = desc.addr; } else { value = 0; @@ -636,7 +641,7 @@ static uint64_t allwinner_sun8i_emac_read(void *opaque, hwaddr offset, break; case REG_RX_CUR_BUF: /* Receive Current Buffer */ if (s->rx_desc_curr != 0) { - cpu_physical_memory_read(s->rx_desc_curr, &desc, sizeof(desc)); + dma_memory_read(&s->dma_as, s->rx_desc_curr, &desc, sizeof(desc)); value = desc.addr; } else { value = 0; @@ -790,6 +795,13 @@ static void allwinner_sun8i_emac_realize(DeviceState *dev, Error **errp) { AwSun8iEmacState *s = AW_SUN8I_EMAC(dev); + if (!s->dma_mr) { + error_setg(errp, TYPE_AW_SUN8I_EMAC " 'dma-memory' link not set"); + return; + } + + address_space_init(&s->dma_as, s->dma_mr, "emac-dma"); + qemu_macaddr_default_if_unset(&s->conf.macaddr); s->nic = qemu_new_nic(&net_allwinner_sun8i_emac_info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s); @@ -799,6 +811,8 @@ static void allwinner_sun8i_emac_realize(DeviceState *dev, Error **errp) static Property allwinner_sun8i_emac_properties[] = { DEFINE_NIC_PROPERTIES(AwSun8iEmacState, conf), DEFINE_PROP_UINT8("phy-addr", AwSun8iEmacState, mii_phy_addr, 0), + DEFINE_PROP_LINK("dma-memory", AwSun8iEmacState, dma_mr, + TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/net/allwinner-sun8i-emac.h b/include/hw/net/allwinner-sun8i-emac.h index eda034e96b..dd1d7b96cd 100644 --- a/include/hw/net/allwinner-sun8i-emac.h +++ b/include/hw/net/allwinner-sun8i-emac.h @@ -49,6 +49,12 @@ typedef struct AwSun8iEmacState { /** Interrupt output signal to notify CPU */ qemu_irq irq; + /** Memory region where DMA transfers are done */ + MemoryRegion *dma_mr; + + /** Address space used internally for DMA transfers */ + AddressSpace dma_as; + /** Generic Network Interface Controller (NIC) for networking API */ NICState *nic; From 31a171cc8bc9d8524469c286c744dabe70f0c924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:45 +0100 Subject: [PATCH 11/35] hw/arm/xilinx_zynq: Uninline cadence_uart_create() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we want to call qdev_connect_clock_in() before the device is realized, we need to uninline cadence_uart_create() first. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Alistair Francis Message-id: 20200803105647.22223-2-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/xilinx_zynq.c | 14 ++++++++++++-- include/hw/char/cadence_uart.h | 17 ----------------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c index 32aa7323d9..cf6d9757b5 100644 --- a/hw/arm/xilinx_zynq.c +++ b/hw/arm/xilinx_zynq.c @@ -254,10 +254,20 @@ static void zynq_init(MachineState *machine) sysbus_create_simple(TYPE_CHIPIDEA, 0xE0002000, pic[53 - IRQ_OFFSET]); sysbus_create_simple(TYPE_CHIPIDEA, 0xE0003000, pic[76 - IRQ_OFFSET]); - dev = cadence_uart_create(0xE0000000, pic[59 - IRQ_OFFSET], serial_hd(0)); + dev = qdev_new(TYPE_CADENCE_UART); + busdev = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", serial_hd(0)); + sysbus_realize_and_unref(busdev, &error_fatal); + sysbus_mmio_map(busdev, 0, 0xE0000000); + sysbus_connect_irq(busdev, 0, pic[59 - IRQ_OFFSET]); qdev_connect_clock_in(dev, "refclk", qdev_get_clock_out(slcr, "uart0_ref_clk")); - dev = cadence_uart_create(0xE0001000, pic[82 - IRQ_OFFSET], serial_hd(1)); + dev = qdev_new(TYPE_CADENCE_UART); + busdev = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", serial_hd(1)); + sysbus_realize_and_unref(busdev, &error_fatal); + sysbus_mmio_map(busdev, 0, 0xE0001000); + sysbus_connect_irq(busdev, 0, pic[82 - IRQ_OFFSET]); qdev_connect_clock_in(dev, "refclk", qdev_get_clock_out(slcr, "uart1_ref_clk")); diff --git a/include/hw/char/cadence_uart.h b/include/hw/char/cadence_uart.h index ed7b58d31d..dabc49ea4f 100644 --- a/include/hw/char/cadence_uart.h +++ b/include/hw/char/cadence_uart.h @@ -53,21 +53,4 @@ typedef struct { Clock *refclk; } CadenceUARTState; -static inline DeviceState *cadence_uart_create(hwaddr addr, - qemu_irq irq, - Chardev *chr) -{ - DeviceState *dev; - SysBusDevice *s; - - dev = qdev_new(TYPE_CADENCE_UART); - s = SYS_BUS_DEVICE(dev); - qdev_prop_set_chr(dev, "chardev", chr); - sysbus_realize_and_unref(s, &error_fatal); - sysbus_mmio_map(s, 0, addr); - sysbus_connect_irq(s, 0, irq); - - return dev; -} - #endif From 3ab928789537b8eaf4102ca4602fcd1630b1ec5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:45 +0100 Subject: [PATCH 12/35] hw/arm/xilinx_zynq: Call qdev_connect_clock_in() before DeviceRealize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clock canonical name is set in device_set_realized (see the block added to hw/core/qdev.c in commit 0e6934f264). If we connect a clock after the device is realized, this code is not executed. This is currently not a problem as this name is only used for trace events, however this disrupt tracing. Fix by calling qdev_connect_clock_in() before realizing. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Alistair Francis Message-id: 20200803105647.22223-3-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/xilinx_zynq.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c index cf6d9757b5..969ef0727c 100644 --- a/hw/arm/xilinx_zynq.c +++ b/hw/arm/xilinx_zynq.c @@ -222,18 +222,18 @@ static void zynq_init(MachineState *machine) 1, 0x0066, 0x0022, 0x0000, 0x0000, 0x0555, 0x2aa, 0); - /* Create slcr, keep a pointer to connect clocks */ - slcr = qdev_new("xilinx,zynq_slcr"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(slcr), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(slcr), 0, 0xF8000000); - /* Create the main clock source, and feed slcr with it */ zynq_machine->ps_clk = CLOCK(object_new(TYPE_CLOCK)); object_property_add_child(OBJECT(zynq_machine), "ps_clk", OBJECT(zynq_machine->ps_clk)); object_unref(OBJECT(zynq_machine->ps_clk)); clock_set_hz(zynq_machine->ps_clk, PS_CLK_FREQUENCY); + + /* Create slcr, keep a pointer to connect clocks */ + slcr = qdev_new("xilinx,zynq_slcr"); qdev_connect_clock_in(slcr, "ps_clk", zynq_machine->ps_clk); + sysbus_realize_and_unref(SYS_BUS_DEVICE(slcr), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(slcr), 0, 0xF8000000); dev = qdev_new(TYPE_A9MPCORE_PRIV); qdev_prop_set_uint32(dev, "num-cpu", 1); @@ -257,19 +257,19 @@ static void zynq_init(MachineState *machine) dev = qdev_new(TYPE_CADENCE_UART); busdev = SYS_BUS_DEVICE(dev); qdev_prop_set_chr(dev, "chardev", serial_hd(0)); + qdev_connect_clock_in(dev, "refclk", + qdev_get_clock_out(slcr, "uart0_ref_clk")); sysbus_realize_and_unref(busdev, &error_fatal); sysbus_mmio_map(busdev, 0, 0xE0000000); sysbus_connect_irq(busdev, 0, pic[59 - IRQ_OFFSET]); - qdev_connect_clock_in(dev, "refclk", - qdev_get_clock_out(slcr, "uart0_ref_clk")); dev = qdev_new(TYPE_CADENCE_UART); busdev = SYS_BUS_DEVICE(dev); qdev_prop_set_chr(dev, "chardev", serial_hd(1)); + qdev_connect_clock_in(dev, "refclk", + qdev_get_clock_out(slcr, "uart1_ref_clk")); sysbus_realize_and_unref(busdev, &error_fatal); sysbus_mmio_map(busdev, 0, 0xE0001000); sysbus_connect_irq(busdev, 0, pic[82 - IRQ_OFFSET]); - qdev_connect_clock_in(dev, "refclk", - qdev_get_clock_out(slcr, "uart1_ref_clk")); sysbus_create_varargs("cadence_ttc", 0xF8001000, pic[42-IRQ_OFFSET], pic[43-IRQ_OFFSET], pic[44-IRQ_OFFSET], NULL); From f129360ca15723e43a8c44d1a2f025c0df1270cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:46 +0100 Subject: [PATCH 13/35] hw/qdev-clock: Uninline qdev_connect_clock_in() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to assert the device is not realized. To avoid overloading this header including "hw/qdev-core.h", uninline the function first. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Alistair Francis Message-id: 20200803105647.22223-4-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/core/qdev-clock.c | 5 +++++ include/hw/qdev-clock.h | 6 +----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/hw/core/qdev-clock.c b/hw/core/qdev-clock.c index 5cc1e82e51..f139b68b88 100644 --- a/hw/core/qdev-clock.c +++ b/hw/core/qdev-clock.c @@ -183,3 +183,8 @@ Clock *qdev_alias_clock(DeviceState *dev, const char *name, return ncl->clock; } + +void qdev_connect_clock_in(DeviceState *dev, const char *name, Clock *source) +{ + clock_set_source(qdev_get_clock_in(dev, name), source); +} diff --git a/include/hw/qdev-clock.h b/include/hw/qdev-clock.h index a340f65ff9..a897f7c9d0 100644 --- a/include/hw/qdev-clock.h +++ b/include/hw/qdev-clock.h @@ -71,11 +71,7 @@ Clock *qdev_get_clock_out(DeviceState *dev, const char *name); * Set the source clock of input clock @name of device @dev to @source. * @source period update will be propagated to @name clock. */ -static inline void qdev_connect_clock_in(DeviceState *dev, const char *name, - Clock *source) -{ - clock_set_source(qdev_get_clock_in(dev, name), source); -} +void qdev_connect_clock_in(DeviceState *dev, const char *name, Clock *source); /** * qdev_alias_clock: From 739fa3255492f1c7541db1c7a9795fcf4b472c91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:46 +0100 Subject: [PATCH 14/35] hw/qdev-clock: Avoid calling qdev_connect_clock_in after DeviceRealize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clock canonical name is set in device_set_realized (see the block added to hw/core/qdev.c in commit 0e6934f264). If we connect a clock after the device is realized, this code is not executed. This is currently not a problem as this name is only used for trace events, however this disrupt tracing. Add a comment to document qdev_connect_clock_in() must be called before the device is realized, and assert this condition. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Alistair Francis Message-id: 20200803105647.22223-5-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/core/qdev-clock.c | 1 + include/hw/qdev-clock.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/hw/core/qdev-clock.c b/hw/core/qdev-clock.c index f139b68b88..47ecb5b4fa 100644 --- a/hw/core/qdev-clock.c +++ b/hw/core/qdev-clock.c @@ -186,5 +186,6 @@ Clock *qdev_alias_clock(DeviceState *dev, const char *name, void qdev_connect_clock_in(DeviceState *dev, const char *name, Clock *source) { + assert(!dev->realized); clock_set_source(qdev_get_clock_in(dev, name), source); } diff --git a/include/hw/qdev-clock.h b/include/hw/qdev-clock.h index a897f7c9d0..64ca4d266f 100644 --- a/include/hw/qdev-clock.h +++ b/include/hw/qdev-clock.h @@ -70,6 +70,8 @@ Clock *qdev_get_clock_out(DeviceState *dev, const char *name); * * Set the source clock of input clock @name of device @dev to @source. * @source period update will be propagated to @name clock. + * + * Must be called before @dev is realized. */ void qdev_connect_clock_in(DeviceState *dev, const char *name, Clock *source); From 68274b945e9ab4369e97baa010a700f8056c8113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:46 +0100 Subject: [PATCH 15/35] hw/misc/unimp: Display value after offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To better align the read/write accesses, display the value after the offset (read accesses only display the offset). Reviewed-by: Richard Henderson Signed-off-by: Philippe Mathieu-Daudé Message-id: 20200812190206.31595-2-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/misc/unimp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/misc/unimp.c b/hw/misc/unimp.c index bc4084d344..ee2e536c8b 100644 --- a/hw/misc/unimp.c +++ b/hw/misc/unimp.c @@ -22,7 +22,7 @@ static uint64_t unimp_read(void *opaque, hwaddr offset, unsigned size) { UnimplementedDeviceState *s = UNIMPLEMENTED_DEVICE(opaque); - qemu_log_mask(LOG_UNIMP, "%s: unimplemented device read " + qemu_log_mask(LOG_UNIMP, "%s: unimplemented device read " "(size %d, offset 0x%" HWADDR_PRIx ")\n", s->name, size, offset); return 0; @@ -34,9 +34,9 @@ static void unimp_write(void *opaque, hwaddr offset, UnimplementedDeviceState *s = UNIMPLEMENTED_DEVICE(opaque); qemu_log_mask(LOG_UNIMP, "%s: unimplemented device write " - "(size %d, value 0x%" PRIx64 - ", offset 0x%" HWADDR_PRIx ")\n", - s->name, size, value, offset); + "(size %d, offset 0x%" HWADDR_PRIx + ", value 0x%" PRIx64 ")\n", + s->name, size, offset, value); } static const MemoryRegionOps unimp_ops = { From a12b4c53cbf4d5e75f0e88d624c196d8b71256f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:46 +0100 Subject: [PATCH 16/35] hw/misc/unimp: Display the value with width of the access size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To quickly notice the access size, display the value with the width of the access (i.e. 16-bit access is displayed 0x0000, while 8-bit access 0x00). Reviewed-by: Richard Henderson Signed-off-by: Philippe Mathieu-Daudé Message-id: 20200812190206.31595-3-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/misc/unimp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/misc/unimp.c b/hw/misc/unimp.c index ee2e536c8b..b4b318db1c 100644 --- a/hw/misc/unimp.c +++ b/hw/misc/unimp.c @@ -35,8 +35,8 @@ static void unimp_write(void *opaque, hwaddr offset, qemu_log_mask(LOG_UNIMP, "%s: unimplemented device write " "(size %d, offset 0x%" HWADDR_PRIx - ", value 0x%" PRIx64 ")\n", - s->name, size, offset, value); + ", value 0x%0*" PRIx64 ")\n", + s->name, size, offset, size << 1, value); } static const MemoryRegionOps unimp_ops = { From 55d35c881924ce5a8ce410210865e47553762847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 28 Aug 2020 10:02:46 +0100 Subject: [PATCH 17/35] hw/misc/unimp: Display the offset with width of the region size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To have a better idea of how big is the region where the offset belongs, display the value with the width of the region size (i.e. a region of 0x1000 bytes uses 0x000 format). Reviewed-by: Richard Henderson Signed-off-by: Philippe Mathieu-Daudé Message-id: 20200812190206.31595-4-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/misc/unimp.c | 10 ++++++---- include/hw/misc/unimp.h | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hw/misc/unimp.c b/hw/misc/unimp.c index b4b318db1c..6cfc5727f0 100644 --- a/hw/misc/unimp.c +++ b/hw/misc/unimp.c @@ -23,8 +23,8 @@ static uint64_t unimp_read(void *opaque, hwaddr offset, unsigned size) UnimplementedDeviceState *s = UNIMPLEMENTED_DEVICE(opaque); qemu_log_mask(LOG_UNIMP, "%s: unimplemented device read " - "(size %d, offset 0x%" HWADDR_PRIx ")\n", - s->name, size, offset); + "(size %d, offset 0x%0*" HWADDR_PRIx ")\n", + s->name, size, s->offset_fmt_width, offset); return 0; } @@ -34,9 +34,9 @@ static void unimp_write(void *opaque, hwaddr offset, UnimplementedDeviceState *s = UNIMPLEMENTED_DEVICE(opaque); qemu_log_mask(LOG_UNIMP, "%s: unimplemented device write " - "(size %d, offset 0x%" HWADDR_PRIx + "(size %d, offset 0x%0*" HWADDR_PRIx ", value 0x%0*" PRIx64 ")\n", - s->name, size, offset, size << 1, value); + s->name, size, s->offset_fmt_width, offset, size << 1, value); } static const MemoryRegionOps unimp_ops = { @@ -63,6 +63,8 @@ static void unimp_realize(DeviceState *dev, Error **errp) return; } + s->offset_fmt_width = DIV_ROUND_UP(64 - clz64(s->size - 1), 4); + memory_region_init_io(&s->iomem, OBJECT(s), &unimp_ops, s, s->name, s->size); sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); diff --git a/include/hw/misc/unimp.h b/include/hw/misc/unimp.h index 4c1d13c9bf..c63968a2cd 100644 --- a/include/hw/misc/unimp.h +++ b/include/hw/misc/unimp.h @@ -20,6 +20,7 @@ typedef struct { SysBusDevice parent_obj; MemoryRegion iomem; + unsigned offset_fmt_width; char *name; uint64_t size; } UnimplementedDeviceState; From 512c65e62e9e1ae9863ae5a8493e9fad9dbf00e7 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 28 Aug 2020 10:02:47 +0100 Subject: [PATCH 18/35] armsse: Define ARMSSEClass correctly TYPE_ARM_SSE is a TYPE_SYS_BUS_DEVICE subclass, but ARMSSEClass::parent_class is declared as DeviceClass. It never caused any problems by pure luck: We were not setting class_size for TYPE_ARM_SSE, so class_size of TYPE_SYS_BUS_DEVICE was being used (sizeof(SysBusDeviceClass)). This made the system allocate enough memory for TYPE_ARM_SSE devices even though ARMSSEClass was too small for a sysbus device. Additionally, the ARMSSEClass::info field ended up at the same offset as SysBusDeviceClass::explicit_ofw_unit_address. This would make sysbus_get_fw_dev_path() crash for the device. Luckily, sysbus_get_fw_dev_path() never gets called for TYPE_ARM_SSE devices, because qdev_get_fw_dev_path() is only used by the boot device code, and TYPE_ARM_SSE devices don't appear at the fw_boot_order list. Signed-off-by: Eduardo Habkost Message-id: 20200826181006.4097163-1-ehabkost@redhat.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/arm/armsse.c | 1 + include/hw/arm/armsse.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c index dcbff9bd8f..6381bbd94d 100644 --- a/hw/arm/armsse.c +++ b/hw/arm/armsse.c @@ -1160,6 +1160,7 @@ static const TypeInfo armsse_info = { .name = TYPE_ARMSSE, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(ARMSSE), + .class_size = sizeof(ARMSSEClass), .instance_init = armsse_init, .abstract = true, .interfaces = (InterfaceInfo[]) { diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h index 84080c2299..b10173beab 100644 --- a/include/hw/arm/armsse.h +++ b/include/hw/arm/armsse.h @@ -220,7 +220,7 @@ typedef struct ARMSSE { typedef struct ARMSSEInfo ARMSSEInfo; typedef struct ARMSSEClass { - DeviceClass parent_class; + SysBusDeviceClass parent_class; const ARMSSEInfo *info; } ARMSSEClass; From 5be4dd043f5beb5e7587d1ef8dd4e3716ec05639 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:47 +0100 Subject: [PATCH 19/35] qemu/int128: Add int128_lshift Add left-shift to match the existing right-shift. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- include/qemu/int128.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/qemu/int128.h b/include/qemu/int128.h index 5c9890db8b..76ea405922 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -63,6 +63,11 @@ static inline Int128 int128_rshift(Int128 a, int n) return a >> n; } +static inline Int128 int128_lshift(Int128 a, int n) +{ + return a << n; +} + static inline Int128 int128_add(Int128 a, Int128 b) { return a + b; @@ -217,6 +222,17 @@ static inline Int128 int128_rshift(Int128 a, int n) } } +static inline Int128 int128_lshift(Int128 a, int n) +{ + uint64_t l = a.lo << (n & 63); + if (n >= 64) { + return int128_make128(0, l); + } else if (n > 0) { + return int128_make128(l, (a.hi << n) | (a.lo >> (64 - n))); + } + return a; +} + static inline Int128 int128_add(Int128 a, Int128 b) { uint64_t lo = a.lo + b.lo; From f7d79c41fa4bd0f0d27dcd14babab8575fbed39f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:47 +0100 Subject: [PATCH 20/35] target/arm: Split out gen_gvec_fn_zz Model the new function on gen_gvec_fn2 in translate-a64.c, but indicating which kind of register and in which order. Since there is only one user of do_vector2_z, fold it into do_mov_z. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index d97cb37d83..f1803eb72b 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -143,15 +143,13 @@ static int pred_gvec_reg_size(DisasContext *s) } /* Invoke a vector expander on two Zregs. */ -static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn, - int esz, int rd, int rn) + +static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn, + int esz, int rd, int rn) { - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - gvec_fn(esz, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), vsz, vsz); - } - return true; + unsigned vsz = vec_full_reg_size(s); + gvec_fn(esz, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), vsz, vsz); } /* Invoke a vector expander on three Zregs. */ @@ -170,7 +168,10 @@ static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn, /* Invoke a vector move on two Zregs. */ static bool do_mov_z(DisasContext *s, int rd, int rn) { - return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn); + if (sve_access_check(s)) { + gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn); + } + return true; } /* Initialize a Zreg with replications of a 64-bit immediate. */ From 28c4da31be6a5e501b60b77bac17652dd3211378 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:47 +0100 Subject: [PATCH 21/35] target/arm: Split out gen_gvec_fn_zzz, do_zzz_fn Model gen_gvec_fn_zzz on gen_gvec_fn3 in translate-a64.c, but indicating which kind of register and in which order. Model do_zzz_fn on the other do_foo functions that take an argument set and verify sve enabled. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 43 +++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index f1803eb72b..9a3d060c05 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -153,16 +153,13 @@ static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn, } /* Invoke a vector expander on three Zregs. */ -static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn, - int esz, int rd, int rn, int rm) +static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, + int esz, int rd, int rn, int rm) { - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - gvec_fn(esz, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), vsz, vsz); - } - return true; + unsigned vsz = vec_full_reg_size(s); + gvec_fn(esz, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), vsz, vsz); } /* Invoke a vector move on two Zregs. */ @@ -274,24 +271,32 @@ const uint64_t pred_esz_masks[4] = { *** SVE Logical - Unpredicated Group */ +static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn) +{ + if (sve_access_check(s)) { + gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm); + } + return true; +} + static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_and); } static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_or); } static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_xor); } static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_andc); } /* @@ -300,32 +305,32 @@ static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a) static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_add); } static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_sub); } static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_ssadd); } static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_sssub); } static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_usadd); } static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_ussub); } /* From 8a40fe5f1bf3837ae3f9961efe1d51e7214f2664 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:47 +0100 Subject: [PATCH 22/35] target/arm: Rearrange {sve,fp}_check_access assert We want to ensure that access is checked by the time we ask for a specific fp/vector register. We want to ensure that we do not emit two lots of code to raise an exception. But sometimes it's difficult to cleanly organize the code such that we never pass through sve_check_access exactly once. Allow multiple calls so long as the result is true, that is, no exception to be raised. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-5-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 27 ++++++++++++++++----------- target/arm/translate.h | 1 + 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 0fc5e12fab..115dc946e7 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -1156,18 +1156,18 @@ static void do_vec_ld(DisasContext *s, int destidx, int element, * unallocated-encoding checks (otherwise the syndrome information * for the resulting exception will be incorrect). */ -static inline bool fp_access_check(DisasContext *s) +static bool fp_access_check(DisasContext *s) { - assert(!s->fp_access_checked); - s->fp_access_checked = true; + if (s->fp_excp_el) { + assert(!s->fp_access_checked); + s->fp_access_checked = true; - if (!s->fp_excp_el) { - return true; + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, + syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + return false; } - - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); - return false; + s->fp_access_checked = true; + return true; } /* Check that SVE access is enabled. If it is, return true. @@ -1176,10 +1176,14 @@ static inline bool fp_access_check(DisasContext *s) bool sve_access_check(DisasContext *s) { if (s->sve_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(), - s->sve_excp_el); + assert(!s->sve_access_checked); + s->sve_access_checked = true; + + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, + syn_sve_access_trap(), s->sve_excp_el); return false; } + s->sve_access_checked = true; return fp_access_check(s); } @@ -14529,6 +14533,7 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) s->base.pc_next += 4; s->fp_access_checked = false; + s->sve_access_checked = false; if (dc_isar_feature(aa64_bti, s)) { if (s->base.num_insns == 1) { diff --git a/target/arm/translate.h b/target/arm/translate.h index 6d6d4c0f42..423b0e08df 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -64,6 +64,7 @@ typedef struct DisasContext { * that it is set at the point where we actually touch the FP regs. */ bool fp_access_checked; + bool sve_access_checked; /* ARMv8 single-step state (this is distinct from the QEMU gdbstub * single-step support). */ From d0b2df5a01eeccbac71d4d883158b91e7f9a6a29 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:48 +0100 Subject: [PATCH 23/35] target/arm: Merge do_vector2_p into do_mov_p This is the only user of the function. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-6-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 9a3d060c05..6dac70359b 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -178,18 +178,6 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word) tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); } -/* Invoke a vector expander on two Pregs. */ -static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn, - int esz, int rd, int rn) -{ - if (sve_access_check(s)) { - unsigned psz = pred_gvec_reg_size(s); - gvec_fn(esz, pred_full_reg_offset(s, rd), - pred_full_reg_offset(s, rn), psz, psz); - } - return true; -} - /* Invoke a vector expander on three Pregs. */ static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn, int esz, int rd, int rn, int rm) @@ -221,7 +209,12 @@ static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op, /* Invoke a vector move on two Pregs. */ static bool do_mov_p(DisasContext *s, int rd, int rn) { - return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn); + if (sve_access_check(s)) { + unsigned psz = pred_gvec_reg_size(s); + tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd), + pred_full_reg_offset(s, rn), psz, psz); + } + return true; } /* Set the cpu flags as per a return from an SVE helper. */ From dd81a8d7cf5c90963603806e58a217bbe759f75e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:48 +0100 Subject: [PATCH 24/35] target/arm: Clean up 4-operand predicate expansion Move the check for !S into do_pppp_flags, which allows to merge in do_vecop4_p. Split out gen_gvec_fn_ppp without sve_access_check, to mirror gen_gvec_fn_zzz. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-7-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 111 ++++++++++++++----------------------- 1 file changed, 43 insertions(+), 68 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 6dac70359b..5dfc129e73 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -179,31 +179,13 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word) } /* Invoke a vector expander on three Pregs. */ -static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn, - int esz, int rd, int rn, int rm) +static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, + int rd, int rn, int rm) { - if (sve_access_check(s)) { - unsigned psz = pred_gvec_reg_size(s); - gvec_fn(esz, pred_full_reg_offset(s, rd), - pred_full_reg_offset(s, rn), - pred_full_reg_offset(s, rm), psz, psz); - } - return true; -} - -/* Invoke a vector operation on four Pregs. */ -static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op, - int rd, int rn, int rm, int rg) -{ - if (sve_access_check(s)) { - unsigned psz = pred_gvec_reg_size(s); - tcg_gen_gvec_4(pred_full_reg_offset(s, rd), - pred_full_reg_offset(s, rn), - pred_full_reg_offset(s, rm), - pred_full_reg_offset(s, rg), - psz, psz, gvec_op); - } - return true; + unsigned psz = pred_gvec_reg_size(s); + gvec_fn(MO_64, pred_full_reg_offset(s, rd), + pred_full_reg_offset(s, rn), + pred_full_reg_offset(s, rm), psz, psz); } /* Invoke a vector move on two Pregs. */ @@ -1067,6 +1049,11 @@ static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, int mofs = pred_full_reg_offset(s, a->rm); int gofs = pred_full_reg_offset(s, a->pg); + if (!a->s) { + tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); + return true; + } + if (psz == 8) { /* Do the operation and the flags generation in temps. */ TCGv_i64 pd = tcg_temp_new_i64(); @@ -1126,19 +1113,24 @@ static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_and_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else if (a->rn == a->rm) { - if (a->pg == a->rn) { - return do_mov_p(s, a->rd, a->rn); - } else { - return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg); + + if (!a->s) { + if (!sve_access_check(s)) { + return true; + } + if (a->rn == a->rm) { + if (a->pg == a->rn) { + do_mov_p(s, a->rd, a->rn); + } else { + gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); + } + return true; + } else if (a->pg == a->rn || a->pg == a->rm) { + gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); + return true; } - } else if (a->pg == a->rn || a->pg == a->rm) { - return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); } + return do_pppp_flags(s, a, &op); } static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1162,13 +1154,14 @@ static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_bic_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else if (a->pg == a->rn) { - return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); + + if (!a->s && a->pg == a->rn) { + if (sve_access_check(s)) { + gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); + } + return true; } + return do_pppp_flags(s, a, &op); } static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1192,11 +1185,7 @@ static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_eor_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } + return do_pppp_flags(s, a, &op); } static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1222,11 +1211,11 @@ static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_sel_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; + if (a->s) { return false; - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); } + return do_pppp_flags(s, a, &op); } static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1250,13 +1239,11 @@ static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_orr_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else if (a->pg == a->rn && a->rn == a->rm) { + + if (!a->s && a->pg == a->rn && a->rn == a->rm) { return do_mov_p(s, a->rd, a->rn); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); } + return do_pppp_flags(s, a, &op); } static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1280,11 +1267,7 @@ static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_orn_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } + return do_pppp_flags(s, a, &op); } static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1308,11 +1291,7 @@ static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_nor_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } + return do_pppp_flags(s, a, &op); } static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1336,11 +1315,7 @@ static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_nand_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } + return do_pppp_flags(s, a, &op); } /* From d4bc623254b55e2f9613c9450216fa7e50c03929 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:48 +0100 Subject: [PATCH 25/35] target/arm: Use tcg_gen_gvec_bitsel for trans_SEL_pppp The gvec operation was added after the initial implementation of the SEL instruction and was missed in the conversion. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-8-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 5dfc129e73..a747fff01f 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -1188,34 +1188,19 @@ static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) return do_pppp_flags(s, a, &op); } -static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) -{ - tcg_gen_and_i64(pn, pn, pg); - tcg_gen_andc_i64(pm, pm, pg); - tcg_gen_or_i64(pd, pn, pm); -} - -static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, - TCGv_vec pm, TCGv_vec pg) -{ - tcg_gen_and_vec(vece, pn, pn, pg); - tcg_gen_andc_vec(vece, pm, pm, pg); - tcg_gen_or_vec(vece, pd, pn, pm); -} - static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) { - static const GVecGen4 op = { - .fni8 = gen_sel_pg_i64, - .fniv = gen_sel_pg_vec, - .fno = gen_helper_sve_sel_pppp, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - }; - if (a->s) { return false; } - return do_pppp_flags(s, a, &op); + if (sve_access_check(s)) { + unsigned psz = pred_gvec_reg_size(s); + tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), + pred_full_reg_offset(s, a->pg), + pred_full_reg_offset(s, a->rn), + pred_full_reg_offset(s, a->rm), psz, psz); + } + return true; } static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) From 36cbb7a8e7100864c488a1153cecba90b1c33a4c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:48 +0100 Subject: [PATCH 26/35] target/arm: Split out gen_gvec_ool_zzzp Model after gen_gvec_fn_zzz et al. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-9-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index a747fff01f..96bfd32bbe 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -142,8 +142,19 @@ static int pred_gvec_reg_size(DisasContext *s) return size_for_gvec(pred_full_reg_size(s)); } -/* Invoke a vector expander on two Zregs. */ +/* Invoke an out-of-line helper on 3 Zregs and a predicate. */ +static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, + int rd, int rn, int rm, int pg, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + pred_full_reg_offset(s, pg), + vsz, vsz, data, fn); +} +/* Invoke a vector expander on two Zregs. */ static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn, int esz, int rd, int rn) { @@ -314,16 +325,11 @@ static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a) static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn) { - unsigned vsz = vec_full_reg_size(s); if (fn == NULL) { return false; } if (sve_access_check(s)) { - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - pred_full_reg_offset(s, a->pg), - vsz, vsz, 0, fn); + gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0); } return true; } @@ -337,12 +343,7 @@ static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz) gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d }; - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - pred_full_reg_offset(s, pg), - vsz, vsz, 0, fns[esz]); + gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0); } #define DO_ZPZZ(NAME, name) \ @@ -2704,12 +2705,8 @@ static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a) static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - pred_full_reg_offset(s, a->pg), - vsz, vsz, a->esz, gen_helper_sve_splice); + gen_gvec_ool_zzzp(s, gen_helper_sve_splice, + a->rd, a->rn, a->rm, a->pg, 0); } return true; } From 602459969c3b5a96ac970bd4698f60e6edb23bb0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:48 +0100 Subject: [PATCH 27/35] target/arm: Merge helper_sve_clr_* and helper_sve_movz_* The existing clr functions have only one vector argument, and so can only clear in place. The existing movz functions have two vector arguments, and so can clear while moving. Merge them, with a flag that controls the sense of active vs inactive elements being cleared. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-10-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-sve.h | 5 --- target/arm/sve_helper.c | 70 ++++++++------------------------------ target/arm/translate-sve.c | 53 +++++++++++------------------ 3 files changed, 34 insertions(+), 94 deletions(-) diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 63c4a087ca..4411c47120 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -269,11 +269,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 382fa82bc8..4758d46f34 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -956,85 +956,43 @@ uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc) return flags; } -/* Store zero into every active element of Zd. We will use this for two - * and three-operand predicated instructions for which logic dictates a - * zero result. In particular, logical shift by element size, which is - * otherwise undefined on the host. - * - * For element sizes smaller than uint64_t, we use tables to expand - * the N bits of the controlling predicate to a byte mask, and clear - * those bytes. +/* + * Copy Zn into Zd, and store zero into inactive elements. + * If inv, store zeros into the active elements. */ -void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - d[i] &= ~expand_pred_b(pg[H1(i)]); - } -} - -void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - d[i] &= ~expand_pred_h(pg[H1(i)]); - } -} - -void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - d[i] &= ~expand_pred_s(pg[H1(i)]); - } -} - -void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - if (pg[H1(i)] & 1) { - d[i] = 0; - } - } -} - -/* Copy Zn into Zd, and store zero into inactive elements. */ void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t inv = -(uint64_t)(simd_data(desc) & 1); uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & expand_pred_b(pg[H1(i)]); + d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv); } } void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t inv = -(uint64_t)(simd_data(desc) & 1); uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & expand_pred_h(pg[H1(i)]); + d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv); } } void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t inv = -(uint64_t)(simd_data(desc) & 1); uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & expand_pred_s(pg[H1(i)]); + d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv); } } @@ -1043,8 +1001,10 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc) intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + uint8_t inv = simd_data(desc); + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1); + d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1); } } diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 96bfd32bbe..aba156d5ab 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -590,37 +590,26 @@ static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a) *** SVE Shift by Immediate - Predicated Group */ -/* Store zero into every active element of Zd. We will use this for two - * and three-operand predicated instructions for which logic dictates a - * zero result. +/* + * Copy Zn into Zd, storing zeros into inactive elements. + * If invert, store zeros into the active elements. */ -static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz) -{ - static gen_helper_gvec_2 * const fns[4] = { - gen_helper_sve_clr_b, gen_helper_sve_clr_h, - gen_helper_sve_clr_s, gen_helper_sve_clr_d, - }; - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), - pred_full_reg_offset(s, pg), - vsz, vsz, 0, fns[esz]); - } - return true; -} - -/* Copy Zn into Zd, storing zeros into inactive elements. */ -static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz) +static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, + int esz, bool invert) { static gen_helper_gvec_3 * const fns[4] = { gen_helper_sve_movz_b, gen_helper_sve_movz_h, gen_helper_sve_movz_s, gen_helper_sve_movz_d, }; - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - pred_full_reg_offset(s, pg), - vsz, vsz, 0, fns[esz]); + + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + pred_full_reg_offset(s, pg), + vsz, vsz, invert, fns[esz]); + } + return true; } static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a, @@ -664,7 +653,7 @@ static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a) /* Shift by element size is architecturally valid. For logical shifts, it is a zeroing operation. */ if (a->imm >= (8 << a->esz)) { - return do_clr_zp(s, a->rd, a->pg, a->esz); + return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); } else { return do_zpzi_ool(s, a, fns[a->esz]); } @@ -682,7 +671,7 @@ static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a) /* Shift by element size is architecturally valid. For logical shifts, it is a zeroing operation. */ if (a->imm >= (8 << a->esz)) { - return do_clr_zp(s, a->rd, a->pg, a->esz); + return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); } else { return do_zpzi_ool(s, a, fns[a->esz]); } @@ -700,7 +689,7 @@ static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a) /* Shift by element size is architecturally valid. For arithmetic right shift for division, it is a zeroing operation. */ if (a->imm >= (8 << a->esz)) { - return do_clr_zp(s, a->rd, a->pg, a->esz); + return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); } else { return do_zpzi_ool(s, a, fns[a->esz]); } @@ -5049,8 +5038,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) /* Zero the inactive elements. */ gen_set_label(over); - do_movz_zpz(s, a->rd, a->rd, a->pg, esz); - return true; + return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); } static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, @@ -5833,8 +5821,5 @@ static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a) static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a) { - if (sve_access_check(s)) { - do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz); - } - return true; + return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false); } From 96a461f7c12587d3a64a71e4d90cda5c09ca3eb4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:49 +0100 Subject: [PATCH 28/35] target/arm: Split out gen_gvec_ool_zzp Model after gen_gvec_fn_zzz et al. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-11-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index aba156d5ab..6d10e2ac2c 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -142,6 +142,17 @@ static int pred_gvec_reg_size(DisasContext *s) return size_for_gvec(pred_full_reg_size(s)); } +/* Invoke an out-of-line helper on 2 Zregs and a predicate. */ +static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, + int rd, int rn, int pg, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + pred_full_reg_offset(s, pg), + vsz, vsz, data, fn); +} + /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, int rd, int rn, int rm, int pg, int data) @@ -415,11 +426,7 @@ static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - pred_full_reg_offset(s, a->pg), - vsz, vsz, 0, fn); + gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0); } return true; } @@ -603,11 +610,7 @@ static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - pred_full_reg_offset(s, pg), - vsz, vsz, invert, fns[esz]); + gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); } return true; } @@ -616,11 +619,7 @@ static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a, gen_helper_gvec_3 *fn) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - pred_full_reg_offset(s, a->pg), - vsz, vsz, a->imm, fn); + gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); } return true; } From e645d1a17a359156c6047006d760ca176d493edb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:49 +0100 Subject: [PATCH 29/35] target/arm: Split out gen_gvec_ool_zzz Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-12-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 53 +++++++++++++------------------------- 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 6d10e2ac2c..0d5bcf6c1e 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -142,6 +142,17 @@ static int pred_gvec_reg_size(DisasContext *s) return size_for_gvec(pred_full_reg_size(s)); } +/* Invoke an out-of-line helper on 3 Zregs. */ +static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, + int rd, int rn, int rm, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vsz, vsz, data, fn); +} + /* Invoke an out-of-line helper on 2 Zregs and a predicate. */ static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, int rd, int rn, int pg, int data) @@ -769,11 +780,7 @@ static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fn); + gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0); } return true; } @@ -947,11 +954,7 @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a) static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, a->imm, fn); + gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); } return true; } @@ -1012,11 +1015,7 @@ static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); } return true; } @@ -2067,11 +2066,7 @@ static bool trans_TBL(DisasContext *s, arg_rrr_esz *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); } return true; } @@ -2244,11 +2239,7 @@ static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data, gen_helper_gvec_3 *fn) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, data, fn); + gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); } return true; } @@ -3373,11 +3364,7 @@ static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fns[a->u][a->sz]); + gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0); } return true; } @@ -3390,11 +3377,7 @@ static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, a->index, fns[a->u][a->sz]); + gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index); } return true; } From 40e32e5a8a379baf6e0d49d83cf19950cfbaf96b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:49 +0100 Subject: [PATCH 30/35] target/arm: Split out gen_gvec_ool_zz Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-13-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 0d5bcf6c1e..15ad6c7d32 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -142,6 +142,16 @@ static int pred_gvec_reg_size(DisasContext *s) return size_for_gvec(pred_full_reg_size(s)); } +/* Invoke an out-of-line helper on 2 Zregs. */ +static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, + int rd, int rn, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vsz, vsz, data, fn); +} + /* Invoke an out-of-line helper on 3 Zregs. */ static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, int rd, int rn, int rm, int data) @@ -995,10 +1005,7 @@ static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0); } return true; } @@ -2050,10 +2057,7 @@ static bool trans_REV_v(DisasContext *s, arg_rr_esz *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0); } return true; } From 830d1a5a05619b0d407040ab8418018076765249 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:49 +0100 Subject: [PATCH 31/35] target/arm: Tidy SVE tszimm shift formats Rather than require the user to fill in the immediate (shl or shr), create full formats that include the immediate. Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-14-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/sve.decode | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/target/arm/sve.decode b/target/arm/sve.decode index 4f580a25e7..6425396ac1 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -150,13 +150,17 @@ @rd_rn_i6 ........ ... rn:5 ..... imm:s6 rd:5 &rri # Two register operand, one immediate operand, with predicate, -# element size encoded as TSZHL. User must fill in imm. -@rdn_pg_tszimm ........ .. ... ... ... pg:3 ..... rd:5 \ - &rpri_esz rn=%reg_movprfx esz=%tszimm_esz +# element size encoded as TSZHL. +@rdn_pg_tszimm_shl ........ .. ... ... ... pg:3 ..... rd:5 \ + &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shl +@rdn_pg_tszimm_shr ........ .. ... ... ... pg:3 ..... rd:5 \ + &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shr # Similarly without predicate. -@rd_rn_tszimm ........ .. ... ... ...... rn:5 rd:5 \ - &rri_esz esz=%tszimm16_esz +@rd_rn_tszimm_shl ........ .. ... ... ...... rn:5 rd:5 \ + &rri_esz esz=%tszimm16_esz imm=%tszimm16_shl +@rd_rn_tszimm_shr ........ .. ... ... ...... rn:5 rd:5 \ + &rri_esz esz=%tszimm16_esz imm=%tszimm16_shr # Two register operand, one immediate operand, with 4-bit predicate. # User must fill in imm. @@ -289,14 +293,10 @@ UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn ### SVE Shift by Immediate - Predicated Group # SVE bitwise shift by immediate (predicated) -ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shr -LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shr -LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shl -ASRD 00000100 .. 000 100 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shr +ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... @rdn_pg_tszimm_shr +LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... @rdn_pg_tszimm_shr +LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... @rdn_pg_tszimm_shl +ASRD 00000100 .. 000 100 100 ... .. ... ..... @rdn_pg_tszimm_shr # SVE bitwise shift by vector (predicated) ASR_zpzz 00000100 .. 010 000 100 ... ..... ..... @rdn_pg_rm @@ -400,12 +400,9 @@ RDVL 00000100 101 11111 01010 imm:s6 rd:5 ### SVE Bitwise Shift - Unpredicated Group # SVE bitwise shift by immediate (unpredicated) -ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... \ - @rd_rn_tszimm imm=%tszimm16_shr -LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... \ - @rd_rn_tszimm imm=%tszimm16_shr -LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... \ - @rd_rn_tszimm imm=%tszimm16_shl +ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... @rd_rn_tszimm_shr +LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... @rd_rn_tszimm_shr +LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... @rd_rn_tszimm_shl # SVE bitwise shift by wide elements (unpredicated) # Note esz != 3 From d21798856b227a20a0a41640236af445f4f4aeb0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:49 +0100 Subject: [PATCH 32/35] target/arm: Generalize inl_qrdmlah_* helper functions Unify add/sub helpers and add a parameter for rounding. This will allow saturating non-rounding to reuse this code. Signed-off-by: Richard Henderson [PMM: fixed accidental use of '=' rather than '+=' in do_sqrdmlah_s] Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-15-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/vec_helper.c | 80 +++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 7d76412ee0..9f10be03ed 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -37,19 +37,24 @@ #endif /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ -static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2, - int16_t src3, uint32_t *sat) +static int16_t do_sqrdmlah_h(int16_t src1, int16_t src2, int16_t src3, + bool neg, bool round, uint32_t *sat) { - /* Simplify: + /* + * Simplify: * = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16 * = ((a3 << 15) + (e1 * e2) + (1 << 14)) >> 15 */ int32_t ret = (int32_t)src1 * src2; - ret = ((int32_t)src3 << 15) + ret + (1 << 14); + if (neg) { + ret = -ret; + } + ret += ((int32_t)src3 << 15) + (round << 14); ret >>= 15; + if (ret != (int16_t)ret) { *sat = 1; - ret = (ret < 0 ? -0x8000 : 0x7fff); + ret = (ret < 0 ? INT16_MIN : INT16_MAX); } return ret; } @@ -58,8 +63,9 @@ uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat); - uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); + uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, false, true, sat); + uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16, + false, true, sat); return deposit32(e1, 16, 16, e2); } @@ -73,35 +79,18 @@ void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_h(n[i], m[i], d[i], false, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } -/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */ -static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2, - int16_t src3, uint32_t *sat) -{ - /* Similarly, using subtraction: - * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16 - * = ((a3 << 15) - (e1 * e2) + (1 << 14)) >> 15 - */ - int32_t ret = (int32_t)src1 * src2; - ret = ((int32_t)src3 << 15) - ret + (1 << 14); - ret >>= 15; - if (ret != (int16_t)ret) { - *sat = 1; - ret = (ret < 0 ? -0x8000 : 0x7fff); - } - return ret; -} - uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat); - uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); + uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, true, true, sat); + uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16, + true, true, sat); return deposit32(e1, 16, 16, e2); } @@ -115,19 +104,23 @@ void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_h(n[i], m[i], d[i], true, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */ -static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2, - int32_t src3, uint32_t *sat) +static int32_t do_sqrdmlah_s(int32_t src1, int32_t src2, int32_t src3, + bool neg, bool round, uint32_t *sat) { /* Simplify similarly to int_qrdmlah_s16 above. */ int64_t ret = (int64_t)src1 * src2; - ret = ((int64_t)src3 << 31) + ret + (1 << 30); + if (neg) { + ret = -ret; + } + ret += ((int64_t)src3 << 31) + (round << 30); ret >>= 31; + if (ret != (int32_t)ret) { *sat = 1; ret = (ret < 0 ? INT32_MIN : INT32_MAX); @@ -139,7 +132,7 @@ uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1, int32_t src2, int32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - return inl_qrdmlah_s32(src1, src2, src3, sat); + return do_sqrdmlah_s(src1, src2, src3, false, true, sat); } void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm, @@ -152,31 +145,16 @@ void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_s(n[i], m[i], d[i], false, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } -/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */ -static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2, - int32_t src3, uint32_t *sat) -{ - /* Simplify similarly to int_qrdmlsh_s16 above. */ - int64_t ret = (int64_t)src1 * src2; - ret = ((int64_t)src3 << 31) - ret + (1 << 30); - ret >>= 31; - if (ret != (int32_t)ret) { - *sat = 1; - ret = (ret < 0 ? INT32_MIN : INT32_MAX); - } - return ret; -} - uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1, int32_t src2, int32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - return inl_qrdmlsh_s32(src1, src2, src3, sat); + return do_sqrdmlah_s(src1, src2, src3, true, true, sat); } void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, @@ -189,7 +167,7 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_s(n[i], m[i], d[i], true, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } From 2e5a265e6a9e7169c4a3e87db261b2fa92582590 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:50 +0100 Subject: [PATCH 33/35] target/arm: Convert integer multiply (indexed) to gvec for aa64 advsimd Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-19-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.h | 4 ++++ target/arm/translate-a64.c | 16 ++++++++++++++++ target/arm/vec_helper.c | 29 +++++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index 759639a63a..d0573a53c8 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -758,6 +758,10 @@ DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 115dc946e7..abbd6421a2 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -13488,6 +13488,22 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) data, gen_helper_gvec_fmlal_idx_a64); } return; + + case 0x08: /* MUL */ + if (!is_long && !is_scalar) { + static gen_helper_gvec_3 * const fns[3] = { + gen_helper_gvec_mul_idx_h, + gen_helper_gvec_mul_idx_s, + gen_helper_gvec_mul_idx_d, + }; + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s), + index, fns[size - 1]); + return; + } + break; } if (size == 3) { diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 9f10be03ed..155b6368cb 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -711,6 +711,27 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) */ #define DO_MUL_IDX(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ + intptr_t idx = simd_data(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ + TYPE mm = m[H(i + idx)]; \ + for (j = 0; j < segment; j++) { \ + d[i + j] = n[i + j] * mm; \ + } \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_MUL_IDX(gvec_mul_idx_h, uint16_t, H2) +DO_MUL_IDX(gvec_mul_idx_s, uint32_t, H4) +DO_MUL_IDX(gvec_mul_idx_d, uint64_t, ) + +#undef DO_MUL_IDX + +#define DO_FMUL_IDX(NAME, TYPE, H) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ { \ intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ @@ -725,11 +746,11 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ clear_tail(d, oprsz, simd_maxsz(desc)); \ } -DO_MUL_IDX(gvec_fmul_idx_h, float16, H2) -DO_MUL_IDX(gvec_fmul_idx_s, float32, H4) -DO_MUL_IDX(gvec_fmul_idx_d, float64, ) +DO_FMUL_IDX(gvec_fmul_idx_h, float16, H2) +DO_FMUL_IDX(gvec_fmul_idx_s, float32, H4) +DO_FMUL_IDX(gvec_fmul_idx_d, float64, ) -#undef DO_MUL_IDX +#undef DO_FMUL_IDX #define DO_FMLA_IDX(NAME, TYPE, H) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ From 3607440c4df6498585a570cfc1041e4972b41b56 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:50 +0100 Subject: [PATCH 34/35] target/arm: Convert integer multiply-add (indexed) to gvec for aa64 advsimd Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-20-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.h | 14 ++++++++++++++ target/arm/translate-a64.c | 34 ++++++++++++++++++++++++++++++++++ target/arm/vec_helper.c | 25 +++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/target/arm/helper.h b/target/arm/helper.h index d0573a53c8..378bb1898b 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -762,6 +762,20 @@ DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index abbd6421a2..a50b2f0cb1 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -13504,6 +13504,40 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) return; } break; + + case 0x10: /* MLA */ + if (!is_long && !is_scalar) { + static gen_helper_gvec_4 * const fns[3] = { + gen_helper_gvec_mla_idx_h, + gen_helper_gvec_mla_idx_s, + gen_helper_gvec_mla_idx_d, + }; + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), + index, fns[size - 1]); + return; + } + break; + + case 0x14: /* MLS */ + if (!is_long && !is_scalar) { + static gen_helper_gvec_4 * const fns[3] = { + gen_helper_gvec_mls_idx_h, + gen_helper_gvec_mls_idx_s, + gen_helper_gvec_mls_idx_d, + }; + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), + index, fns[size - 1]); + return; + } + break; } if (size == 3) { diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 155b6368cb..9e93a8a154 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -731,6 +731,31 @@ DO_MUL_IDX(gvec_mul_idx_d, uint64_t, ) #undef DO_MUL_IDX +#define DO_MLA_IDX(NAME, TYPE, OP, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ + intptr_t idx = simd_data(desc); \ + TYPE *d = vd, *n = vn, *m = vm, *a = va; \ + for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ + TYPE mm = m[H(i + idx)]; \ + for (j = 0; j < segment; j++) { \ + d[i + j] = a[i + j] OP n[i + j] * mm; \ + } \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_MLA_IDX(gvec_mla_idx_h, uint16_t, +, H2) +DO_MLA_IDX(gvec_mla_idx_s, uint32_t, +, H4) +DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, ) + +DO_MLA_IDX(gvec_mls_idx_h, uint16_t, -, H2) +DO_MLA_IDX(gvec_mls_idx_s, uint32_t, -, H4) +DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, ) + +#undef DO_MLA_IDX + #define DO_FMUL_IDX(NAME, TYPE, H) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ { \ From ed78849d9711805bda37ee026018d6ee7a606d0e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Aug 2020 10:02:50 +0100 Subject: [PATCH 35/35] target/arm: Convert sq{, r}dmulh to gvec for aa64 advsimd Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200815013145.539409-21-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.h | 10 ++++++++ target/arm/translate-a64.c | 33 ++++++++++++++++++-------- target/arm/vec_helper.c | 48 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 10 deletions(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index 378bb1898b..3ca73a1764 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -776,6 +776,16 @@ DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index a50b2f0cb1..4ba6918b60 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -678,6 +678,20 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, tcg_temp_free_ptr(fpst); } +/* Expand a 3-operand + qc + operation using an out-of-line helper. */ +static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn, + int rm, gen_helper_gvec_3_ptr *fn) +{ + TCGv_ptr qc_ptr = tcg_temp_new_ptr(); + + tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), qc_ptr, + is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); + tcg_temp_free_ptr(qc_ptr); +} + /* Set ZF and NF based on a 64 bit result. This is alas fiddlier * than the 32 bit equivalent. */ @@ -11734,6 +11748,15 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); } return; + case 0x16: /* SQDMULH, SQRDMULH */ + { + static gen_helper_gvec_3_ptr * const fns[2][2] = { + { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, + { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, + }; + gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); + } + return; case 0x11: if (!u) { /* CMTST */ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); @@ -11845,16 +11868,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genenvfn = fns[size][u]; break; } - case 0x16: /* SQDMULH, SQRDMULH */ - { - static NeonGenTwoOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, - }; - assert(size == 1 || size == 2); - genenvfn = fns[size - 1][u]; - break; - } default: g_assert_not_reached(); } diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 9e93a8a154..a6c53d2ab6 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -109,6 +109,30 @@ void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(neon_sqdmulh_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + /* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */ static int32_t do_sqrdmlah_s(int32_t src1, int32_t src2, int32_t src3, bool neg, bool round, uint32_t *sat) @@ -172,6 +196,30 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(neon_sqdmulh_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int32_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int32_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + /* Integer 8 and 16-bit dot-product. * * Note that for the loops herein, host endianness does not matter