diff --git a/MAINTAINERS b/MAINTAINERS index 7f323cd2eb..e07746ac7d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -302,6 +302,7 @@ M: Daniel Henrique Barboza R: Cédric Le Goater R: David Gibson R: Greg Kurz +R: Nicholas Piggin L: qemu-ppc@nongnu.org S: Odd Fixes F: target/ppc/ @@ -1451,6 +1452,8 @@ F: tests/avocado/ppc_pseries.py PowerNV (Non-Virtualized) M: Cédric Le Goater +R: Frédéric Barrat +R: Nicholas Piggin L: qemu-ppc@nongnu.org S: Odd Fixes F: docs/system/ppc/powernv.rst @@ -2445,6 +2448,7 @@ T: git https://github.com/philmd/qemu.git fw_cfg-next XIVE M: Cédric Le Goater +R: Frédéric Barrat L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/*/*xive* diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index 622f9d28b7..e536b3ec26 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -479,6 +479,16 @@ static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, return count; } +static uint32_t pnv_xive_presenter_get_config(XivePresenter *xptr) +{ + uint32_t cfg = 0; + + /* TIMA GEN1 is all P9 knows */ + cfg |= XIVE_PRESENTER_GEN1_TIMA_OS; + + return cfg; +} + static uint8_t pnv_xive_get_block_id(XiveRouter *xrtr) { return pnv_xive_block_id(PNV_XIVE(xrtr)); @@ -1991,6 +2001,7 @@ static void pnv_xive_class_init(ObjectClass *klass, void *data) xnc->notify = pnv_xive_notify; xpc->match_nvt = pnv_xive_match_nvt; + xpc->get_config = pnv_xive_presenter_get_config; }; static const TypeInfo pnv_xive_info = { diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index ec1edeb385..ed438a20ed 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -501,6 +501,17 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, return count; } +static uint32_t pnv_xive2_presenter_get_config(XivePresenter *xptr) +{ + PnvXive2 *xive = PNV_XIVE2(xptr); + uint32_t cfg = 0; + + if (xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS) { + cfg |= XIVE_PRESENTER_GEN1_TIMA_OS; + } + return cfg; +} + static uint8_t pnv_xive2_get_block_id(Xive2Router *xrtr) { return pnv_xive2_block_id(PNV_XIVE2(xrtr)); @@ -1645,17 +1656,6 @@ static const MemoryRegionOps pnv_xive2_ic_tm_indirect_ops = { /* * TIMA ops */ - -/* - * Special TIMA offsets to handle accesses in a POWER10 way. - * - * Only the CAM line updates done by the hypervisor should be handled - * specifically. - */ -#define HV_PAGE_OFFSET (XIVE_TM_HV_PAGE << TM_SHIFT) -#define HV_PUSH_OS_CTX_OFFSET (HV_PAGE_OFFSET | (TM_QW1_OS + TM_WORD2)) -#define HV_PULL_OS_CTX_OFFSET (HV_PAGE_OFFSET | TM_SPC_PULL_OS_CTX) - static void pnv_xive2_tm_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { @@ -1663,18 +1663,7 @@ static void pnv_xive2_tm_write(void *opaque, hwaddr offset, PnvXive2 *xive = pnv_xive2_tm_get_xive(cpu); XiveTCTX *tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc); XivePresenter *xptr = XIVE_PRESENTER(xive); - bool gen1_tima_os = - xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS; - offset &= TM_ADDRESS_MASK; - - /* TODO: should we switch the TM ops table instead ? */ - if (!gen1_tima_os && offset == HV_PUSH_OS_CTX_OFFSET) { - xive2_tm_push_os_ctx(xptr, tctx, offset, value, size); - return; - } - - /* Other TM ops are the same as XIVE1 */ xive_tctx_tm_write(xptr, tctx, offset, value, size); } @@ -1684,17 +1673,7 @@ static uint64_t pnv_xive2_tm_read(void *opaque, hwaddr offset, unsigned size) PnvXive2 *xive = pnv_xive2_tm_get_xive(cpu); XiveTCTX *tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc); XivePresenter *xptr = XIVE_PRESENTER(xive); - bool gen1_tima_os = - xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS; - offset &= TM_ADDRESS_MASK; - - /* TODO: should we switch the TM ops table instead ? */ - if (!gen1_tima_os && offset == HV_PULL_OS_CTX_OFFSET) { - return xive2_tm_pull_os_ctx(xptr, tctx, offset, size); - } - - /* Other TM ops are the same as XIVE1 */ return xive_tctx_tm_read(xptr, tctx, offset, size); } @@ -1987,6 +1966,7 @@ static void pnv_xive2_class_init(ObjectClass *klass, void *data) xnc->notify = pnv_xive2_notify; xpc->match_nvt = pnv_xive2_match_nvt; + xpc->get_config = pnv_xive2_presenter_get_config; }; static const TypeInfo pnv_xive2_info = { diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index dc641cc604..8bcab2846c 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -475,6 +475,21 @@ static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format, return count; } +static uint32_t spapr_xive_presenter_get_config(XivePresenter *xptr) +{ + uint32_t cfg = 0; + + /* + * Let's claim GEN1 TIMA format. If running with KVM on P10, the + * correct answer is deep in the hardware and not accessible to + * us. But it shouldn't matter as it only affects the presenter + * as seen by a guest OS. + */ + cfg |= XIVE_PRESENTER_GEN1_TIMA_OS; + + return cfg; +} + static uint8_t spapr_xive_get_block_id(XiveRouter *xrtr) { return SPAPR_XIVE_BLOCK_ID; @@ -832,6 +847,7 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data) sicc->post_load = spapr_xive_post_load; xpc->match_nvt = spapr_xive_match_nvt; + xpc->get_config = spapr_xive_presenter_get_config; xpc->in_kernel = spapr_xive_in_kernel_xptr; } diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 5204c14b87..84c079b034 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -20,6 +20,7 @@ #include "monitor/monitor.h" #include "hw/irq.h" #include "hw/ppc/xive.h" +#include "hw/ppc/xive2.h" #include "hw/ppc/xive_regs.h" #include "trace.h" @@ -461,6 +462,13 @@ static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, } } +static uint32_t xive_presenter_get_config(XivePresenter *xptr) +{ + XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); + + return xpc->get_config(xptr); +} + /* * Define a mapping of "special" operations depending on the TIMA page * offset and the size of the operation. @@ -497,14 +505,47 @@ static const XiveTmOp xive_tm_operations[] = { { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, xive_tm_pull_pool_ctx }, }; -static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write) +static const XiveTmOp xive2_tm_operations[] = { + /* + * MMIOs below 2K : raw values and special operations without side + * effects + */ + { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive2_tm_push_os_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, xive_tm_vt_poll }, + + /* MMIOs above 2K : special operations with side effects */ + { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, xive_tm_ack_os_reg }, + { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, NULL, xive_tm_ack_hv_reg }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, NULL, xive_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, xive_tm_pull_pool_ctx }, +}; + +static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset, + unsigned size, bool write) { uint8_t page_offset = (offset >> TM_SHIFT) & 0x3; uint32_t op_offset = offset & TM_ADDRESS_MASK; - int i; + const XiveTmOp *tm_ops; + int i, tm_ops_count; + uint32_t cfg; - for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) { - const XiveTmOp *xto = &xive_tm_operations[i]; + cfg = xive_presenter_get_config(xptr); + if (cfg & XIVE_PRESENTER_GEN1_TIMA_OS) { + tm_ops = xive_tm_operations; + tm_ops_count = ARRAY_SIZE(xive_tm_operations); + } else { + tm_ops = xive2_tm_operations; + tm_ops_count = ARRAY_SIZE(xive2_tm_operations); + } + + for (i = 0; i < tm_ops_count; i++) { + const XiveTmOp *xto = &tm_ops[i]; /* Accesses done from a more privileged TIMA page is allowed */ if (xto->page_offset >= page_offset && @@ -535,7 +576,7 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, * First, check for special operations in the 2K region */ if (offset & TM_SPECIAL_OP) { - xto = xive_tm_find_op(offset, size, true); + xto = xive_tm_find_op(tctx->xptr, offset, size, true); if (!xto) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA " "@%"HWADDR_PRIx"\n", offset); @@ -548,7 +589,7 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Then, for special operations in the region below 2K. */ - xto = xive_tm_find_op(offset, size, true); + xto = xive_tm_find_op(tctx->xptr, offset, size, true); if (xto) { xto->write_handler(xptr, tctx, offset, value, size); return; @@ -574,7 +615,7 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, * First, check for special operations in the 2K region */ if (offset & TM_SPECIAL_OP) { - xto = xive_tm_find_op(offset, size, false); + xto = xive_tm_find_op(tctx->xptr, offset, size, false); if (!xto) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA" "@%"HWADDR_PRIx"\n", offset); @@ -587,7 +628,7 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Then, for special operations in the region below 2K. */ - xto = xive_tm_find_op(offset, size, false); + xto = xive_tm_find_op(tctx->xptr, offset, size, false); if (xto) { ret = xto->read_handler(xptr, tctx, offset, size); goto out; diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 542f9e2932..6232cbeee1 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -133,13 +133,13 @@ static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off, PCIDevice *pdev; if (size != 4) { - phb_error(phb, "rc_config_write invalid size %d\n", size); + phb_error(phb, "rc_config_write invalid size %d", size); return; } pdev = pci_find_device(pci->bus, 0, 0); if (!pdev) { - phb_error(phb, "rc_config_write device not found\n"); + phb_error(phb, "rc_config_write device not found"); return; } @@ -155,13 +155,13 @@ static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off, uint64_t val; if (size != 4) { - phb_error(phb, "rc_config_read invalid size %d\n", size); + phb_error(phb, "rc_config_read invalid size %d", size); return ~0ull; } pdev = pci_find_device(pci->bus, 0, 0); if (!pdev) { - phb_error(phb, "rc_config_read device not found\n"); + phb_error(phb, "rc_config_read device not found"); return ~0ull; } @@ -1039,19 +1039,19 @@ static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr, if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & (PEC_NEST_STK_BAR_EN_MMIO0 | PEC_NEST_STK_BAR_EN_MMIO1)) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + phb_pec_error(pec, "Changing enabled BAR unsupported"); } phb->nest_regs[reg] = val & 0xffffffffff000000ull; break; case PEC_NEST_STK_PHB_REGS_BAR: if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + phb_pec_error(pec, "Changing enabled BAR unsupported"); } phb->nest_regs[reg] = val & 0xffffffffffc00000ull; break; case PEC_NEST_STK_INT_BAR: if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + phb_pec_error(pec, "Changing enabled BAR unsupported"); } phb->nest_regs[reg] = val & 0xfffffff000000000ull; break; diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index c927337da0..a313d4b964 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -15,6 +15,7 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( 'spapr_vio.c', 'spapr_events.c', 'spapr_hcall.c', + 'spapr_nested.c', 'spapr_iommu.c', 'spapr_rtas.c', 'spapr_pci.c', diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 590fc64b32..fc083173f3 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -799,7 +799,8 @@ static void pnv_init(MachineState *machine) DeviceState *dev; if (kvm_enabled()) { - error_report("The powernv machine does not work with KVM acceleration"); + error_report("machine %s does not support the KVM accelerator", + mc->name); exit(EXIT_FAILURE); } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 1b1220c423..82e4408c5c 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -1436,6 +1436,12 @@ int ppc_cpu_pir(PowerPCCPU *cpu) return env->spr_cb[SPR_PIR].default_value; } +int ppc_cpu_tir(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + return env->spr_cb[SPR_TIR].default_value; +} + PowerPCCPU *ppc_get_vcpu_by_pir(int pir) { CPUState *cs; diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index f969fa3c29..f061b8cf3b 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -19,7 +19,6 @@ #include "hw/pci/pci.h" #include "hw/boards.h" #include "sysemu/kvm.h" -#include "kvm_ppc.h" #include "sysemu/device_tree.h" #include "hw/loader.h" #include "elf.h" @@ -97,16 +96,6 @@ static int bamboo_load_device_tree(MachineState *machine, fprintf(stderr, "couldn't set /chosen/bootargs\n"); } - /* - * Copy data from the host device tree into the guest. Since the guest can - * directly access the timebase without host involvement, we must expose - * the correct frequencies. - */ - if (kvm_enabled()) { - tb_freq = kvmppc_get_tbfreq(); - clock_freq = kvmppc_get_clockfreq(); - } - qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency", clock_freq); qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency", @@ -175,6 +164,12 @@ static void bamboo_init(MachineState *machine) int success; int i; + if (kvm_enabled()) { + error_report("machine %s does not support the KVM accelerator", + MACHINE_GET_CLASS(machine)->name); + exit(EXIT_FAILURE); + } + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); env = &cpu->env; diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 33bf232f8b..d9231c7317 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -45,7 +45,6 @@ #include "trace.h" #include "elf.h" #include "qemu/units.h" -#include "kvm_ppc.h" /* SMP is not enabled, for now */ #define MAX_CPUS 1 @@ -245,6 +244,12 @@ static void ibm_40p_init(MachineState *machine) long kernel_size = 0, initrd_size = 0; char boot_device; + if (kvm_enabled()) { + error_report("machine %s does not support the KVM accelerator", + MACHINE_GET_CLASS(machine)->name); + exit(EXIT_FAILURE); + } + /* init CPU */ cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); env = &cpu->env; @@ -392,18 +397,7 @@ static void ibm_40p_init(MachineState *machine) fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height); fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth); - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled()); - if (kvm_enabled()) { - uint8_t *hypercall; - - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq()); - hypercall = g_malloc(16); - kvmppc_get_hypercall(env, hypercall, 16); - fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16); - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); - } else { - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND); - } + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND); fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_device); qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index dcb7f1c70a..54dbfd7fe9 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -61,6 +61,7 @@ #include "hw/ppc/fdt.h" #include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_nested.h" #include "hw/ppc/spapr_vio.h" #include "hw/ppc/vof.h" #include "hw/qdev-properties.h" @@ -2524,10 +2525,19 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp) int ret; unsigned int smp_threads = ms->smp.threads; - if (!kvm_enabled() && (smp_threads > 1)) { - error_setg(errp, "TCG cannot support more than 1 thread/core " - "on a pseries machine"); - return; + if (tcg_enabled()) { + if (smp_threads > 1 && + !ppc_type_check_compat(ms->cpu_type, CPU_POWERPC_LOGICAL_2_07, 0, + spapr->max_compat_pvr)) { + error_setg(errp, "TCG only supports SMT on POWER8 or newer CPUs"); + return; + } + + if (smp_threads > 8) { + error_setg(errp, "TCG cannot support more than 8 threads/core " + "on a pseries machine"); + return; + } } if (!is_power_of_2(smp_threads)) { error_setg(errp, "Cannot support %d threads/core on a pseries " diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 3fd45a6dec..5a0755d34f 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -473,6 +473,20 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr, error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n"); } + } else if (tcg_enabled()) { + MachineState *ms = MACHINE(spapr); + unsigned int smp_threads = ms->smp.threads; + + /* + * Nested-HV vCPU env state to L2, so SMT-shared SPR updates, for + * example, do not necessarily update the correct SPR value on sibling + * threads that are in a different guest/host context. + */ + if (smp_threads > 1) { + error_setg(errp, "TCG does not support nested-HV with SMT"); + error_append_hint(errp, "Try appending -machine cap-nested-hv=off " + "or use threads=1 with -smp\n"); + } } } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 9b88dd549a..a4e3c2fadd 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -255,7 +255,7 @@ static void spapr_cpu_core_unrealize(DeviceState *dev) } static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, - SpaprCpuCore *sc, Error **errp) + SpaprCpuCore *sc, int thread_index, Error **errp) { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); @@ -267,6 +267,9 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); + env->spr_cb[SPR_PIR].default_value = cs->cpu_index; + env->spr_cb[SPR_TIR].default_value = thread_index; + /* Set time-base frequency to 512 MHz. vhyp must be set first. */ cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); @@ -337,7 +340,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) for (i = 0; i < cc->nr_threads; i++) { sc->threads[i] = spapr_create_vcpu(sc, i, errp); if (!sc->threads[i] || - !spapr_realize_vcpu(sc->threads[i], spapr, sc, errp)) { + !spapr_realize_vcpu(sc->threads[i], spapr, sc, i, errp)) { spapr_cpu_core_unrealize(dev); return; } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index b904755575..002ea0b7c1 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -13,6 +13,7 @@ #include "hw/ppc/ppc.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/spapr_nested.h" #include "mmu-hash64.h" #include "cpu-models.h" #include "trace.h" @@ -1498,349 +1499,17 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, } #ifdef CONFIG_TCG -#define PRTS_MASK 0x1f - -static target_ulong h_set_ptbl(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - target_ulong ptcr = args[0]; - - if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) { - return H_FUNCTION; - } - - if ((ptcr & PRTS_MASK) + 12 - 4 > 12) { - return H_PARAMETER; - } - - spapr->nested_ptcr = ptcr; /* Save new partition table */ - - return H_SUCCESS; -} - -static target_ulong h_tlb_invalidate(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - /* - * The spapr virtual hypervisor nested HV implementation retains no L2 - * translation state except for TLB. And the TLB is always invalidated - * across L1<->L2 transitions, so nothing is required here. - */ - - return H_SUCCESS; -} - -static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - /* - * This HCALL is not required, L1 KVM will take a slow path and walk the - * page tables manually to do the data copy. - */ - return H_FUNCTION; -} - -/* - * When this handler returns, the environment is switched to the L2 guest - * and TCG begins running that. spapr_exit_nested() performs the switch from - * L2 back to L1 and returns from the H_ENTER_NESTED hcall. - */ -static target_ulong h_enter_nested(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); - CPUState *cs = CPU(cpu); - CPUPPCState *env = &cpu->env; - SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); - target_ulong hv_ptr = args[0]; - target_ulong regs_ptr = args[1]; - target_ulong hdec, now = cpu_ppc_load_tbl(env); - target_ulong lpcr, lpcr_mask; - struct kvmppc_hv_guest_state *hvstate; - struct kvmppc_hv_guest_state hv_state; - struct kvmppc_pt_regs *regs; - hwaddr len; - - if (spapr->nested_ptcr == 0) { - return H_NOT_AVAILABLE; - } - - len = sizeof(*hvstate); - hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false, - MEMTXATTRS_UNSPECIFIED); - if (len != sizeof(*hvstate)) { - address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false); - return H_PARAMETER; - } - - memcpy(&hv_state, hvstate, len); - - address_space_unmap(CPU(cpu)->as, hvstate, len, len, false); - - /* - * We accept versions 1 and 2. Version 2 fields are unused because TCG - * does not implement DAWR*. - */ - if (hv_state.version > HV_GUEST_STATE_VERSION) { - return H_PARAMETER; - } - - spapr_cpu->nested_host_state = g_try_new(CPUPPCState, 1); - if (!spapr_cpu->nested_host_state) { - return H_NO_MEM; - } - - memcpy(spapr_cpu->nested_host_state, env, sizeof(CPUPPCState)); - - len = sizeof(*regs); - regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false, - MEMTXATTRS_UNSPECIFIED); - if (!regs || len != sizeof(*regs)) { - address_space_unmap(CPU(cpu)->as, regs, len, 0, false); - g_free(spapr_cpu->nested_host_state); - return H_P2; - } - - len = sizeof(env->gpr); - assert(len == sizeof(regs->gpr)); - memcpy(env->gpr, regs->gpr, len); - - env->lr = regs->link; - env->ctr = regs->ctr; - cpu_write_xer(env, regs->xer); - ppc_set_cr(env, regs->ccr); - - env->msr = regs->msr; - env->nip = regs->nip; - - address_space_unmap(CPU(cpu)->as, regs, len, len, false); - - env->cfar = hv_state.cfar; - - assert(env->spr[SPR_LPIDR] == 0); - env->spr[SPR_LPIDR] = hv_state.lpid; - - lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER; - lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask); - lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE; - lpcr &= ~LPCR_LPES0; - env->spr[SPR_LPCR] = lpcr & pcc->lpcr_mask; - - env->spr[SPR_PCR] = hv_state.pcr; - /* hv_state.amor is not used */ - env->spr[SPR_DPDES] = hv_state.dpdes; - env->spr[SPR_HFSCR] = hv_state.hfscr; - hdec = hv_state.hdec_expiry - now; - spapr_cpu->nested_tb_offset = hv_state.tb_offset; - /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/ - env->spr[SPR_SRR0] = hv_state.srr0; - env->spr[SPR_SRR1] = hv_state.srr1; - env->spr[SPR_SPRG0] = hv_state.sprg[0]; - env->spr[SPR_SPRG1] = hv_state.sprg[1]; - env->spr[SPR_SPRG2] = hv_state.sprg[2]; - env->spr[SPR_SPRG3] = hv_state.sprg[3]; - env->spr[SPR_BOOKS_PID] = hv_state.pidr; - env->spr[SPR_PPR] = hv_state.ppr; - - cpu_ppc_hdecr_init(env); - cpu_ppc_store_hdecr(env, hdec); - - /* - * The hv_state.vcpu_token is not needed. It is used by the KVM - * implementation to remember which L2 vCPU last ran on which physical - * CPU so as to invalidate process scope translations if it is moved - * between physical CPUs. For now TLBs are always flushed on L1<->L2 - * transitions so this is not a problem. - * - * Could validate that the same vcpu_token does not attempt to run on - * different L1 vCPUs at the same time, but that would be a L1 KVM bug - * and it's not obviously worth a new data structure to do it. - */ - - env->tb_env->tb_offset += spapr_cpu->nested_tb_offset; - spapr_cpu->in_nested = true; - - hreg_compute_hflags(env); - ppc_maybe_interrupt(env); - tlb_flush(cs); - env->reserve_addr = -1; /* Reset the reservation */ - - /* - * The spapr hcall helper sets env->gpr[3] to the return value, but at - * this point the L1 is not returning from the hcall but rather we - * start running the L2, so r3 must not be clobbered, so return env->gpr[3] - * to leave it unchanged. - */ - return env->gpr[3]; -} - -void spapr_exit_nested(PowerPCCPU *cpu, int excp) -{ - CPUState *cs = CPU(cpu); - CPUPPCState *env = &cpu->env; - SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); - target_ulong r3_return = env->excp_vectors[excp]; /* hcall return value */ - target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4]; - target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5]; - struct kvmppc_hv_guest_state *hvstate; - struct kvmppc_pt_regs *regs; - hwaddr len; - - assert(spapr_cpu->in_nested); - - cpu_ppc_hdecr_exit(env); - - len = sizeof(*hvstate); - hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true, - MEMTXATTRS_UNSPECIFIED); - if (len != sizeof(*hvstate)) { - address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true); - r3_return = H_PARAMETER; - goto out_restore_l1; - } - - hvstate->cfar = env->cfar; - hvstate->lpcr = env->spr[SPR_LPCR]; - hvstate->pcr = env->spr[SPR_PCR]; - hvstate->dpdes = env->spr[SPR_DPDES]; - hvstate->hfscr = env->spr[SPR_HFSCR]; - - if (excp == POWERPC_EXCP_HDSI) { - hvstate->hdar = env->spr[SPR_HDAR]; - hvstate->hdsisr = env->spr[SPR_HDSISR]; - hvstate->asdr = env->spr[SPR_ASDR]; - } else if (excp == POWERPC_EXCP_HISI) { - hvstate->asdr = env->spr[SPR_ASDR]; - } - - /* HEIR should be implemented for HV mode and saved here. */ - hvstate->srr0 = env->spr[SPR_SRR0]; - hvstate->srr1 = env->spr[SPR_SRR1]; - hvstate->sprg[0] = env->spr[SPR_SPRG0]; - hvstate->sprg[1] = env->spr[SPR_SPRG1]; - hvstate->sprg[2] = env->spr[SPR_SPRG2]; - hvstate->sprg[3] = env->spr[SPR_SPRG3]; - hvstate->pidr = env->spr[SPR_BOOKS_PID]; - hvstate->ppr = env->spr[SPR_PPR]; - - /* Is it okay to specify write length larger than actual data written? */ - address_space_unmap(CPU(cpu)->as, hvstate, len, len, true); - - len = sizeof(*regs); - regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true, - MEMTXATTRS_UNSPECIFIED); - if (!regs || len != sizeof(*regs)) { - address_space_unmap(CPU(cpu)->as, regs, len, 0, true); - r3_return = H_P2; - goto out_restore_l1; - } - - len = sizeof(env->gpr); - assert(len == sizeof(regs->gpr)); - memcpy(regs->gpr, env->gpr, len); - - regs->link = env->lr; - regs->ctr = env->ctr; - regs->xer = cpu_read_xer(env); - regs->ccr = ppc_get_cr(env); - - if (excp == POWERPC_EXCP_MCHECK || - excp == POWERPC_EXCP_RESET || - excp == POWERPC_EXCP_SYSCALL) { - regs->nip = env->spr[SPR_SRR0]; - regs->msr = env->spr[SPR_SRR1] & env->msr_mask; - } else { - regs->nip = env->spr[SPR_HSRR0]; - regs->msr = env->spr[SPR_HSRR1] & env->msr_mask; - } - - /* Is it okay to specify write length larger than actual data written? */ - address_space_unmap(CPU(cpu)->as, regs, len, len, true); - -out_restore_l1: - memcpy(env->gpr, spapr_cpu->nested_host_state->gpr, sizeof(env->gpr)); - env->lr = spapr_cpu->nested_host_state->lr; - env->ctr = spapr_cpu->nested_host_state->ctr; - memcpy(env->crf, spapr_cpu->nested_host_state->crf, sizeof(env->crf)); - env->cfar = spapr_cpu->nested_host_state->cfar; - env->xer = spapr_cpu->nested_host_state->xer; - env->so = spapr_cpu->nested_host_state->so; - env->ov = spapr_cpu->nested_host_state->ov; - env->ov32 = spapr_cpu->nested_host_state->ov32; - env->ca32 = spapr_cpu->nested_host_state->ca32; - env->msr = spapr_cpu->nested_host_state->msr; - env->nip = spapr_cpu->nested_host_state->nip; - - assert(env->spr[SPR_LPIDR] != 0); - env->spr[SPR_LPCR] = spapr_cpu->nested_host_state->spr[SPR_LPCR]; - env->spr[SPR_LPIDR] = spapr_cpu->nested_host_state->spr[SPR_LPIDR]; - env->spr[SPR_PCR] = spapr_cpu->nested_host_state->spr[SPR_PCR]; - env->spr[SPR_DPDES] = 0; - env->spr[SPR_HFSCR] = spapr_cpu->nested_host_state->spr[SPR_HFSCR]; - env->spr[SPR_SRR0] = spapr_cpu->nested_host_state->spr[SPR_SRR0]; - env->spr[SPR_SRR1] = spapr_cpu->nested_host_state->spr[SPR_SRR1]; - env->spr[SPR_SPRG0] = spapr_cpu->nested_host_state->spr[SPR_SPRG0]; - env->spr[SPR_SPRG1] = spapr_cpu->nested_host_state->spr[SPR_SPRG1]; - env->spr[SPR_SPRG2] = spapr_cpu->nested_host_state->spr[SPR_SPRG2]; - env->spr[SPR_SPRG3] = spapr_cpu->nested_host_state->spr[SPR_SPRG3]; - env->spr[SPR_BOOKS_PID] = spapr_cpu->nested_host_state->spr[SPR_BOOKS_PID]; - env->spr[SPR_PPR] = spapr_cpu->nested_host_state->spr[SPR_PPR]; - - /* - * Return the interrupt vector address from H_ENTER_NESTED to the L1 - * (or error code). - */ - env->gpr[3] = r3_return; - - env->tb_env->tb_offset -= spapr_cpu->nested_tb_offset; - spapr_cpu->in_nested = false; - - hreg_compute_hflags(env); - ppc_maybe_interrupt(env); - tlb_flush(cs); - env->reserve_addr = -1; /* Reset the reservation */ - - g_free(spapr_cpu->nested_host_state); - spapr_cpu->nested_host_state = NULL; -} - -static void hypercall_register_nested(void) -{ - spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl); - spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested); - spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate); - spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest); -} - static void hypercall_register_softmmu(void) { /* DO NOTHING */ } #else -void spapr_exit_nested(PowerPCCPU *cpu, int excp) -{ - g_assert_not_reached(); -} - static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, target_ulong *args) { g_assert_not_reached(); } -static void hypercall_register_nested(void) -{ - /* DO NOTHING */ -} - static void hypercall_register_softmmu(void) { /* hcall-pft */ @@ -1910,7 +1579,7 @@ static void hypercall_register_types(void) spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt); - hypercall_register_nested(); + spapr_register_nested(); } type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c new file mode 100644 index 0000000000..121aa96ddc --- /dev/null +++ b/hw/ppc/spapr_nested.c @@ -0,0 +1,395 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "exec/exec-all.h" +#include "helper_regs.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/spapr_nested.h" + +#ifdef CONFIG_TCG +#define PRTS_MASK 0x1f + +static target_ulong h_set_ptbl(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong ptcr = args[0]; + + if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) { + return H_FUNCTION; + } + + if ((ptcr & PRTS_MASK) + 12 - 4 > 12) { + return H_PARAMETER; + } + + spapr->nested_ptcr = ptcr; /* Save new partition table */ + + return H_SUCCESS; +} + +static target_ulong h_tlb_invalidate(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* + * The spapr virtual hypervisor nested HV implementation retains no L2 + * translation state except for TLB. And the TLB is always invalidated + * across L1<->L2 transitions, so nothing is required here. + */ + + return H_SUCCESS; +} + +static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* + * This HCALL is not required, L1 KVM will take a slow path and walk the + * page tables manually to do the data copy. + */ + return H_FUNCTION; +} + +static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + memcpy(save->gpr, env->gpr, sizeof(save->gpr)); + + save->lr = env->lr; + save->ctr = env->ctr; + save->cfar = env->cfar; + save->msr = env->msr; + save->nip = env->nip; + + save->cr = ppc_get_cr(env); + save->xer = cpu_read_xer(env); + + save->lpcr = env->spr[SPR_LPCR]; + save->lpidr = env->spr[SPR_LPIDR]; + save->pcr = env->spr[SPR_PCR]; + save->dpdes = env->spr[SPR_DPDES]; + save->hfscr = env->spr[SPR_HFSCR]; + save->srr0 = env->spr[SPR_SRR0]; + save->srr1 = env->spr[SPR_SRR1]; + save->sprg0 = env->spr[SPR_SPRG0]; + save->sprg1 = env->spr[SPR_SPRG1]; + save->sprg2 = env->spr[SPR_SPRG2]; + save->sprg3 = env->spr[SPR_SPRG3]; + save->pidr = env->spr[SPR_BOOKS_PID]; + save->ppr = env->spr[SPR_PPR]; + + save->tb_offset = env->tb_env->tb_offset; +} + +static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + + memcpy(env->gpr, load->gpr, sizeof(env->gpr)); + + env->lr = load->lr; + env->ctr = load->ctr; + env->cfar = load->cfar; + env->msr = load->msr; + env->nip = load->nip; + + ppc_set_cr(env, load->cr); + cpu_write_xer(env, load->xer); + + env->spr[SPR_LPCR] = load->lpcr; + env->spr[SPR_LPIDR] = load->lpidr; + env->spr[SPR_PCR] = load->pcr; + env->spr[SPR_DPDES] = load->dpdes; + env->spr[SPR_HFSCR] = load->hfscr; + env->spr[SPR_SRR0] = load->srr0; + env->spr[SPR_SRR1] = load->srr1; + env->spr[SPR_SPRG0] = load->sprg0; + env->spr[SPR_SPRG1] = load->sprg1; + env->spr[SPR_SPRG2] = load->sprg2; + env->spr[SPR_SPRG3] = load->sprg3; + env->spr[SPR_BOOKS_PID] = load->pidr; + env->spr[SPR_PPR] = load->ppr; + + env->tb_env->tb_offset = load->tb_offset; + + /* + * MSR updated, compute hflags and possible interrupts. + */ + hreg_compute_hflags(env); + ppc_maybe_interrupt(env); + + /* + * Nested HV does not tag TLB entries between L1 and L2, so must + * flush on transition. + */ + tlb_flush(cs); + env->reserve_addr = -1; /* Reset the reservation */ +} + +/* + * When this handler returns, the environment is switched to the L2 guest + * and TCG begins running that. spapr_exit_nested() performs the switch from + * L2 back to L1 and returns from the H_ENTER_NESTED hcall. + */ +static target_ulong h_enter_nested(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + CPUPPCState *env = &cpu->env; + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + struct nested_ppc_state l2_state; + target_ulong hv_ptr = args[0]; + target_ulong regs_ptr = args[1]; + target_ulong hdec, now = cpu_ppc_load_tbl(env); + target_ulong lpcr, lpcr_mask; + struct kvmppc_hv_guest_state *hvstate; + struct kvmppc_hv_guest_state hv_state; + struct kvmppc_pt_regs *regs; + hwaddr len; + + if (spapr->nested_ptcr == 0) { + return H_NOT_AVAILABLE; + } + + len = sizeof(*hvstate); + hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false, + MEMTXATTRS_UNSPECIFIED); + if (len != sizeof(*hvstate)) { + address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false); + return H_PARAMETER; + } + + memcpy(&hv_state, hvstate, len); + + address_space_unmap(CPU(cpu)->as, hvstate, len, len, false); + + /* + * We accept versions 1 and 2. Version 2 fields are unused because TCG + * does not implement DAWR*. + */ + if (hv_state.version > HV_GUEST_STATE_VERSION) { + return H_PARAMETER; + } + + if (hv_state.lpid == 0) { + return H_PARAMETER; + } + + spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1); + if (!spapr_cpu->nested_host_state) { + return H_NO_MEM; + } + + assert(env->spr[SPR_LPIDR] == 0); + assert(env->spr[SPR_DPDES] == 0); + nested_save_state(spapr_cpu->nested_host_state, cpu); + + len = sizeof(*regs); + regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false, + MEMTXATTRS_UNSPECIFIED); + if (!regs || len != sizeof(*regs)) { + address_space_unmap(CPU(cpu)->as, regs, len, 0, false); + g_free(spapr_cpu->nested_host_state); + return H_P2; + } + + len = sizeof(l2_state.gpr); + assert(len == sizeof(regs->gpr)); + memcpy(l2_state.gpr, regs->gpr, len); + + l2_state.lr = regs->link; + l2_state.ctr = regs->ctr; + l2_state.xer = regs->xer; + l2_state.cr = regs->ccr; + l2_state.msr = regs->msr; + l2_state.nip = regs->nip; + + address_space_unmap(CPU(cpu)->as, regs, len, len, false); + + l2_state.cfar = hv_state.cfar; + l2_state.lpidr = hv_state.lpid; + + lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER; + lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask); + lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE; + lpcr &= ~LPCR_LPES0; + l2_state.lpcr = lpcr & pcc->lpcr_mask; + + l2_state.pcr = hv_state.pcr; + /* hv_state.amor is not used */ + l2_state.dpdes = hv_state.dpdes; + l2_state.hfscr = hv_state.hfscr; + /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/ + l2_state.srr0 = hv_state.srr0; + l2_state.srr1 = hv_state.srr1; + l2_state.sprg0 = hv_state.sprg[0]; + l2_state.sprg1 = hv_state.sprg[1]; + l2_state.sprg2 = hv_state.sprg[2]; + l2_state.sprg3 = hv_state.sprg[3]; + l2_state.pidr = hv_state.pidr; + l2_state.ppr = hv_state.ppr; + l2_state.tb_offset = env->tb_env->tb_offset + hv_state.tb_offset; + + /* + * Switch to the nested guest environment and start the "hdec" timer. + */ + nested_load_state(cpu, &l2_state); + + hdec = hv_state.hdec_expiry - now; + cpu_ppc_hdecr_init(env); + cpu_ppc_store_hdecr(env, hdec); + + /* + * The hv_state.vcpu_token is not needed. It is used by the KVM + * implementation to remember which L2 vCPU last ran on which physical + * CPU so as to invalidate process scope translations if it is moved + * between physical CPUs. For now TLBs are always flushed on L1<->L2 + * transitions so this is not a problem. + * + * Could validate that the same vcpu_token does not attempt to run on + * different L1 vCPUs at the same time, but that would be a L1 KVM bug + * and it's not obviously worth a new data structure to do it. + */ + + spapr_cpu->in_nested = true; + + /* + * The spapr hcall helper sets env->gpr[3] to the return value, but at + * this point the L1 is not returning from the hcall but rather we + * start running the L2, so r3 must not be clobbered, so return env->gpr[3] + * to leave it unchanged. + */ + return env->gpr[3]; +} + +void spapr_exit_nested(PowerPCCPU *cpu, int excp) +{ + CPUPPCState *env = &cpu->env; + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + struct nested_ppc_state l2_state; + target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4]; + target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5]; + target_ulong hsrr0, hsrr1, hdar, asdr, hdsisr; + struct kvmppc_hv_guest_state *hvstate; + struct kvmppc_pt_regs *regs; + hwaddr len; + + assert(spapr_cpu->in_nested); + + nested_save_state(&l2_state, cpu); + hsrr0 = env->spr[SPR_HSRR0]; + hsrr1 = env->spr[SPR_HSRR1]; + hdar = env->spr[SPR_HDAR]; + hdsisr = env->spr[SPR_HDSISR]; + asdr = env->spr[SPR_ASDR]; + + /* + * Switch back to the host environment (including for any error). + */ + assert(env->spr[SPR_LPIDR] != 0); + nested_load_state(cpu, spapr_cpu->nested_host_state); + env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */ + + cpu_ppc_hdecr_exit(env); + + spapr_cpu->in_nested = false; + + g_free(spapr_cpu->nested_host_state); + spapr_cpu->nested_host_state = NULL; + + len = sizeof(*hvstate); + hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true, + MEMTXATTRS_UNSPECIFIED); + if (len != sizeof(*hvstate)) { + address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true); + env->gpr[3] = H_PARAMETER; + return; + } + + hvstate->cfar = l2_state.cfar; + hvstate->lpcr = l2_state.lpcr; + hvstate->pcr = l2_state.pcr; + hvstate->dpdes = l2_state.dpdes; + hvstate->hfscr = l2_state.hfscr; + + if (excp == POWERPC_EXCP_HDSI) { + hvstate->hdar = hdar; + hvstate->hdsisr = hdsisr; + hvstate->asdr = asdr; + } else if (excp == POWERPC_EXCP_HISI) { + hvstate->asdr = asdr; + } + + /* HEIR should be implemented for HV mode and saved here. */ + hvstate->srr0 = l2_state.srr0; + hvstate->srr1 = l2_state.srr1; + hvstate->sprg[0] = l2_state.sprg0; + hvstate->sprg[1] = l2_state.sprg1; + hvstate->sprg[2] = l2_state.sprg2; + hvstate->sprg[3] = l2_state.sprg3; + hvstate->pidr = l2_state.pidr; + hvstate->ppr = l2_state.ppr; + + /* Is it okay to specify write length larger than actual data written? */ + address_space_unmap(CPU(cpu)->as, hvstate, len, len, true); + + len = sizeof(*regs); + regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true, + MEMTXATTRS_UNSPECIFIED); + if (!regs || len != sizeof(*regs)) { + address_space_unmap(CPU(cpu)->as, regs, len, 0, true); + env->gpr[3] = H_P2; + return; + } + + len = sizeof(env->gpr); + assert(len == sizeof(regs->gpr)); + memcpy(regs->gpr, l2_state.gpr, len); + + regs->link = l2_state.lr; + regs->ctr = l2_state.ctr; + regs->xer = l2_state.xer; + regs->ccr = l2_state.cr; + + if (excp == POWERPC_EXCP_MCHECK || + excp == POWERPC_EXCP_RESET || + excp == POWERPC_EXCP_SYSCALL) { + regs->nip = l2_state.srr0; + regs->msr = l2_state.srr1 & env->msr_mask; + } else { + regs->nip = hsrr0; + regs->msr = hsrr1 & env->msr_mask; + } + + /* Is it okay to specify write length larger than actual data written? */ + address_space_unmap(CPU(cpu)->as, regs, len, len, true); +} + +void spapr_register_nested(void) +{ + spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl); + spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested); + spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate); + spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest); +} +#else +void spapr_exit_nested(PowerPCCPU *cpu, int excp) +{ + g_assert_not_reached(); +} + +void spapr_register_nested(void) +{ + /* DO NOTHING */ +} +#endif diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h index 02af03ada2..e095c002dc 100644 --- a/include/hw/ppc/ppc.h +++ b/include/hw/ppc/ppc.h @@ -6,6 +6,7 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level); PowerPCCPU *ppc_get_vcpu_by_pir(int pir); int ppc_cpu_pir(PowerPCCPU *cpu); +int ppc_cpu_tir(PowerPCCPU *cpu); /* PowerPC hardware exceptions management helpers */ typedef void (*clk_setup_cb)(void *opaque, uint32_t freq); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index bd5a6c4780..538b2dfb89 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -621,66 +621,6 @@ struct SpaprMachineState { #define SVM_H_TPM_COMM 0xEF10 #define SVM_HCALL_MAX SVM_H_TPM_COMM -/* - * Register state for entering a nested guest with H_ENTER_NESTED. - * New member must be added at the end. - */ -struct kvmppc_hv_guest_state { - uint64_t version; /* version of this structure layout, must be first */ - uint32_t lpid; - uint32_t vcpu_token; - /* These registers are hypervisor privileged (at least for writing) */ - uint64_t lpcr; - uint64_t pcr; - uint64_t amor; - uint64_t dpdes; - uint64_t hfscr; - int64_t tb_offset; - uint64_t dawr0; - uint64_t dawrx0; - uint64_t ciabr; - uint64_t hdec_expiry; - uint64_t purr; - uint64_t spurr; - uint64_t ic; - uint64_t vtb; - uint64_t hdar; - uint64_t hdsisr; - uint64_t heir; - uint64_t asdr; - /* These are OS privileged but need to be set late in guest entry */ - uint64_t srr0; - uint64_t srr1; - uint64_t sprg[4]; - uint64_t pidr; - uint64_t cfar; - uint64_t ppr; - /* Version 1 ends here */ - uint64_t dawr1; - uint64_t dawrx1; - /* Version 2 ends here */ -}; - -/* Latest version of hv_guest_state structure */ -#define HV_GUEST_STATE_VERSION 2 - -/* Linux 64-bit powerpc pt_regs struct, used by nested HV */ -struct kvmppc_pt_regs { - uint64_t gpr[32]; - uint64_t nip; - uint64_t msr; - uint64_t orig_gpr3; /* Used for restarting system calls */ - uint64_t ctr; - uint64_t link; - uint64_t xer; - uint64_t ccr; - uint64_t softe; /* Soft enabled/disabled */ - uint64_t trap; /* Reason for being here */ - uint64_t dar; /* Fault registers */ - uint64_t dsisr; /* on 4xx/Book-E used for ESR */ - uint64_t result; /* Result of a system call */ -}; - typedef struct SpaprDeviceTreeUpdateHeader { uint32_t version_id; } SpaprDeviceTreeUpdateHeader; @@ -698,8 +638,6 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn); target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args); -void spapr_exit_nested(PowerPCCPU *cpu, int excp); - target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong shift); target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, SpaprMachineState *spapr, diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h index b560514560..69a52e39b8 100644 --- a/include/hw/ppc/spapr_cpu_core.h +++ b/include/hw/ppc/spapr_cpu_core.h @@ -41,6 +41,8 @@ void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip, target_ulong r1, target_ulong r3, target_ulong r4); +struct nested_ppc_state; + typedef struct SpaprCpuState { uint64_t vpa_addr; uint64_t slb_shadow_addr, slb_shadow_size; @@ -51,8 +53,7 @@ typedef struct SpaprCpuState { /* Fields for nested-HV support */ bool in_nested; /* true while the L2 is executing */ - CPUPPCState *nested_host_state; /* holds the L1 state while L2 executes */ - int64_t nested_tb_offset; /* L1->L2 TB offset */ + struct nested_ppc_state *nested_host_state; /* holds the L1 state while L2 executes */ } SpaprCpuState; static inline SpaprCpuState *spapr_cpu_state(PowerPCCPU *cpu) diff --git a/include/hw/ppc/spapr_nested.h b/include/hw/ppc/spapr_nested.h new file mode 100644 index 0000000000..d383486476 --- /dev/null +++ b/include/hw/ppc/spapr_nested.h @@ -0,0 +1,102 @@ +#ifndef HW_SPAPR_NESTED_H +#define HW_SPAPR_NESTED_H + +#include "qemu/osdep.h" +#include "target/ppc/cpu.h" + +/* + * Register state for entering a nested guest with H_ENTER_NESTED. + * New member must be added at the end. + */ +struct kvmppc_hv_guest_state { + uint64_t version; /* version of this structure layout, must be first */ + uint32_t lpid; + uint32_t vcpu_token; + /* These registers are hypervisor privileged (at least for writing) */ + uint64_t lpcr; + uint64_t pcr; + uint64_t amor; + uint64_t dpdes; + uint64_t hfscr; + int64_t tb_offset; + uint64_t dawr0; + uint64_t dawrx0; + uint64_t ciabr; + uint64_t hdec_expiry; + uint64_t purr; + uint64_t spurr; + uint64_t ic; + uint64_t vtb; + uint64_t hdar; + uint64_t hdsisr; + uint64_t heir; + uint64_t asdr; + /* These are OS privileged but need to be set late in guest entry */ + uint64_t srr0; + uint64_t srr1; + uint64_t sprg[4]; + uint64_t pidr; + uint64_t cfar; + uint64_t ppr; + /* Version 1 ends here */ + uint64_t dawr1; + uint64_t dawrx1; + /* Version 2 ends here */ +}; + +/* Latest version of hv_guest_state structure */ +#define HV_GUEST_STATE_VERSION 2 + +/* Linux 64-bit powerpc pt_regs struct, used by nested HV */ +struct kvmppc_pt_regs { + uint64_t gpr[32]; + uint64_t nip; + uint64_t msr; + uint64_t orig_gpr3; /* Used for restarting system calls */ + uint64_t ctr; + uint64_t link; + uint64_t xer; + uint64_t ccr; + uint64_t softe; /* Soft enabled/disabled */ + uint64_t trap; /* Reason for being here */ + uint64_t dar; /* Fault registers */ + uint64_t dsisr; /* on 4xx/Book-E used for ESR */ + uint64_t result; /* Result of a system call */ +}; + +/* + * nested_ppc_state is used to save the host CPU state before switching it to + * the guest CPU state, to be restored on H_ENTER_NESTED exit. + */ +struct nested_ppc_state { + uint64_t gpr[32]; + uint64_t lr; + uint64_t ctr; + uint64_t cfar; + uint64_t msr; + uint64_t nip; + uint32_t cr; + + uint64_t xer; + + uint64_t lpcr; + uint64_t lpidr; + uint64_t pidr; + uint64_t pcr; + uint64_t dpdes; + uint64_t hfscr; + uint64_t srr0; + uint64_t srr1; + uint64_t sprg0; + uint64_t sprg1; + uint64_t sprg2; + uint64_t sprg3; + uint64_t ppr; + + int64_t tb_offset; +}; + +void spapr_register_nested(void); +void spapr_exit_nested(PowerPCCPU *cpu, int excp); + +#endif /* HW_SPAPR_NESTED_H */ diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index f7eea4ca81..3dfb06e002 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -430,6 +430,8 @@ typedef struct XivePresenterClass XivePresenterClass; DECLARE_CLASS_CHECKERS(XivePresenterClass, XIVE_PRESENTER, TYPE_XIVE_PRESENTER) +#define XIVE_PRESENTER_GEN1_TIMA_OS 0x1 + struct XivePresenterClass { InterfaceClass parent; int (*match_nvt)(XivePresenter *xptr, uint8_t format, @@ -437,6 +439,7 @@ struct XivePresenterClass { bool cam_ignore, uint8_t priority, uint32_t logic_serv, XiveTCTXMatch *match); bool (*in_kernel)(const XivePresenter *xptr); + uint32_t (*get_config)(XivePresenter *xptr); }; int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 0ee2adc105..4138a25801 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -672,6 +672,8 @@ enum { POWERPC_FLAG_TM = 0x00100000, /* Has SCV (ISA 3.00) */ POWERPC_FLAG_SCV = 0x00200000, + /* Has >1 thread per core */ + POWERPC_FLAG_SMT = 0x00400000, }; /* @@ -1268,6 +1270,13 @@ struct CPUArchState { uint64_t pmu_base_time; }; +#define _CORE_ID(cs) \ + (POWERPC_CPU(cs)->env.spr_cb[SPR_PIR].default_value & ~(cs->nr_threads - 1)) + +#define THREAD_SIBLING_FOREACH(cs, cs_sibling) \ + CPU_FOREACH(cs_sibling) \ + if (_CORE_ID(cs) == _CORE_ID(cs_sibling)) + #define SET_FIT_PERIOD(a_, b_, c_, d_) \ do { \ env->fit_period[0] = (a_); \ @@ -1647,6 +1656,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_HMER (0x150) #define SPR_HMEER (0x151) #define SPR_PCR (0x152) +#define SPR_HEIR (0x153) #define SPR_BOOKE_LPIDR (0x152) #define SPR_BOOKE_TCR (0x154) #define SPR_BOOKE_TLB0PS (0x158) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 7bce421a7c..aeff71d063 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -1630,6 +1630,7 @@ static void register_8xx_sprs(CPUPPCState *env) * HSRR0 => SPR 314 (Power 2.04 hypv) * HSRR1 => SPR 315 (Power 2.04 hypv) * LPIDR => SPR 317 (970) + * HEIR => SPR 339 (Power 2.05 hypv) (64-bit reg from 3.1) * EPR => SPR 702 (Power 2.04 emb) * perf => 768-783 (Power 2.04) * perf => 784-799 (Power 2.04) @@ -5523,6 +5524,24 @@ static void register_power6_common_sprs(CPUPPCState *env) 0x00000000); } +static void register_HEIR32_spr(CPUPPCState *env) +{ + spr_register_hv(env, SPR_HEIR, "HEIR", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic32, + 0x00000000); +} + +static void register_HEIR64_spr(CPUPPCState *env) +{ + spr_register_hv(env, SPR_HEIR, "HEIR", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); +} + static void register_power8_tce_address_control_sprs(CPUPPCState *env) { spr_register_kvm(env, SPR_TAR, "TAR", @@ -5951,6 +5970,7 @@ static void init_proc_POWER7(CPUPPCState *env) register_power5p_ear_sprs(env); register_power5p_tb_sprs(env); register_power6_common_sprs(env); + register_HEIR32_spr(env); register_power6_dbg_sprs(env); register_power7_book4_sprs(env); @@ -6073,6 +6093,7 @@ static void init_proc_POWER8(CPUPPCState *env) register_power5p_ear_sprs(env); register_power5p_tb_sprs(env); register_power6_common_sprs(env); + register_HEIR32_spr(env); register_power6_dbg_sprs(env); register_power8_tce_address_control_sprs(env); register_power8_ids_sprs(env); @@ -6235,6 +6256,7 @@ static void init_proc_POWER9(CPUPPCState *env) register_power5p_ear_sprs(env); register_power5p_tb_sprs(env); register_power6_common_sprs(env); + register_HEIR32_spr(env); register_power6_dbg_sprs(env); register_power8_tce_address_control_sprs(env); register_power8_ids_sprs(env); @@ -6427,6 +6449,7 @@ static void init_proc_POWER10(CPUPPCState *env) register_power5p_ear_sprs(env); register_power5p_tb_sprs(env); register_power6_common_sprs(env); + register_HEIR64_spr(env); register_power6_dbg_sprs(env); register_power8_tce_address_control_sprs(env); register_power8_ids_sprs(env); @@ -6732,6 +6755,7 @@ static void ppc_cpu_realize(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); PowerPCCPU *cpu = POWERPC_CPU(dev); + CPUPPCState *env = &cpu->env; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); Error *local_err = NULL; @@ -6763,6 +6787,10 @@ static void ppc_cpu_realize(DeviceState *dev, Error **errp) pcc->parent_realize(dev, errp); + if (env_cpu(env)->nr_threads > 1) { + env->flags |= POWERPC_FLAG_SMT; + } + return; unrealize: diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 12d8a7257b..2158390e27 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -28,6 +28,7 @@ #include "trace.h" #ifdef CONFIG_TCG +#include "sysemu/tcg.h" #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #endif @@ -133,6 +134,26 @@ static void dump_hcall(CPUPPCState *env) env->nip); } +#ifdef CONFIG_TCG +/* Return true iff byteswap is needed to load instruction */ +static inline bool insn_need_byteswap(CPUArchState *env) +{ + /* SYSTEM builds TARGET_BIG_ENDIAN. Need to swap when MSR[LE] is set */ + return !!(env->msr & ((target_ulong)1 << MSR_LE)); +} + +static uint32_t ppc_ldl_code(CPUArchState *env, abi_ptr addr) +{ + uint32_t insn = cpu_ldl_code(env, addr); + + if (insn_need_byteswap(env)) { + insn = bswap32(insn); + } + + return insn; +} +#endif + static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int excp) { const char *es; @@ -1328,6 +1349,72 @@ static bool books_vhyp_handles_hv_excp(PowerPCCPU *cpu) return false; } +#ifdef CONFIG_TCG +static bool is_prefix_insn(CPUPPCState *env, uint32_t insn) +{ + if (!(env->insns_flags2 & PPC2_ISA310)) { + return false; + } + return ((insn & 0xfc000000) == 0x04000000); +} + +static bool is_prefix_insn_excp(PowerPCCPU *cpu, int excp) +{ + CPUPPCState *env = &cpu->env; + + if (!tcg_enabled()) { + /* + * This does not load instructions and set the prefix bit correctly + * for injected interrupts with KVM. That may have to be discovered + * and set by the KVM layer before injecting. + */ + return false; + } + + switch (excp) { + case POWERPC_EXCP_HDSI: + /* HDSI PRTABLE_FAULT has the originating access type in error_code */ + if ((env->spr[SPR_HDSISR] & DSISR_PRTABLE_FAULT) && + (env->error_code == MMU_INST_FETCH)) { + /* + * Fetch failed due to partition scope translation, so prefix + * indication is not relevant (and attempting to load the + * instruction at NIP would cause recursive faults with the same + * translation). + */ + break; + } + /* fall through */ + case POWERPC_EXCP_MCHECK: + case POWERPC_EXCP_DSI: + case POWERPC_EXCP_DSEG: + case POWERPC_EXCP_ALIGN: + case POWERPC_EXCP_PROGRAM: + case POWERPC_EXCP_FPU: + case POWERPC_EXCP_TRACE: + case POWERPC_EXCP_HV_EMU: + case POWERPC_EXCP_VPU: + case POWERPC_EXCP_VSXU: + case POWERPC_EXCP_FU: + case POWERPC_EXCP_HV_FU: { + uint32_t insn = ppc_ldl_code(env, env->nip); + if (is_prefix_insn(env, insn)) { + return true; + } + break; + } + default: + break; + } + return false; +} +#else +static bool is_prefix_insn_excp(PowerPCCPU *cpu, int excp) +{ + return false; +} +#endif + static void powerpc_excp_books(PowerPCCPU *cpu, int excp) { CPUState *cs = CPU(cpu); @@ -1375,6 +1462,10 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) vector |= env->excp_prefix; + if (is_prefix_insn_excp(cpu, excp)) { + msr |= PPC_BIT(34); + } + switch (excp) { case POWERPC_EXCP_MCHECK: /* Machine check exception */ if (!FIELD_EX64(env->msr, MSR, ME)) { @@ -1500,6 +1591,10 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) vhc->hypercall(cpu->vhyp, cpu); return; } + if (env->insns_flags2 & PPC2_ISA310) { + /* ISAv3.1 puts LEV into SRR1 */ + msr |= lev << 20; + } if (lev == 1) { new_msr |= (target_ulong)MSR_HVB; } @@ -1551,13 +1646,28 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) case POWERPC_EXCP_HDECR: /* Hypervisor decrementer exception */ case POWERPC_EXCP_HDSI: /* Hypervisor data storage exception */ case POWERPC_EXCP_SDOOR_HV: /* Hypervisor Doorbell interrupt */ - case POWERPC_EXCP_HV_EMU: case POWERPC_EXCP_HVIRT: /* Hypervisor virtualization */ srr0 = SPR_HSRR0; srr1 = SPR_HSRR1; new_msr |= (target_ulong)MSR_HVB; new_msr |= env->msr & ((target_ulong)1 << MSR_RI); break; +#ifdef CONFIG_TCG + case POWERPC_EXCP_HV_EMU: { + uint32_t insn = ppc_ldl_code(env, env->nip); + env->spr[SPR_HEIR] = insn; + if (is_prefix_insn(env, insn)) { + uint32_t insn2 = ppc_ldl_code(env, env->nip + 4); + env->spr[SPR_HEIR] <<= 32; + env->spr[SPR_HEIR] |= insn2; + } + srr0 = SPR_HSRR0; + srr1 = SPR_HSRR1; + new_msr |= (target_ulong)MSR_HVB; + new_msr |= env->msr & ((target_ulong)1 << MSR_RI); + break; + } +#endif case POWERPC_EXCP_VPU: /* Vector unavailable exception */ case POWERPC_EXCP_VSXU: /* VSX unavailable exception */ case POWERPC_EXCP_FU: /* Facility unavailable exception */ @@ -3076,22 +3186,42 @@ void helper_book3s_msgclrp(CPUPPCState *env, target_ulong rb) } /* - * sends a message to other threads that are on the same + * sends a message to another thread on the same * multi-threaded processor */ void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb) { - int pir = env->spr_cb[SPR_PIR].default_value; + CPUState *cs = env_cpu(env); + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUState *ccs; + uint32_t nr_threads = cs->nr_threads; + int ttir = rb & PPC_BITMASK(57, 63); helper_hfscr_facility_check(env, HFSCR_MSGP, "msgsndp", HFSCR_IC_MSGP); - if (!dbell_type_server(rb)) { + if (!dbell_type_server(rb) || ttir >= nr_threads) { return; } - /* TODO: TCG supports only one thread */ + if (nr_threads == 1) { + ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, 1); + return; + } - book3s_msgsnd_common(pir, PPC_INTERRUPT_DOORBELL); + /* Does iothread need to be locked for walking CPU list? */ + qemu_mutex_lock_iothread(); + THREAD_SIBLING_FOREACH(cs, ccs) { + PowerPCCPU *ccpu = POWERPC_CPU(ccs); + uint32_t thread_id = ppc_cpu_tir(ccpu); + + if (ttir == thread_id) { + ppc_set_irq(ccpu, PPC_INTERRUPT_DOORBELL, 1); + qemu_mutex_unlock_iothread(); + return; + } + } + + g_assert_not_reached(); } #endif /* TARGET_PPC64 */ @@ -3104,7 +3234,7 @@ void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, /* Restore state and reload the insn we executed, for filling in DSISR. */ cpu_restore_state(cs, retaddr); - insn = cpu_ldl_code(env, env->nip); + insn = ppc_ldl_code(env, env->nip); switch (env->mmu_model) { case POWERPC_MMU_SOFT_4xx: diff --git a/target/ppc/gdbstub.c b/target/ppc/gdbstub.c index 63c9abe4f1..ca39efdc35 100644 --- a/target/ppc/gdbstub.c +++ b/target/ppc/gdbstub.c @@ -327,6 +327,25 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu) unsigned int num_regs = 0; int i; + for (i = 0; i < ARRAY_SIZE(env->spr_cb); i++) { + ppc_spr_t *spr = &env->spr_cb[i]; + + if (!spr->name) { + continue; + } + + /* + * GDB identifies registers based on the order they are + * presented in the XML. These ids will not match QEMU's + * representation (which follows the PowerISA). + * + * Store the position of the current register description so + * we can make the correspondence later. + */ + spr->gdb_id = num_regs; + num_regs++; + } + if (pcc->gdb_spr_xml) { return; } @@ -348,17 +367,6 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu) g_string_append_printf(xml, " bitsize=\"%d\"", TARGET_LONG_BITS); g_string_append(xml, " group=\"spr\"/>"); - - /* - * GDB identifies registers based on the order they are - * presented in the XML. These ids will not match QEMU's - * representation (which follows the PowerISA). - * - * Store the position of the current register description so - * we can make the correspondence later. - */ - spr->gdb_id = num_regs; - num_regs++; } g_string_append(xml, ""); diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 38efbc351c..fda40b8a60 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -704,6 +704,8 @@ DEF_HELPER_3(store_dcr, void, env, tl, tl) DEF_HELPER_2(load_dump_spr, void, env, i32) DEF_HELPER_2(store_dump_spr, void, env, i32) +DEF_HELPER_3(spr_write_CTRL, void, env, i32, tl) + DEF_HELPER_4(fscr_facility_check, void, env, i32, i32, i32) DEF_HELPER_4(msr_facility_check, void, env, i32, i32, i32) DEF_HELPER_FLAGS_1(load_tbl, TCG_CALL_NO_RWG, tl, env) diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index a7f2de9d10..a8a935e267 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -1728,6 +1728,10 @@ int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) .addr = (uintptr_t) &bits, }; + if (!kvm_enabled()) { + return 0; + } + return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); } @@ -1741,6 +1745,10 @@ int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) .addr = (uintptr_t) &bits, }; + if (!kvm_enabled()) { + return 0; + } + return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); } @@ -1755,6 +1763,10 @@ int kvmppc_set_tcr(PowerPCCPU *cpu) .addr = (uintptr_t) &tcr, }; + if (!kvm_enabled()) { + return 0; + } + return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); } diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 40ddc5c08c..1f1af21f33 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -43,6 +43,31 @@ void helper_store_dump_spr(CPUPPCState *env, uint32_t sprn) env->spr[sprn]); } +void helper_spr_write_CTRL(CPUPPCState *env, uint32_t sprn, + target_ulong val) +{ + CPUState *cs = env_cpu(env); + CPUState *ccs; + uint32_t run = val & 1; + uint32_t ts, ts_mask; + + assert(sprn == SPR_CTRL); + + env->spr[sprn] &= ~1U; + env->spr[sprn] |= run; + + ts_mask = ~(1U << (8 + env->spr[SPR_TIR])); + ts = run << (8 + env->spr[SPR_TIR]); + + THREAD_SIBLING_FOREACH(cs, ccs) { + CPUPPCState *cenv = &POWERPC_CPU(ccs)->env; + + cenv->spr[sprn] &= ts_mask; + cenv->spr[sprn] |= ts; + } +} + + #ifdef TARGET_PPC64 static void raise_hv_fu_exception(CPUPPCState *env, uint32_t bit, const char *caller, uint32_t cause, @@ -159,32 +184,64 @@ void helper_store_pcr(CPUPPCState *env, target_ulong value) */ target_ulong helper_load_dpdes(CPUPPCState *env) { + CPUState *cs = env_cpu(env); + CPUState *ccs; + uint32_t nr_threads = cs->nr_threads; target_ulong dpdes = 0; helper_hfscr_facility_check(env, HFSCR_MSGP, "load DPDES", HFSCR_IC_MSGP); - /* TODO: TCG supports only one thread */ - if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) { - dpdes = 1; + if (nr_threads == 1) { + if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) { + dpdes = 1; + } + return dpdes; } + qemu_mutex_lock_iothread(); + THREAD_SIBLING_FOREACH(cs, ccs) { + PowerPCCPU *ccpu = POWERPC_CPU(ccs); + CPUPPCState *cenv = &ccpu->env; + uint32_t thread_id = ppc_cpu_tir(ccpu); + + if (cenv->pending_interrupts & PPC_INTERRUPT_DOORBELL) { + dpdes |= (0x1 << thread_id); + } + } + qemu_mutex_unlock_iothread(); + return dpdes; } void helper_store_dpdes(CPUPPCState *env, target_ulong val) { PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + CPUState *ccs; + uint32_t nr_threads = cs->nr_threads; helper_hfscr_facility_check(env, HFSCR_MSGP, "store DPDES", HFSCR_IC_MSGP); - /* TODO: TCG supports only one thread */ - if (val & ~0x1) { + if (val & ~(nr_threads - 1)) { qemu_log_mask(LOG_GUEST_ERROR, "Invalid DPDES register value " TARGET_FMT_lx"\n", val); + val &= (nr_threads - 1); /* Ignore the invalid bits */ + } + + if (nr_threads == 1) { + ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & 0x1); return; } - ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & 0x1); + /* Does iothread need to be locked for walking CPU list? */ + qemu_mutex_lock_iothread(); + THREAD_SIBLING_FOREACH(cs, ccs) { + PowerPCCPU *ccpu = POWERPC_CPU(ccs); + uint32_t thread_id = ppc_cpu_tir(ccpu); + + ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & (0x1 << thread_id)); + } + qemu_mutex_unlock_iothread(); } #endif /* defined(TARGET_PPC64) */ diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 031efda0df..920084bd8f 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -145,6 +145,13 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type, CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; + env->error_code = 0; + if (cause & DSISR_PRTABLE_FAULT) { + /* HDSI PRTABLE_FAULT gets the originating access type in error_code */ + env->error_code = access_type; + access_type = MMU_DATA_LOAD; + } + qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx" 0x%" HWADDR_PRIx" cause %08x\n", __func__, access_str(access_type), @@ -166,7 +173,6 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type, env->spr[SPR_HDSISR] = cause; env->spr[SPR_HDAR] = eaddr; env->spr[SPR_ASDR] = g_raddr; - env->error_code = 0; break; default: g_assert_not_reached(); @@ -369,17 +375,26 @@ static bool validate_pate(PowerPCCPU *cpu, uint64_t lpid, ppc_v3_pate_t *pate) } static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, - MMUAccessType access_type, + MMUAccessType orig_access_type, vaddr eaddr, hwaddr g_raddr, ppc_v3_pate_t pate, hwaddr *h_raddr, int *h_prot, int *h_page_size, bool pde_addr, int mmu_idx, bool guest_visible) { + MMUAccessType access_type = orig_access_type; int fault_cause = 0; hwaddr pte_addr; uint64_t pte; + if (pde_addr) { + /* + * Translation of process-scoped tables/directories is performed as + * a read-access. + */ + access_type = MMU_DATA_LOAD; + } + qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx " mmu_idx %u 0x%"HWADDR_PRIx"\n", __func__, access_str(access_type), @@ -396,7 +411,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, fault_cause |= DSISR_PRTABLE_FAULT; } if (guest_visible) { - ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr, fault_cause); + ppc_radix64_raise_hsi(cpu, orig_access_type, + eaddr, g_raddr, fault_cause); } return 1; } @@ -477,10 +493,10 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, * is only used to translate the effective addresses of the * process table entries. */ - ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr, - pate, &h_raddr, &h_prot, - &h_page_size, true, - /* mmu_idx is 5 because we're translating from hypervisor scope */ + /* mmu_idx is 5 because we're translating from hypervisor scope */ + ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, + prtbe_addr, pate, &h_raddr, + &h_prot, &h_page_size, true, 5, guest_visible); if (ret) { return ret; @@ -519,11 +535,11 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, * translation */ do { - ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr, - pate, &h_raddr, &h_prot, - &h_page_size, true, /* mmu_idx is 5 because we're translating from hypervisor scope */ - 5, guest_visible); + ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, + pte_addr, pate, &h_raddr, + &h_prot, &h_page_size, + true, 5, guest_visible); if (ret) { return ret; } diff --git a/target/ppc/translate.c b/target/ppc/translate.c index b591f2e496..372ee600b2 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -234,6 +234,28 @@ struct opc_handler_t { void (*handler)(DisasContext *ctx); }; +static inline bool gen_serialize(DisasContext *ctx) +{ + if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; + return false; + } + return true; +} + +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) +static inline bool gen_serialize_core(DisasContext *ctx) +{ + if (ctx->flags & POWERPC_FLAG_SMT) { + return gen_serialize(ctx); + } + + return true; +} +#endif + /* SPR load/store helpers */ static inline void gen_load_spr(TCGv t, int reg) { @@ -416,9 +438,32 @@ void spr_write_generic32(DisasContext *ctx, int sprn, int gprn) #endif } +static void spr_write_CTRL_ST(DisasContext *ctx, int sprn, int gprn) +{ + /* This does not implement >1 thread */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + tcg_gen_extract_tl(t0, cpu_gpr[gprn], 0, 1); /* Extract RUN field */ + tcg_gen_shli_tl(t1, t0, 8); /* Duplicate the bit in TS */ + tcg_gen_or_tl(t1, t1, t0); + gen_store_spr(sprn, t1); +} + void spr_write_CTRL(DisasContext *ctx, int sprn, int gprn) { - spr_write_generic32(ctx, sprn, gprn); + if (!(ctx->flags & POWERPC_FLAG_SMT)) { + spr_write_CTRL_ST(ctx, sprn, gprn); + goto out; + } + + if (!gen_serialize(ctx)) { + return; + } + + gen_helper_spr_write_CTRL(cpu_env, tcg_constant_i32(sprn), + cpu_gpr[gprn]); +out: + spr_store_dump_spr(sprn); /* * SPR_CTRL writes must force a new translation block, @@ -770,11 +815,19 @@ void spr_write_pcr(DisasContext *ctx, int sprn, int gprn) /* DPDES */ void spr_read_dpdes(DisasContext *ctx, int gprn, int sprn) { + if (!gen_serialize_core(ctx)) { + return; + } + gen_helper_load_dpdes(cpu_gpr[gprn], cpu_env); } void spr_write_dpdes(DisasContext *ctx, int sprn, int gprn) { + if (!gen_serialize_core(ctx)) { + return; + } + gen_helper_store_dpdes(cpu_env, cpu_gpr[gprn]); } #endif @@ -4422,7 +4475,12 @@ static void gen_sc(DisasContext *ctx) { uint32_t lev; - lev = (ctx->opcode >> 5) & 0x7F; + /* + * LEV is a 7-bit field, but the top 6 bits are treated as a reserved + * field (i.e., ignored). ISA v3.1 changes that to 5 bits, but that is + * for Ultravisor which TCG does not support, so just ignore the top 6. + */ + lev = (ctx->opcode >> 5) & 0x1; gen_exception_err(ctx, POWERPC_SYSCALL, lev); } diff --git a/tests/avocado/ppc_pseries.py b/tests/avocado/ppc_pseries.py index d8b04dc3ea..a8311e6555 100644 --- a/tests/avocado/ppc_pseries.py +++ b/tests/avocado/ppc_pseries.py @@ -14,12 +14,9 @@ class pseriesMachine(QemuSystemTest): timeout = 90 KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 ' panic_message = 'Kernel panic - not syncing' + good_message = 'VFS: Cannot open root device' - def test_ppc64_pseries(self): - """ - :avocado: tags=arch:ppc64 - :avocado: tags=machine:pseries - """ + def do_test_ppc64_linux_boot(self): kernel_url = ('https://archives.fedoraproject.org/pub/archive' '/fedora-secondary/releases/29/Everything/ppc64le/os' '/ppc/ppc64/vmlinuz') @@ -31,5 +28,69 @@ class pseriesMachine(QemuSystemTest): self.vm.add_args('-kernel', kernel_path, '-append', kernel_command_line) self.vm.launch() - console_pattern = 'Kernel command line: %s' % kernel_command_line + + def test_ppc64_vof_linux_boot(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + + self.vm.add_args('-machine', 'x-vof=on') + self.do_test_ppc64_linux_boot() + console_pattern = 'VFS: Cannot open root device' wait_for_console_pattern(self, console_pattern, self.panic_message) + + def test_ppc64_linux_boot(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + + self.do_test_ppc64_linux_boot() + console_pattern = 'VFS: Cannot open root device' + wait_for_console_pattern(self, console_pattern, self.panic_message) + + def test_ppc64_linux_smp_boot(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + + self.vm.add_args('-smp', '4') + self.do_test_ppc64_linux_boot() + console_pattern = 'smp: Brought up 1 node, 4 CPUs' + wait_for_console_pattern(self, console_pattern, self.panic_message) + wait_for_console_pattern(self, self.good_message, self.panic_message) + + def test_ppc64_linux_smt_boot(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + + self.vm.add_args('-smp', '4,threads=4') + self.do_test_ppc64_linux_boot() + console_pattern = 'CPU maps initialized for 4 threads per core' + wait_for_console_pattern(self, console_pattern, self.panic_message) + console_pattern = 'smp: Brought up 1 node, 4 CPUs' + wait_for_console_pattern(self, console_pattern, self.panic_message) + wait_for_console_pattern(self, self.good_message, self.panic_message) + + def test_ppc64_linux_big_boot(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + + self.vm.add_args('-smp', '16,threads=4,cores=2,sockets=2') + self.vm.add_args('-m', '512M', + '-object', 'memory-backend-ram,size=256M,id=m0', + '-object', 'memory-backend-ram,size=256M,id=m1') + self.vm.add_args('-numa', 'node,nodeid=0,memdev=m0') + self.vm.add_args('-numa', 'node,nodeid=1,memdev=m1') + self.do_test_ppc64_linux_boot() + console_pattern = 'CPU maps initialized for 4 threads per core' + wait_for_console_pattern(self, console_pattern, self.panic_message) + console_pattern = 'smp: Brought up 2 nodes, 16 CPUs' + wait_for_console_pattern(self, console_pattern, self.panic_message) + wait_for_console_pattern(self, self.good_message, self.panic_message)