diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 59435390ba..21081041d5 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1691,10 +1691,15 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); + MemoryRegion *mr; uint64_t align = TARGET_PAGE_SIZE; bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); + mr = ddc->get_memory_region(dimm, &local_err); + if (local_err) { + goto out; + } + if (memory_region_get_alignment(mr) && pcmc->enforce_aligned_dimm) { align = memory_region_get_alignment(mr); } @@ -1758,10 +1763,15 @@ static void pc_dimm_unplug(HotplugHandler *hotplug_dev, PCMachineState *pcms = PC_MACHINE(hotplug_dev); PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); + MemoryRegion *mr; HotplugHandlerClass *hhc; Error *local_err = NULL; + mr = ddc->get_memory_region(dimm, &local_err); + if (local_err) { + goto out; + } + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index db896b0bb6..952fce5ec8 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -71,7 +71,7 @@ static void nvdimm_init(Object *obj) NULL, NULL); } -static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice *dimm) +static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice *dimm, Error **errp) { NVDIMMDevice *nvdimm = NVDIMM(dimm); diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index ea67b461c2..bdf6649083 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -363,7 +363,10 @@ static void pc_dimm_get_size(Object *obj, Visitor *v, const char *name, PCDIMMDevice *dimm = PC_DIMM(obj); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(obj); - mr = ddc->get_memory_region(dimm); + mr = ddc->get_memory_region(dimm, 
errp); + if (!mr) { + return; + } value = memory_region_size(mr); visit_type_uint64(v, name, &value, errp); @@ -411,9 +414,14 @@ static void pc_dimm_unrealize(DeviceState *dev, Error **errp) host_memory_backend_set_mapped(dimm->hostmem, false); } -static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm) +static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm, Error **errp) { - return host_memory_backend_get_memory(dimm->hostmem, &error_abort); + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set"); + return NULL; + } + + return host_memory_backend_get_memory(dimm->hostmem, errp); } static MemoryRegion *pc_dimm_get_vmstate_memory_region(PCDIMMDevice *dimm) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index f7a19720dc..cec441cbf4 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2772,10 +2772,15 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); - uint64_t align = memory_region_get_alignment(mr); - uint64_t size = memory_region_size(mr); - uint64_t addr; + MemoryRegion *mr; + uint64_t align, size, addr; + + mr = ddc->get_memory_region(dimm, &local_err); + if (local_err) { + goto out; + } + align = memory_region_get_alignment(mr); + size = memory_region_size(mr); pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); if (local_err) { @@ -2808,10 +2813,16 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, { PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); - uint64_t size = memory_region_size(mr); + MemoryRegion *mr; + uint64_t size; char *mem_dev; + mr = ddc->get_memory_region(dimm, errp); + if (!mr) { + return; + } + size = memory_region_size(mr); + if (size % 
SPAPR_MEMORY_BLOCK_SIZE) { error_setg(errp, "Hotplugged memory size must be a multiple of " "%lld MB", SPAPR_MEMORY_BLOCK_SIZE / M_BYTE); @@ -2882,7 +2893,7 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms, { sPAPRDRConnector *drc; PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort); uint64_t size = memory_region_size(mr); uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; uint32_t avail_lmbs = 0; @@ -2912,7 +2923,7 @@ void spapr_lmb_release(DeviceState *dev) sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_hotplug_handler(dev)); PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort); sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); /* This information will get lost if a migration occurs @@ -2945,12 +2956,19 @@ static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, Error *local_err = NULL; PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); - uint64_t size = memory_region_size(mr); - uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; - uint64_t addr_start, addr; + MemoryRegion *mr; + uint32_t nr_lmbs; + uint64_t size, addr_start, addr; int i; sPAPRDRConnector *drc; + + mr = ddc->get_memory_region(dimm, &local_err); + if (local_err) { + goto out; + } + size = memory_region_size(mr); + nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); if (local_err) { diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 5260b5d363..605697d8bd 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -442,12 +442,17 @@ void spapr_drc_reset(sPAPRDRConnector *drc) if (drc->dev) { /* A device 
present at reset is ready to go, same as coldplugged */ drc->state = drck->ready_state; + /* + * Ensure that we are able to send the FDT fragment again + * via configure-connector call if the guest requests. + */ + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; } else { drc->state = drck->empty_state; + drc->ccs_offset = -1; + drc->ccs_depth = -1; } - - drc->ccs_offset = -1; - drc->ccs_depth = -1; } static void drc_reset(void *opaque) @@ -1071,8 +1076,14 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, } if ((drc->state != SPAPR_DRC_STATE_LOGICAL_UNISOLATE) - && (drc->state != SPAPR_DRC_STATE_PHYSICAL_UNISOLATE)) { - /* Need to unisolate the device before configuring */ + && (drc->state != SPAPR_DRC_STATE_PHYSICAL_UNISOLATE) + && (drc->state != SPAPR_DRC_STATE_LOGICAL_CONFIGURED) + && (drc->state != SPAPR_DRC_STATE_PHYSICAL_CONFIGURED)) { + /* + * Need to unisolate the device before configuring + * or it should already be in configured state to + * allow configure-connector to be called repeatedly. + */ rc = SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE; goto out; } @@ -1108,8 +1119,13 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, /* done sending the device tree, move to configured state */ trace_spapr_drc_set_configured(drc_index); drc->state = drck->ready_state; - drc->ccs_offset = -1; - drc->ccs_depth = -1; + /* + * Ensure that we are able to send the FDT fragment + * again via configure-connector call if the guest requests. 
+ */ + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + fdt_offset_next = drc->fdt_start_offset; resp = SPAPR_DR_CC_RESPONSE_SUCCESS; } else { resp = SPAPR_DR_CC_RESPONSE_PREV_PARENT; diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index e614621a83..ed2d53559a 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -618,6 +618,8 @@ static void spapr_tce_table_class_init(ObjectClass *klass, void *data) dc->init = spapr_tce_table_realize; dc->reset = spapr_tce_reset; dc->unrealize = spapr_tce_table_unrealize; + /* Reason: This is just an internal device for handling the hypercalls */ + dc->user_creatable = false; QLIST_INIT(&spapr_tce_tables); diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c index 00a4e4c717..9ec3078691 100644 --- a/hw/ppc/spapr_rtc.c +++ b/hw/ppc/spapr_rtc.c @@ -164,6 +164,8 @@ static void spapr_rtc_class_init(ObjectClass *oc, void *data) dc->realize = spapr_rtc_realize; dc->vmsd = &vmstate_spapr_rtc; + /* Reason: This is an internal device only for handling the hypercalls */ + dc->user_creatable = false; spapr_rtas_register(RTAS_GET_TIME_OF_DAY, "get-time-of-day", rtas_get_time_of_day); diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index 1e483f2670..6f8c3eb1b3 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -71,7 +71,7 @@ typedef struct PCDIMMDeviceClass { /* public */ void (*realize)(PCDIMMDevice *dimm, Error **errp); - MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm); + MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm, Error **errp); MemoryRegion *(*get_vmstate_memory_region)(PCDIMMDevice *dimm); } PCDIMMDeviceClass; diff --git a/target/ppc/compat.c b/target/ppc/compat.c index f1b67faa97..f8729fe46d 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -140,16 +140,17 @@ void ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp) cpu_synchronize_state(CPU(cpu)); - cpu->compat_pvr = compat_pvr; - env->spr[SPR_PCR] = pcr & pcc->pcr_mask; - 
- - if (kvm_enabled()) { + if (kvm_enabled() && cpu->compat_pvr != compat_pvr) { - int ret = kvmppc_set_compat(cpu, cpu->compat_pvr); + int ret = kvmppc_set_compat(cpu, compat_pvr); if (ret < 0) { error_setg_errno(errp, -ret, "Unable to set CPU compatibility mode in KVM"); + return; } } + + cpu->compat_pvr = compat_pvr; + env->spr[SPR_PCR] = pcr & pcc->pcr_mask; } typedef struct { diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 85713795de..f31c67e1b1 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -90,6 +90,7 @@ static int cap_htm; /* Hardware transactional memory support */ static int cap_mmu_radix; static int cap_mmu_hash_v3; static int cap_resize_hpt; +static int cap_ppc_pvr_compat; static uint32_t debug_inst_opcode; @@ -147,6 +148,13 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); + /* + * Note: setting it to false because there is no such capability + * in KVM at this moment. + * + * TODO: call kvm_vm_check_extension() with the right capability + * after the kernel starts implementing it.*/ + cap_ppc_pvr_compat = false; if (!cap_interrupt_level) { fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " @@ -2785,3 +2793,33 @@ void kvmppc_update_sdr1(target_ulong sdr1) run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1)); } } + +/* + * This is a helper function to detect a post migration scenario + * in which a guest, running as KVM-HV, freezes in cpu_post_load because + * the guest kernel can't handle a PVR value other than the actual host + * PVR in KVM_SET_SREGS, even if pvr_match() returns true. + * + * If we don't have cap_ppc_pvr_compat and we're not running in PR + * (so, we're HV), return true. The workaround itself is done in + * cpu_post_load. 
+ * + * The order here is important: we'll only check for KVM PR as a + * fallback if the guest kernel can't handle the situation itself. + * We need to avoid as much as possible querying the running KVM type + * in QEMU level. + */ +bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) +{ + CPUState *cs = CPU(cpu); + + if (!kvm_enabled()) { + return false; + } + + if (cap_ppc_pvr_compat) { + return false; + } + + return !kvmppc_is_pr(cs->kvm_state); +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 6bc6fb3e2d..381afe6240 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -67,6 +67,7 @@ void kvmppc_check_papr_resize_hpt(Error **errp); int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift); int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift); void kvmppc_update_sdr1(target_ulong sdr1); +bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu); bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path); diff --git a/target/ppc/machine.c b/target/ppc/machine.c index abe0a1cdf0..e36b7100cb 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -9,6 +9,7 @@ #include "mmu-hash64.h" #include "migration/cpu.h" #include "qapi/error.h" +#include "kvm_ppc.h" static int cpu_load_old(QEMUFile *f, void *opaque, int version_id) { @@ -249,6 +250,27 @@ static int cpu_post_load(void *opaque, int version_id) } } + /* + * If we're running with KVM HV, there is a chance that the guest + * is running with KVM HV and its kernel does not have the + * capability of dealing with a different PVR other than this + * exact host PVR in KVM_SET_SREGS. If that happens, the + * guest freezes after migration. + * + * The function kvmppc_pvr_workaround_required does this verification + * by first checking if the kernel has the cap, returning false immediately + * if that is the case. Otherwise, it checks if we're running in KVM PR. 
+ * If the guest kernel does not have the cap and we're not running KVM-PR + * (so, it is running KVM-HV), we need to ensure that KVM_SET_SREGS will + * receive the PVR it expects as a workaround. + * + */ +#if defined(CONFIG_KVM) + if (kvmppc_pvr_workaround_required(cpu)) { + env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value; + } +#endif + env->lr = env->spr[SPR_LR]; env->ctr = env->spr[SPR_CTR]; cpu_write_xer(env, env->spr[SPR_XER]); diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c index a8ca877168..b95c5e74ea 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c @@ -78,7 +78,11 @@ static void test_machine(const void *data) fd = mkstemp(tmpname); g_assert(fd != -1); - args = g_strdup_printf("-M %s,accel=kvm:tcg " + /* + * Make sure that this test uses tcg if available: It is used as a + * fast-enough smoketest for that. + */ + args = g_strdup_printf("-M %s,accel=tcg:kvm " "-chardev file,id=serial0,path=%s " "-no-shutdown -serial chardev:serial0 %s", test->machine, tmpname, test->extra);