Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.10-20170717' into staging

ppc patch queue 2017-07-17

This pull request supersedes the one from 2017-07-14.  That one had a
couple of subtle regressions: there was a build error for mingw32, and
an instance_size which was theoretically wrong everywhere, but only
actually bit on the Travis OSX build.

There are two major batches in this set, rather than the usual
collection of assorted fixes.

    * More DRC cleanup.  This gets the state management into shape,
      which should fix many of the hotplug+migration problems we've
      had.  It also gets the migration stream format into something
      well defined and fairly minimal which we can reasonably support
      into the future.

    * Hashed Page Table resizing.  It's been a while since this was
      posted, but it has been through several rounds of review.  The
      kernel parts (both guest and host) were merged in 4.11, so this
      is the only piece left to allow resizing of the HPT in a
      running guest.

There are also a handful of unrelated fixes.

# gpg: Signature made Mon 17 Jul 2017 07:36:52 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.10-20170717: (21 commits)
  target/ppc: fix CPU hotplug when radix is enabled (TCG)
  spapr: fix memory leak in spapr_core_pre_plug()
  pseries: Allow HPT resizing with KVM
  pseries: Use smaller default hash page tables when guest can resize
  pseries: Enable HPT resizing for 2.10
  pseries: Implement HPT resizing
  pseries: Stubs for HPT resizing
  ppc/pnv: Remove unused XICSState reference
  spapr: fix potential memory leak in spapr_core_plug()
  spapr: Implement DR-indicator for physical DRCs only
  spapr: Remove sPAPRConfigureConnectorState sub-structure
  spapr: Consolidate DRC state variables
  spapr: Cleanups relating to DRC awaiting_release field
  spapr: Refactor spapr_drc_detach()
  spapr: Abort on delete failure in spapr_drc_release()
  spapr: Simplify unplug path
  spapr: Remove 'awaiting_allocation' DRC flag
  spapr: Treat devices added before inbound migration as coldplugged
  spapr: Minor cleanups to events handling
  spapr: migrate pending_events of spapr state
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 77031ee1ce
Peter Maydell, 2017-07-17 12:52:59 +01:00
14 changed files with 1070 additions and 356 deletions
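
For context on the HPT-resizing batch: the spapr.c hunks below add a
"resize-hpt" machine property (see spapr_get_resize_hpt() and
spapr_set_resize_hpt()), defaulting to "enabled" for pseries-2.10 and
"disabled" for pseries-2.9 and older.  A hypothetical invocation,
assuming the usual -machine property syntax, might look like:

    qemu-system-ppc64 -machine pseries,resize-hpt=required ...

where "required" makes QEMU exit at startup if the host cannot supply
HPT resizing (the check added to ppc_spapr_init() below), while
"enabled" silently falls back to a fixed-size HPT.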


@@ -98,8 +98,6 @@
#define PHANDLE_XICP 0x00001111
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
static ICSState *spapr_ics_create(sPAPRMachineState *spapr,
const char *type_ics,
int nr_irqs, Error **errp)
@@ -874,6 +872,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
add_str(hypertas, "hcall-multi-tce");
}
if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
add_str(hypertas, "hcall-hpt-resize");
}
_FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
hypertas->str, hypertas->len));
g_string_free(hypertas, TRUE);
@@ -1264,7 +1267,7 @@ static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
}
}
static int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
{
int shift;
@@ -1285,8 +1288,8 @@ void spapr_free_hpt(sPAPRMachineState *spapr)
close_htab_fd(spapr);
}
static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
Error **errp)
void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
Error **errp)
{
long rc;
@@ -1334,9 +1337,17 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
{
spapr_reallocate_hpt(spapr,
spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size),
&error_fatal);
int hpt_shift;
if ((spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED)
|| (spapr->cas_reboot
&& !spapr_ovec_test(spapr->ov5_cas, OV5_HPT_RESIZE))) {
hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
} else {
hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->ram_size);
}
spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal);
if (spapr->vrma_adjust) {
spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
spapr->htab_shift);
@@ -1517,6 +1528,37 @@ static bool version_before_3(void *opaque, int version_id)
return version_id < 3;
}
static bool spapr_pending_events_needed(void *opaque)
{
sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
return !QTAILQ_EMPTY(&spapr->pending_events);
}
static const VMStateDescription vmstate_spapr_event_entry = {
.name = "spapr_event_log_entry",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(summary, sPAPREventLogEntry),
VMSTATE_UINT32(extended_length, sPAPREventLogEntry),
VMSTATE_VBUFFER_ALLOC_UINT32(extended_log, sPAPREventLogEntry, 0,
NULL, extended_length),
VMSTATE_END_OF_LIST()
},
};
static const VMStateDescription vmstate_spapr_pending_events = {
.name = "spapr_pending_events",
.version_id = 1,
.minimum_version_id = 1,
.needed = spapr_pending_events_needed,
.fields = (VMStateField[]) {
VMSTATE_QTAILQ_V(pending_events, sPAPRMachineState, 1,
vmstate_spapr_event_entry, sPAPREventLogEntry, next),
VMSTATE_END_OF_LIST()
},
};
static bool spapr_ov5_cas_needed(void *opaque)
{
sPAPRMachineState *spapr = opaque;
@@ -1615,6 +1657,7 @@ static const VMStateDescription vmstate_spapr = {
.subsections = (const VMStateDescription*[]) {
&vmstate_spapr_ov5_cas,
&vmstate_spapr_patb_entry,
&vmstate_spapr_pending_events,
NULL
}
};
@@ -2116,12 +2159,41 @@ static void ppc_spapr_init(MachineState *machine)
hwaddr node0_size = spapr_node0_size();
long load_limit, fw_size;
char *filename;
Error *resize_hpt_err = NULL;
msi_nonbroken = true;
QLIST_INIT(&spapr->phbs);
QTAILQ_INIT(&spapr->pending_dimm_unplugs);
/* Check HPT resizing availability */
kvmppc_check_papr_resize_hpt(&resize_hpt_err);
if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
/*
* If the user explicitly requested a mode we should either
* supply it, or fail completely (which we do below). But if
* it's not set explicitly, we reset our mode to something
* that works
*/
if (resize_hpt_err) {
spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
error_free(resize_hpt_err);
resize_hpt_err = NULL;
} else {
spapr->resize_hpt = smc->resize_hpt_default;
}
}
assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
/*
* User requested HPT resize, but this host can't supply it. Bail out
*/
error_report_err(resize_hpt_err);
exit(1);
}
/* Allocate RMA if necessary */
rma_alloc_size = kvmppc_alloc_rma(&rma);
@@ -2190,6 +2262,11 @@ static void ppc_spapr_init(MachineState *machine)
spapr_ovec_set(spapr->ov5, OV5_HP_EVT);
}
/* advertise support for HPT resizing */
if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
spapr_ovec_set(spapr->ov5, OV5_HPT_RESIZE);
}
/* init CPUs */
if (machine->cpu_model == NULL) {
machine->cpu_model = kvm_enabled() ? "host" : smc->tcg_default_cpu;
@@ -2547,6 +2624,40 @@ static void spapr_set_modern_hotplug_events(Object *obj, bool value,
spapr->use_hotplug_event_source = value;
}
static char *spapr_get_resize_hpt(Object *obj, Error **errp)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
switch (spapr->resize_hpt) {
case SPAPR_RESIZE_HPT_DEFAULT:
return g_strdup("default");
case SPAPR_RESIZE_HPT_DISABLED:
return g_strdup("disabled");
case SPAPR_RESIZE_HPT_ENABLED:
return g_strdup("enabled");
case SPAPR_RESIZE_HPT_REQUIRED:
return g_strdup("required");
}
g_assert_not_reached();
}
static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
if (strcmp(value, "default") == 0) {
spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT;
} else if (strcmp(value, "disabled") == 0) {
spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
} else if (strcmp(value, "enabled") == 0) {
spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED;
} else if (strcmp(value, "required") == 0) {
spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED;
} else {
error_setg(errp, "Bad value for \"resize-hpt\" property");
}
}
static void spapr_machine_initfn(Object *obj)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
@@ -2571,6 +2682,12 @@ static void spapr_machine_initfn(Object *obj)
ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr,
"Maximum permitted CPU compatibility mode",
&error_fatal);
object_property_add_str(obj, "resize-hpt",
spapr_get_resize_hpt, spapr_set_resize_hpt, NULL);
object_property_set_description(obj, "resize-hpt",
"Resizing of the Hash Page Table (enabled, disabled, required)",
NULL);
}
static void spapr_machine_finalizefn(Object *obj)
@@ -2604,6 +2721,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
int i, fdt_offset, fdt_size;
void *fdt;
uint64_t addr = addr_start;
bool hotplugged = spapr_drc_hotplugged(dev);
Error *local_err = NULL;
for (i = 0; i < nr_lmbs; i++) {
@@ -2621,18 +2739,21 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
addr -= SPAPR_MEMORY_BLOCK_SIZE;
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
addr / SPAPR_MEMORY_BLOCK_SIZE);
spapr_drc_detach(drc, dev, NULL);
spapr_drc_detach(drc);
}
g_free(fdt);
error_propagate(errp, local_err);
return;
}
if (!hotplugged) {
spapr_drc_reset(drc);
}
addr += SPAPR_MEMORY_BLOCK_SIZE;
}
/* send hotplug notification to the
* guest only in case of hotplugged memory
*/
if (dev->hotplugged) {
if (hotplugged) {
if (dedicated_hp_event_source) {
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
addr_start / SPAPR_MEMORY_BLOCK_SIZE);
@@ -2780,8 +2901,10 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
/* Callback to be called during DRC release. */
void spapr_lmb_release(DeviceState *dev)
{
HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl);
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_hotplug_handler(dev));
PCDIMMDevice *dimm = PC_DIMM(dev);
PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
MemoryRegion *mr = ddc->get_memory_region(dimm);
sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev));
/* This information will get lost if a migration occurs
@@ -2802,18 +2925,7 @@ void spapr_lmb_release(DeviceState *dev)
* Now that all the LMBs have been removed by the guest, call the
* pc-dimm unplug handler to cleanup up the pc-dimm device.
*/
hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
}
static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
PCDIMMDevice *dimm = PC_DIMM(dev);
PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
MemoryRegion *mr = ddc->get_memory_region(dimm);
pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
pc_dimm_memory_unplug(dev, &spapr->hotplug_memory, mr);
object_unparent(OBJECT(dev));
}
@@ -2849,7 +2961,7 @@ static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
addr / SPAPR_MEMORY_BLOCK_SIZE);
g_assert(drc);
spapr_drc_detach(drc, dev, errp);
spapr_drc_detach(drc);
addr += SPAPR_MEMORY_BLOCK_SIZE;
}
@@ -2882,10 +2994,10 @@ static void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset,
return fdt;
}
static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
/* Callback to be called during DRC release. */
void spapr_core_release(DeviceState *dev)
{
MachineState *ms = MACHINE(qdev_get_machine());
MachineState *ms = MACHINE(qdev_get_hotplug_handler(dev));
sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
CPUCore *cc = CPU_CORE(dev);
CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL);
@@ -2909,22 +3021,12 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
object_unparent(OBJECT(dev));
}
/* Callback to be called during DRC release. */
void spapr_core_release(DeviceState *dev)
{
HotplugHandler *hotplug_ctrl;
hotplug_ctrl = qdev_get_hotplug_handler(dev);
hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
}
static
void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
int index;
sPAPRDRConnector *drc;
Error *local_err = NULL;
CPUCore *cc = CPU_CORE(dev);
int smt = kvmppc_smt_threads();
@@ -2941,11 +3043,7 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index * smt);
g_assert(drc);
spapr_drc_detach(drc, dev, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
spapr_drc_detach(drc);
spapr_hotplug_req_remove_by_index(drc);
}
@@ -2961,11 +3059,10 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
CPUState *cs = CPU(core->threads);
sPAPRDRConnector *drc;
Error *local_err = NULL;
void *fdt = NULL;
int fdt_offset = 0;
int smt = kvmppc_smt_threads();
CPUArchId *core_slot;
int index;
bool hotplugged = spapr_drc_hotplugged(dev);
core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
if (!core_slot) {
@@ -2977,24 +3074,30 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
g_assert(drc || !mc->has_hotpluggable_cpus);
fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr);
if (drc) {
void *fdt;
int fdt_offset;
fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr);
spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
if (local_err) {
g_free(fdt);
error_propagate(errp, local_err);
return;
}
if (hotplugged) {
/*
* Send hotplug notification interrupt to the guest only
* in case of hotplugged CPUs.
*/
spapr_hotplug_req_add_by_index(drc);
} else {
spapr_drc_reset(drc);
}
}
if (dev->hotplugged) {
/*
* Send hotplug notification interrupt to the guest only in case
* of hotplugged CPUs.
*/
spapr_hotplug_req_add_by_index(drc);
}
core_slot->cpu = OBJECT(dev);
if (smc->pre_2_10_has_unused_icps) {
@@ -3047,9 +3150,9 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
* total vcpus not a multiple of threads-per-core.
*/
if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) {
error_setg(errp, "invalid nr-threads %d, must be %d",
error_setg(&local_err, "invalid nr-threads %d, must be %d",
cc->nr_threads, smp_threads);
return;
goto out;
}
core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
@@ -3119,27 +3222,6 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
}
}
static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
sPAPRMachineState *sms = SPAPR_MACHINE(qdev_get_machine());
MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
spapr_memory_unplug(hotplug_dev, dev, errp);
} else {
error_setg(errp, "Memory hot unplug not supported for this guest");
}
} else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
if (!mc->has_hotpluggable_cpus) {
error_setg(errp, "CPU hot unplug not supported on this machine");
return;
}
spapr_core_unplug(hotplug_dev, dev, errp);
}
}
static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
@@ -3357,7 +3439,6 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
mc->get_hotplug_handler = spapr_get_hotplug_handler;
hc->pre_plug = spapr_machine_device_pre_plug;
hc->plug = spapr_machine_device_plug;
hc->unplug = spapr_machine_device_unplug;
mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
hc->unplug_request = spapr_machine_device_unplug_request;
@@ -3365,6 +3446,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
smc->dr_lmb_enabled = true;
smc->tcg_default_cpu = "POWER8";
mc->has_hotpluggable_cpus = true;
smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
fwc->get_dev_path = spapr_get_fw_dev_path;
nc->nmi_monitor_handler = spapr_nmi;
smc->phb_placement = spapr_phb_placement;
@@ -3471,6 +3553,7 @@ static void spapr_machine_2_9_class_options(MachineClass *mc)
SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9);
mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
smc->pre_2_10_has_unused_icps = true;
smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
}
DEFINE_SPAPR_MACHINE(2_9, "2.9", false);
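
A rough worked example of the new sizing logic in
spapr_setup_hpt_and_vrma() above, assuming spapr_hpt_shift_for_ramsize()
keeps its existing heuristic of an HPT roughly 1/128th of RAM with a
minimum shift of 18 (the function body is not shown in this diff, so
treat the ratio as an assumption):

    ram_size    4 GiB   (2^32)  ->  shift 25  ->  32 MiB HPT
    maxram_size 256 GiB (2^38)  ->  shift 31  ->   2 GiB HPT

With resizing negotiated, the guest boots with the 32 MiB table sized
for ram_size and only grows it (via the hypercalls in spapr_hcall.c
below) if memory is actually hotplugged, instead of paying for
maxram_size up front.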


@@ -48,40 +48,40 @@ uint32_t spapr_drc_index(sPAPRDRConnector *drc)
static uint32_t drc_isolate_physical(sPAPRDRConnector *drc)
{
/* if the guest is configuring a device attached to this DRC, we
* should reset the configuration state at this point since it may
* no longer be reliable (guest released device and needs to start
* over, or unplug occurred so the FDT is no longer valid)
*/
g_free(drc->ccs);
drc->ccs = NULL;
drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED;
/* if we're awaiting release, but still in an unconfigured state,
* it's likely the guest is still in the process of configuring
* the device and is transitioning the devices to an ISOLATED
* state as a part of that process. so we only complete the
* removal when this transition happens for a device in a
* configured state, as suggested by the state diagram from PAPR+
* 2.7, 13.4
*/
if (drc->awaiting_release) {
uint32_t drc_index = spapr_drc_index(drc);
if (drc->configured) {
trace_spapr_drc_set_isolation_state_finalizing(drc_index);
spapr_drc_detach(drc, DEVICE(drc->dev), NULL);
} else {
trace_spapr_drc_set_isolation_state_deferring(drc_index);
}
switch (drc->state) {
case SPAPR_DRC_STATE_PHYSICAL_POWERON:
return RTAS_OUT_SUCCESS; /* Nothing to do */
case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED:
break; /* see below */
case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE:
return RTAS_OUT_PARAM_ERROR; /* not allowed */
default:
g_assert_not_reached();
}
drc->state = SPAPR_DRC_STATE_PHYSICAL_POWERON;
if (drc->unplug_requested) {
uint32_t drc_index = spapr_drc_index(drc);
trace_spapr_drc_set_isolation_state_finalizing(drc_index);
spapr_drc_detach(drc);
}
drc->configured = false;
return RTAS_OUT_SUCCESS;
}
static uint32_t drc_unisolate_physical(sPAPRDRConnector *drc)
{
switch (drc->state) {
case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE:
case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED:
return RTAS_OUT_SUCCESS; /* Nothing to do */
case SPAPR_DRC_STATE_PHYSICAL_POWERON:
break; /* see below */
default:
g_assert_not_reached();
}
/* cannot unisolate a non-existent resource, and, or resources
* which are in an 'UNUSABLE' allocation state. (PAPR 2.7,
* 13.5.3.5)
@@ -90,20 +90,26 @@ static uint32_t drc_unisolate_physical(sPAPRDRConnector *drc)
return RTAS_OUT_NO_SUCH_INDICATOR;
}
drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
drc->state = SPAPR_DRC_STATE_PHYSICAL_UNISOLATE;
drc->ccs_offset = drc->fdt_start_offset;
drc->ccs_depth = 0;
return RTAS_OUT_SUCCESS;
}
static uint32_t drc_isolate_logical(sPAPRDRConnector *drc)
{
/* if the guest is configuring a device attached to this DRC, we
* should reset the configuration state at this point since it may
* no longer be reliable (guest released device and needs to start
* over, or unplug occurred so the FDT is no longer valid)
*/
g_free(drc->ccs);
drc->ccs = NULL;
switch (drc->state) {
case SPAPR_DRC_STATE_LOGICAL_AVAILABLE:
case SPAPR_DRC_STATE_LOGICAL_UNUSABLE:
return RTAS_OUT_SUCCESS; /* Nothing to do */
case SPAPR_DRC_STATE_LOGICAL_CONFIGURED:
break; /* see below */
case SPAPR_DRC_STATE_LOGICAL_UNISOLATE:
return RTAS_OUT_PARAM_ERROR; /* not allowed */
default:
g_assert_not_reached();
}
/*
* Fail any requests to ISOLATE the LMB DRC if this LMB doesn't
@@ -116,11 +122,11 @@ static uint32_t drc_isolate_logical(sPAPRDRConnector *drc)
* actually being unplugged, fail the isolation request here.
*/
if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB
&& !drc->awaiting_release) {
&& !drc->unplug_requested) {
return RTAS_OUT_HW_ERROR;
}
drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED;
drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE;
/* if we're awaiting release, but still in an unconfigured state,
* it's likely the guest is still in the process of configuring
@@ -130,38 +136,51 @@ static uint32_t drc_isolate_logical(sPAPRDRConnector *drc)
* configured state, as suggested by the state diagram from PAPR+
* 2.7, 13.4
*/
if (drc->awaiting_release) {
if (drc->unplug_requested) {
uint32_t drc_index = spapr_drc_index(drc);
if (drc->configured) {
trace_spapr_drc_set_isolation_state_finalizing(drc_index);
spapr_drc_detach(drc, DEVICE(drc->dev), NULL);
} else {
trace_spapr_drc_set_isolation_state_deferring(drc_index);
}
trace_spapr_drc_set_isolation_state_finalizing(drc_index);
spapr_drc_detach(drc);
}
drc->configured = false;
return RTAS_OUT_SUCCESS;
}
static uint32_t drc_unisolate_logical(sPAPRDRConnector *drc)
{
/* cannot unisolate a non-existent resource, and, or resources
* which are in an 'UNUSABLE' allocation state. (PAPR 2.7,
* 13.5.3.5)
*/
if (!drc->dev ||
drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) {
return RTAS_OUT_NO_SUCH_INDICATOR;
switch (drc->state) {
case SPAPR_DRC_STATE_LOGICAL_UNISOLATE:
case SPAPR_DRC_STATE_LOGICAL_CONFIGURED:
return RTAS_OUT_SUCCESS; /* Nothing to do */
case SPAPR_DRC_STATE_LOGICAL_AVAILABLE:
break; /* see below */
case SPAPR_DRC_STATE_LOGICAL_UNUSABLE:
return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */
default:
g_assert_not_reached();
}
drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
/* Move to AVAILABLE state should have ensured device was present */
g_assert(drc->dev);
drc->state = SPAPR_DRC_STATE_LOGICAL_UNISOLATE;
drc->ccs_offset = drc->fdt_start_offset;
drc->ccs_depth = 0;
return RTAS_OUT_SUCCESS;
}
static uint32_t drc_set_usable(sPAPRDRConnector *drc)
{
switch (drc->state) {
case SPAPR_DRC_STATE_LOGICAL_AVAILABLE:
case SPAPR_DRC_STATE_LOGICAL_UNISOLATE:
case SPAPR_DRC_STATE_LOGICAL_CONFIGURED:
return RTAS_OUT_SUCCESS; /* Nothing to do */
case SPAPR_DRC_STATE_LOGICAL_UNUSABLE:
break; /* see below */
default:
g_assert_not_reached();
}
/* if there's no resource/device associated with the DRC, there's
* no way for us to put it in an allocation state consistent with
* being 'USABLE'. PAPR 2.7, 13.5.3.4 documents that this should
@@ -170,30 +189,36 @@ static uint32_t drc_set_usable(sPAPRDRConnector *drc)
if (!drc->dev) {
return RTAS_OUT_NO_SUCH_INDICATOR;
}
if (drc->awaiting_release && drc->awaiting_allocation) {
/* kernel is acknowledging a previous hotplug event
* while we are already removing it.
* it's safe to ignore awaiting_allocation here since we know the
* situation is predicated on the guest either already having done
* so (boot-time hotplug), or never being able to acquire in the
* first place (hotplug followed by immediate unplug).
*/
if (drc->unplug_requested) {
/* Don't allow the guest to move a device away from UNUSABLE
* state when we want to unplug it */
return RTAS_OUT_NO_SUCH_INDICATOR;
}
drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
drc->awaiting_allocation = false;
drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE;
return RTAS_OUT_SUCCESS;
}
static uint32_t drc_set_unusable(sPAPRDRConnector *drc)
{
drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE;
if (drc->awaiting_release) {
switch (drc->state) {
case SPAPR_DRC_STATE_LOGICAL_UNUSABLE:
return RTAS_OUT_SUCCESS; /* Nothing to do */
case SPAPR_DRC_STATE_LOGICAL_AVAILABLE:
break; /* see below */
case SPAPR_DRC_STATE_LOGICAL_UNISOLATE:
case SPAPR_DRC_STATE_LOGICAL_CONFIGURED:
return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */
default:
g_assert_not_reached();
}
drc->state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE;
if (drc->unplug_requested) {
uint32_t drc_index = spapr_drc_index(drc);
trace_spapr_drc_set_allocation_state_finalizing(drc_index);
spapr_drc_detach(drc, DEVICE(drc->dev), NULL);
spapr_drc_detach(drc);
}
return RTAS_OUT_SUCCESS;
@@ -247,11 +272,16 @@ static sPAPRDREntitySense physical_entity_sense(sPAPRDRConnector *drc)
static sPAPRDREntitySense logical_entity_sense(sPAPRDRConnector *drc)
{
if (drc->dev
&& (drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE)) {
return SPAPR_DR_ENTITY_SENSE_PRESENT;
} else {
switch (drc->state) {
case SPAPR_DRC_STATE_LOGICAL_UNUSABLE:
return SPAPR_DR_ENTITY_SENSE_UNUSABLE;
case SPAPR_DRC_STATE_LOGICAL_AVAILABLE:
case SPAPR_DRC_STATE_LOGICAL_UNISOLATE:
case SPAPR_DRC_STATE_LOGICAL_CONFIGURED:
g_assert(drc->dev);
return SPAPR_DR_ENTITY_SENSE_PRESENT;
default:
g_assert_not_reached();
}
}
@@ -344,23 +374,18 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
{
trace_spapr_drc_attach(spapr_drc_index(drc));
if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) {
if (drc->dev) {
error_setg(errp, "an attached device is still awaiting release");
return;
}
if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) {
g_assert(drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE);
}
g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE)
|| (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON));
g_assert(fdt);
drc->dev = d;
drc->fdt = fdt;
drc->fdt_start_offset = fdt_start_offset;
if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
drc->awaiting_allocation = true;
}
object_property_add_link(OBJECT(drc), "device",
object_get_typename(OBJECT(drc->dev)),
(Object **)(&drc->dev),
@@ -373,85 +398,65 @@ static void spapr_drc_release(sPAPRDRConnector *drc)
drck->release(drc->dev);
drc->awaiting_release = false;
drc->unplug_requested = false;
g_free(drc->fdt);
drc->fdt = NULL;
drc->fdt_start_offset = 0;
object_property_del(OBJECT(drc), "device", NULL);
object_property_del(OBJECT(drc), "device", &error_abort);
drc->dev = NULL;
}
void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp)
void spapr_drc_detach(sPAPRDRConnector *drc)
{
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
trace_spapr_drc_detach(spapr_drc_index(drc));
if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) {
trace_spapr_drc_awaiting_isolated(spapr_drc_index(drc));
drc->awaiting_release = true;
return;
}
g_assert(drc->dev);
if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI &&
drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) {
trace_spapr_drc_awaiting_unusable(spapr_drc_index(drc));
drc->awaiting_release = true;
return;
}
drc->unplug_requested = true;
if (drc->awaiting_allocation) {
drc->awaiting_release = true;
trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
if (drc->state != drck->empty_state) {
trace_spapr_drc_awaiting_quiesce(spapr_drc_index(drc));
return;
}
spapr_drc_release(drc);
}
static bool release_pending(sPAPRDRConnector *drc)
void spapr_drc_reset(sPAPRDRConnector *drc)
{
return drc->awaiting_release;
}
static void drc_reset(void *opaque)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(opaque);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
trace_spapr_drc_reset(spapr_drc_index(drc));
g_free(drc->ccs);
drc->ccs = NULL;
/* immediately upon reset we can safely assume DRCs whose devices
* are pending removal can be safely removed.
*/
if (drc->awaiting_release) {
if (drc->unplug_requested) {
spapr_drc_release(drc);
}
drc->awaiting_allocation = false;
if (drc->dev) {
/* A device present at reset is coldplugged */
drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
}
drc->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE;
/* A device present at reset is ready to go, same as coldplugged */
drc->state = drck->ready_state;
} else {
/* Otherwise device is absent, but might be hotplugged */
drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED;
if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE;
}
drc->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE;
drc->state = drck->empty_state;
}
drc->ccs_offset = -1;
drc->ccs_depth = -1;
}
static void drc_reset(void *opaque)
{
spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
}
static bool spapr_drc_needed(void *opaque)
{
sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
bool rc = false;
sPAPRDREntitySense value = drck->dr_entity_sense(drc);
/* If no dev is plugged in there is no need to migrate the DRC state */
@@ -460,23 +465,10 @@ static bool spapr_drc_needed(void *opaque)
}
/*
* If there is dev plugged in, we need to migrate the DRC state when
* it is different from cold-plugged state
*/
switch (spapr_drc_type(drc)) {
case SPAPR_DR_CONNECTOR_TYPE_PCI:
case SPAPR_DR_CONNECTOR_TYPE_CPU:
case SPAPR_DR_CONNECTOR_TYPE_LMB:
rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) &&
(drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) &&
drc->configured && !drc->awaiting_release);
break;
case SPAPR_DR_CONNECTOR_TYPE_PHB:
case SPAPR_DR_CONNECTOR_TYPE_VIO:
default:
g_assert_not_reached();
}
return rc;
* We need to migrate the state if it's not equal to the expected
* long-term state, which is the same as the coldplugged initial
* state */
return (drc->state != drck->ready_state);
}
static const VMStateDescription vmstate_spapr_drc = {
@@ -485,12 +477,7 @@ static const VMStateDescription vmstate_spapr_drc = {
.minimum_version_id = 1,
.needed = spapr_drc_needed,
.fields = (VMStateField []) {
VMSTATE_UINT32(isolation_state, sPAPRDRConnector),
VMSTATE_UINT32(allocation_state, sPAPRDRConnector),
VMSTATE_UINT32(dr_indicator, sPAPRDRConnector),
VMSTATE_BOOL(configured, sPAPRDRConnector),
VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
VMSTATE_UINT32(state, sPAPRDRConnector),
VMSTATE_END_OF_LIST()
}
};
@@ -559,46 +546,96 @@ sPAPRDRConnector *spapr_dr_connector_new(Object *owner, const char *type,
object_property_set_bool(OBJECT(drc), true, "realized", NULL);
g_free(prop_name);
/* PCI slot always start in a USABLE state, and stay there */
if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) {
drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
}
return drc;
}
static void spapr_dr_connector_instance_init(Object *obj)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
object_property_add_uint32_ptr(obj, "id", &drc->id, NULL);
object_property_add(obj, "index", "uint32", prop_get_index,
NULL, NULL, NULL, NULL);
object_property_add(obj, "fdt", "struct", prop_get_fdt,
NULL, NULL, NULL, NULL);
drc->state = drck->empty_state;
}
static void spapr_dr_connector_class_init(ObjectClass *k, void *data)
{
DeviceClass *dk = DEVICE_CLASS(k);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k);
dk->realize = realize;
dk->unrealize = unrealize;
drck->release_pending = release_pending;
/*
* Reason: it crashes FIXME find and document the real reason
*/
dk->user_creatable = false;
}
static bool drc_physical_needed(void *opaque)
{
sPAPRDRCPhysical *drcp = (sPAPRDRCPhysical *)opaque;
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(drcp);
if ((drc->dev && (drcp->dr_indicator == SPAPR_DR_INDICATOR_ACTIVE))
|| (!drc->dev && (drcp->dr_indicator == SPAPR_DR_INDICATOR_INACTIVE))) {
return false;
}
return true;
}
static const VMStateDescription vmstate_spapr_drc_physical = {
.name = "spapr_drc/physical",
.version_id = 1,
.minimum_version_id = 1,
.needed = drc_physical_needed,
.fields = (VMStateField []) {
VMSTATE_UINT32(dr_indicator, sPAPRDRCPhysical),
VMSTATE_END_OF_LIST()
}
};
static void drc_physical_reset(void *opaque)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(opaque);
sPAPRDRCPhysical *drcp = SPAPR_DRC_PHYSICAL(drc);
if (drc->dev) {
drcp->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE;
} else {
drcp->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE;
}
}
static void realize_physical(DeviceState *d, Error **errp)
{
sPAPRDRCPhysical *drcp = SPAPR_DRC_PHYSICAL(d);
Error *local_err = NULL;
realize(d, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
vmstate_register(DEVICE(drcp), spapr_drc_index(SPAPR_DR_CONNECTOR(drcp)),
&vmstate_spapr_drc_physical, drcp);
qemu_register_reset(drc_physical_reset, drcp);
}
static void spapr_drc_physical_class_init(ObjectClass *k, void *data)
{
DeviceClass *dk = DEVICE_CLASS(k);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k);
dk->realize = realize_physical;
drck->dr_entity_sense = physical_entity_sense;
drck->isolate = drc_isolate_physical;
drck->unisolate = drc_unisolate_physical;
drck->ready_state = SPAPR_DRC_STATE_PHYSICAL_CONFIGURED;
drck->empty_state = SPAPR_DRC_STATE_PHYSICAL_POWERON;
}
static void spapr_drc_logical_class_init(ObjectClass *k, void *data)
@@ -608,6 +645,8 @@ static void spapr_drc_logical_class_init(ObjectClass *k, void *data)
drck->dr_entity_sense = logical_entity_sense;
drck->isolate = drc_isolate_logical;
drck->unisolate = drc_unisolate_logical;
drck->ready_state = SPAPR_DRC_STATE_LOGICAL_CONFIGURED;
drck->empty_state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE;
}
static void spapr_drc_cpu_class_init(ObjectClass *k, void *data)
@@ -653,7 +692,7 @@ static const TypeInfo spapr_dr_connector_info = {
static const TypeInfo spapr_drc_physical_info = {
.name = TYPE_SPAPR_DRC_PHYSICAL,
.parent = TYPE_SPAPR_DR_CONNECTOR,
.instance_size = sizeof(sPAPRDRConnector),
.instance_size = sizeof(sPAPRDRCPhysical),
.class_init = spapr_drc_physical_class_init,
.abstract = true,
};
@@ -661,7 +700,6 @@ static const TypeInfo spapr_drc_physical_info = {
static const TypeInfo spapr_drc_logical_info = {
.name = TYPE_SPAPR_DRC_LOGICAL,
.parent = TYPE_SPAPR_DR_CONNECTOR,
.instance_size = sizeof(sPAPRDRConnector),
.class_init = spapr_drc_logical_class_init,
.abstract = true,
};
@@ -669,21 +707,18 @@ static const TypeInfo spapr_drc_logical_info = {
static const TypeInfo spapr_drc_cpu_info = {
.name = TYPE_SPAPR_DRC_CPU,
.parent = TYPE_SPAPR_DRC_LOGICAL,
.instance_size = sizeof(sPAPRDRConnector),
.class_init = spapr_drc_cpu_class_init,
};
static const TypeInfo spapr_drc_pci_info = {
.name = TYPE_SPAPR_DRC_PCI,
.parent = TYPE_SPAPR_DRC_PHYSICAL,
.instance_size = sizeof(sPAPRDRConnector),
.class_init = spapr_drc_pci_class_init,
};
static const TypeInfo spapr_drc_lmb_info = {
.name = TYPE_SPAPR_DRC_LMB,
.parent = TYPE_SPAPR_DRC_LOGICAL,
.instance_size = sizeof(sPAPRDRConnector),
.class_init = spapr_drc_lmb_class_init,
};
@@ -896,12 +931,18 @@ static uint32_t rtas_set_dr_indicator(uint32_t idx, uint32_t state)
{
sPAPRDRConnector *drc = spapr_drc_by_index(idx);
if (!drc) {
return RTAS_OUT_PARAM_ERROR;
if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_PHYSICAL)) {
return RTAS_OUT_NO_SUCH_INDICATOR;
}
if ((state != SPAPR_DR_INDICATOR_INACTIVE)
&& (state != SPAPR_DR_INDICATOR_ACTIVE)
&& (state != SPAPR_DR_INDICATOR_IDENTIFY)
&& (state != SPAPR_DR_INDICATOR_ACTION)) {
return RTAS_OUT_PARAM_ERROR; /* bad state parameter */
}
trace_spapr_drc_set_dr_indicator(idx, state);
drc->dr_indicator = state;
SPAPR_DRC_PHYSICAL(drc)->dr_indicator = state;
return RTAS_OUT_SUCCESS;
}
@@ -1011,7 +1052,7 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
uint64_t wa_offset;
uint32_t drc_index;
sPAPRDRConnector *drc;
sPAPRConfigureConnectorState *ccs;
sPAPRDRConnectorClass *drck;
sPAPRDRCCResponse resp = SPAPR_DR_CC_RESPONSE_CONTINUE;
int rc;
@@ -1030,18 +1071,16 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
goto out;
}
if (!drc->fdt) {
trace_spapr_rtas_ibm_configure_connector_missing_fdt(drc_index);
if ((drc->state != SPAPR_DRC_STATE_LOGICAL_UNISOLATE)
&& (drc->state != SPAPR_DRC_STATE_PHYSICAL_UNISOLATE)) {
/* Need to unisolate the device before configuring */
rc = SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE;
goto out;
}
ccs = drc->ccs;
if (!ccs) {
ccs = g_new0(sPAPRConfigureConnectorState, 1);
ccs->fdt_offset = drc->fdt_start_offset;
drc->ccs = ccs;
}
g_assert(drc->fdt);
drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
do {
uint32_t tag;
@@ -1049,12 +1088,12 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
const struct fdt_property *prop;
int fdt_offset_next, prop_len;
tag = fdt_next_tag(drc->fdt, ccs->fdt_offset, &fdt_offset_next);
tag = fdt_next_tag(drc->fdt, drc->ccs_offset, &fdt_offset_next);
switch (tag) {
case FDT_BEGIN_NODE:
ccs->fdt_depth++;
name = fdt_get_name(drc->fdt, ccs->fdt_offset, NULL);
drc->ccs_depth++;
name = fdt_get_name(drc->fdt, drc->ccs_offset, NULL);
/* provide the name of the next OF node */
wa_offset = CC_VAL_DATA_OFFSET;
@@ -1063,30 +1102,22 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
resp = SPAPR_DR_CC_RESPONSE_NEXT_CHILD;
break;
case FDT_END_NODE:
ccs->fdt_depth--;
if (ccs->fdt_depth == 0) {
sPAPRDRIsolationState state = drc->isolation_state;
drc->ccs_depth--;
if (drc->ccs_depth == 0) {
uint32_t drc_index = spapr_drc_index(drc);
/* done sending the device tree, don't need to track
* the state anymore
*/
/* done sending the device tree, move to configured state */
trace_spapr_drc_set_configured(drc_index);
if (state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) {
drc->configured = true;
} else {
/* guest should be not configuring an isolated device */
trace_spapr_drc_set_configured_skipping(drc_index);
}
g_free(ccs);
drc->ccs = NULL;
ccs = NULL;
drc->state = drck->ready_state;
drc->ccs_offset = -1;
drc->ccs_depth = -1;
resp = SPAPR_DR_CC_RESPONSE_SUCCESS;
} else {
resp = SPAPR_DR_CC_RESPONSE_PREV_PARENT;
}
break;
case FDT_PROP:
prop = fdt_get_property_by_offset(drc->fdt, ccs->fdt_offset,
prop = fdt_get_property_by_offset(drc->fdt, drc->ccs_offset,
&prop_len);
name = fdt_string(drc->fdt, fdt32_to_cpu(prop->nameoff));
@@ -1111,8 +1142,8 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
/* keep seeking for an actionable tag */
break;
}
if (ccs) {
ccs->fdt_offset = fdt_offset_next;
if (drc->ccs_offset >= 0) {
drc->ccs_offset = fdt_offset_next;
}
} while (resp == SPAPR_DR_CC_RESPONSE_CONTINUE);
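
To summarize the consolidated DRC state machine, as implied by the
isolate/unisolate/set-usable handlers above (a sketch reconstructed
from this diff, not copied from PAPR+):

    logical DRCs:   UNUSABLE   <-- set-unusable / set-usable -->  AVAILABLE
                    AVAILABLE   -- unisolate -->            UNISOLATE
                    UNISOLATE   -- configure-connector -->  CONFIGURED
                    CONFIGURED  -- isolate -->              AVAILABLE

    physical DRCs:  POWERON     -- unisolate -->            UNISOLATE
                    UNISOLATE   -- configure-connector -->  CONFIGURED
                    CONFIGURED  -- isolate -->              POWERON

ready_state is the CONFIGURED state and empty_state is
UNUSABLE/POWERON, which is why spapr_drc_needed() now only migrates a
DRC whose state differs from ready_state, and why spapr_drc_detach()
completes immediately once the DRC reaches empty_state.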


@@ -42,8 +42,6 @@
#include "hw/ppc/spapr_ovec.h"
#include <libfdt.h>
struct rtas_error_log {
uint32_t summary;
#define RTAS_LOG_VERSION_MASK 0xff000000
#define RTAS_LOG_VERSION_6 0x06000000
#define RTAS_LOG_SEVERITY_MASK 0x00e00000
@@ -85,6 +83,9 @@ struct rtas_error_log {
#define RTAS_LOG_TYPE_ECC_CORR 0x0000000a
#define RTAS_LOG_TYPE_EPOW 0x00000040
#define RTAS_LOG_TYPE_HOTPLUG 0x000000e5
struct rtas_error_log {
uint32_t summary;
uint32_t extended_length;
} QEMU_PACKED;
@@ -166,8 +167,7 @@ struct rtas_event_log_v6_epow {
uint64_t reason_code;
} QEMU_PACKED;
struct epow_log_full {
struct rtas_error_log hdr;
struct epow_extended_log {
struct rtas_event_log_v6 v6hdr;
struct rtas_event_log_v6_maina maina;
struct rtas_event_log_v6_mainb mainb;
@@ -205,8 +205,7 @@ struct rtas_event_log_v6_hp {
union drc_identifier drc_id;
} QEMU_PACKED;
struct hp_log_full {
struct rtas_error_log hdr;
struct hp_extended_log {
struct rtas_event_log_v6 v6hdr;
struct rtas_event_log_v6_maina maina;
struct rtas_event_log_v6_mainb mainb;
@@ -341,25 +340,26 @@ static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type)
return source->irq;
}
static void rtas_event_log_queue(int log_type, void *data)
static uint32_t spapr_event_log_entry_type(sPAPREventLogEntry *entry)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1);
return entry->summary & RTAS_LOG_TYPE_MASK;
}
g_assert(data);
entry->log_type = log_type;
entry->data = data;
static void rtas_event_log_queue(sPAPRMachineState *spapr,
sPAPREventLogEntry *entry)
{
QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next);
}
static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask)
static sPAPREventLogEntry *rtas_event_log_dequeue(sPAPRMachineState *spapr,
uint32_t event_mask)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
sPAPREventLogEntry *entry = NULL;
QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
const sPAPREventSource *source =
rtas_event_log_to_source(spapr, entry->log_type);
rtas_event_log_to_source(spapr,
spapr_event_log_entry_type(entry));
if (source->mask & event_mask) {
break;
@@ -380,7 +380,8 @@ static bool rtas_event_log_contains(uint32_t event_mask)
QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
const sPAPREventSource *source =
rtas_event_log_to_source(spapr, entry->log_type);
rtas_event_log_to_source(spapr,
spapr_event_log_entry_type(entry));
if (source->mask & event_mask) {
return true;
@@ -428,27 +429,28 @@ static void spapr_init_maina(struct rtas_event_log_v6_maina *maina,
static void spapr_powerdown_req(Notifier *n, void *opaque)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
struct rtas_error_log *hdr;
sPAPREventLogEntry *entry;
struct rtas_event_log_v6 *v6hdr;
struct rtas_event_log_v6_maina *maina;
struct rtas_event_log_v6_mainb *mainb;
struct rtas_event_log_v6_epow *epow;
struct epow_log_full *new_epow;
struct epow_extended_log *new_epow;
entry = g_new(sPAPREventLogEntry, 1);
new_epow = g_malloc0(sizeof(*new_epow));
hdr = &new_epow->hdr;
entry->extended_log = new_epow;
v6hdr = &new_epow->v6hdr;
maina = &new_epow->maina;
mainb = &new_epow->mainb;
epow = &new_epow->epow;
hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
| RTAS_LOG_SEVERITY_EVENT
| RTAS_LOG_DISPOSITION_NOT_RECOVERED
| RTAS_LOG_OPTIONAL_PART_PRESENT
| RTAS_LOG_TYPE_EPOW);
hdr->extended_length = cpu_to_be32(sizeof(*new_epow)
- sizeof(new_epow->hdr));
entry->summary = RTAS_LOG_VERSION_6
| RTAS_LOG_SEVERITY_EVENT
| RTAS_LOG_DISPOSITION_NOT_RECOVERED
| RTAS_LOG_OPTIONAL_PART_PRESENT
| RTAS_LOG_TYPE_EPOW;
entry->extended_length = sizeof(*new_epow);
spapr_init_v6hdr(v6hdr);
spapr_init_maina(maina, 3 /* Main-A, Main-B and EPOW */);
@@ -468,7 +470,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL;
epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC;
rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow);
rtas_event_log_queue(spapr, entry);
qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
rtas_event_log_to_irq(spapr,
@@ -480,28 +482,29 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
union drc_identifier *drc_id)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
struct hp_log_full *new_hp;
struct rtas_error_log *hdr;
sPAPREventLogEntry *entry;
struct hp_extended_log *new_hp;
struct rtas_event_log_v6 *v6hdr;
struct rtas_event_log_v6_maina *maina;
struct rtas_event_log_v6_mainb *mainb;
struct rtas_event_log_v6_hp *hp;
new_hp = g_malloc0(sizeof(struct hp_log_full));
hdr = &new_hp->hdr;
entry = g_new(sPAPREventLogEntry, 1);
new_hp = g_malloc0(sizeof(struct hp_extended_log));
entry->extended_log = new_hp;
v6hdr = &new_hp->v6hdr;
maina = &new_hp->maina;
mainb = &new_hp->mainb;
hp = &new_hp->hp;
hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
| RTAS_LOG_SEVERITY_EVENT
| RTAS_LOG_DISPOSITION_NOT_RECOVERED
| RTAS_LOG_OPTIONAL_PART_PRESENT
| RTAS_LOG_INITIATOR_HOTPLUG
| RTAS_LOG_TYPE_HOTPLUG);
hdr->extended_length = cpu_to_be32(sizeof(*new_hp)
- sizeof(new_hp->hdr));
entry->summary = RTAS_LOG_VERSION_6
| RTAS_LOG_SEVERITY_EVENT
| RTAS_LOG_DISPOSITION_NOT_RECOVERED
| RTAS_LOG_OPTIONAL_PART_PRESENT
| RTAS_LOG_INITIATOR_HOTPLUG
| RTAS_LOG_TYPE_HOTPLUG;
entry->extended_length = sizeof(*new_hp);
spapr_init_v6hdr(v6hdr);
spapr_init_maina(maina, 3 /* Main-A, Main-B, HP */);
@@ -551,7 +554,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
cpu_to_be32(drc_id->count_indexed.index);
}
rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp);
rtas_event_log_queue(spapr, entry);
qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
rtas_event_log_to_irq(spapr,
@@ -628,7 +631,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t mask, buf, len, event_len;
uint64_t xinfo;
sPAPREventLogEntry *event;
struct rtas_error_log *hdr;
struct rtas_error_log header;
int i;
if ((nargs < 6) || (nargs > 7) || nret != 1) {
@@ -644,21 +647,24 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
xinfo |= (uint64_t)rtas_ld(args, 6) << 32;
}
event = rtas_event_log_dequeue(mask);
event = rtas_event_log_dequeue(spapr, mask);
if (!event) {
goto out_no_events;
}
hdr = event->data;
event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr);
event_len = event->extended_length + sizeof(header);
if (event_len < len) {
len = event_len;
}
cpu_physical_memory_write(buf, event->data, len);
header.summary = cpu_to_be32(event->summary);
header.extended_length = cpu_to_be32(event->extended_length);
cpu_physical_memory_write(buf, &header, sizeof(header));
cpu_physical_memory_write(buf + sizeof(header), event->extended_log,
event->extended_length);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
g_free(event->data);
g_free(event->extended_log);
g_free(event);
/* according to PAPR+, the IRQ must be left asserted, or re-asserted, if
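
One consequence of the events rework above: sPAPREventLogEntry now
stores summary and extended_length in host byte order (so
vmstate_spapr_event_entry in spapr.c can migrate them directly), and
the big-endian rtas_error_log header is only assembled when the guest
reads the event.  The buffer check_exception() writes to the guest
ends up laid out as follows (a sketch, per the two
cpu_physical_memory_write() calls above):

    bytes 0..3    summary          (be32)
    bytes 4..7    extended_length  (be32)
    bytes 8..     extended_log     (extended_length bytes of pre-built event data)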


@@ -3,6 +3,7 @@
#include "sysemu/hw_accel.h"
#include "sysemu/sysemu.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "helper_regs.h"
@@ -354,6 +355,401 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
return H_SUCCESS;
}
struct sPAPRPendingHPT {
/* These fields are read-only after initialization */
int shift;
QemuThread thread;
/* These fields are protected by the BQL */
bool complete;
/* These fields are private to the preparation thread if
* !complete, otherwise protected by the BQL */
int ret;
void *hpt;
};
static void free_pending_hpt(sPAPRPendingHPT *pending)
{
if (pending->hpt) {
qemu_vfree(pending->hpt);
}
g_free(pending);
}
static void *hpt_prepare_thread(void *opaque)
{
sPAPRPendingHPT *pending = opaque;
size_t size = 1ULL << pending->shift;
pending->hpt = qemu_memalign(size, size);
if (pending->hpt) {
memset(pending->hpt, 0, size);
pending->ret = H_SUCCESS;
} else {
pending->ret = H_NO_MEM;
}
qemu_mutex_lock_iothread();
if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) {
/* Ready to go */
pending->complete = true;
} else {
/* We've been cancelled, clean ourselves up */
free_pending_hpt(pending);
}
qemu_mutex_unlock_iothread();
return NULL;
}
/* Must be called with BQL held */
static void cancel_hpt_prepare(sPAPRMachineState *spapr)
{
sPAPRPendingHPT *pending = spapr->pending_hpt;
/* Let the thread know it's cancelled */
spapr->pending_hpt = NULL;
if (!pending) {
/* Nothing to do */
return;
}
if (!pending->complete) {
/* thread will clean itself up */
return;
}
free_pending_hpt(pending);
}
/* Convert a return code from the KVM ioctl()s implementing resize HPT
* into a PAPR hypercall return code */
static target_ulong resize_hpt_convert_rc(int ret)
{
if (ret >= 100000) {
return H_LONG_BUSY_ORDER_100_SEC;
} else if (ret >= 10000) {
return H_LONG_BUSY_ORDER_10_SEC;
} else if (ret >= 1000) {
return H_LONG_BUSY_ORDER_1_SEC;
} else if (ret >= 100) {
return H_LONG_BUSY_ORDER_100_MSEC;
} else if (ret >= 10) {
return H_LONG_BUSY_ORDER_10_MSEC;
} else if (ret > 0) {
return H_LONG_BUSY_ORDER_1_MSEC;
}
switch (ret) {
case 0:
return H_SUCCESS;
case -EPERM:
return H_AUTHORITY;
case -EINVAL:
return H_PARAMETER;
case -ENXIO:
return H_CLOSED;
case -ENOSPC:
return H_PTEG_FULL;
case -EBUSY:
return H_BUSY;
case -ENOMEM:
return H_NO_MEM;
default:
return H_HARDWARE;
}
}
static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
sPAPRMachineState *spapr,
target_ulong opcode,
target_ulong *args)
{
target_ulong flags = args[0];
int shift = args[1];
sPAPRPendingHPT *pending = spapr->pending_hpt;
uint64_t current_ram_size = MACHINE(spapr)->ram_size;
int rc;
if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
return H_AUTHORITY;
}
if (!spapr->htab_shift) {
/* Radix guest, no HPT */
return H_NOT_AVAILABLE;
}
trace_spapr_h_resize_hpt_prepare(flags, shift);
if (flags != 0) {
return H_PARAMETER;
}
if (shift && ((shift < 18) || (shift > 46))) {
return H_PARAMETER;
}
current_ram_size = pc_existing_dimms_capacity(&error_fatal);
/* We only allow the guest to allocate an HPT one order above what
* we'd normally give them (to stop a small guest claiming a huge
* chunk of resources in the HPT */
if (shift > (spapr_hpt_shift_for_ramsize(current_ram_size) + 1)) {
return H_RESOURCE;
}
rc = kvmppc_resize_hpt_prepare(cpu, flags, shift);
if (rc != -ENOSYS) {
return resize_hpt_convert_rc(rc);
}
if (pending) {
/* something already in progress */
if (pending->shift == shift) {
/* and it's suitable */
if (pending->complete) {
return pending->ret;
} else {
return H_LONG_BUSY_ORDER_100_MSEC;
}
}
/* not suitable, cancel and replace */
cancel_hpt_prepare(spapr);
}
if (!shift) {
/* nothing to do */
return H_SUCCESS;
}
/* start new prepare */
pending = g_new0(sPAPRPendingHPT, 1);
pending->shift = shift;
pending->ret = H_HARDWARE;
qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
spapr->pending_hpt = pending;
/* In theory we could estimate the time more accurately based on
* the new size, but there's not much point */
return H_LONG_BUSY_ORDER_100_MSEC;
}
static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot)
{
uint8_t *addr = htab;
addr += pteg * HASH_PTEG_SIZE_64;
addr += slot * HASH_PTE_SIZE_64;
return ldq_p(addr);
}
static void new_hpte_store(void *htab, uint64_t pteg, int slot,
uint64_t pte0, uint64_t pte1)
{
uint8_t *addr = htab;
addr += pteg * HASH_PTEG_SIZE_64;
addr += slot * HASH_PTE_SIZE_64;
stq_p(addr, pte0);
stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1);
}
static int rehash_hpte(PowerPCCPU *cpu,
const ppc_hash_pte64_t *hptes,
void *old_hpt, uint64_t oldsize,
void *new_hpt, uint64_t newsize,
uint64_t pteg, int slot)
{
uint64_t old_hash_mask = (oldsize >> 7) - 1;
uint64_t new_hash_mask = (newsize >> 7) - 1;
target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot);
target_ulong pte1;
uint64_t avpn;
unsigned base_pg_shift;
uint64_t hash, new_pteg, replace_pte0;
if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) {
return H_SUCCESS;
}
pte1 = ppc_hash64_hpte1(cpu, hptes, slot);
base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1);
assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */
avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23);
if (pte0 & HPTE64_V_SECONDARY) {
pteg = ~pteg;
}
if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) {
uint64_t offset, vsid;
/* We only have 28 - 23 bits of offset in avpn */
offset = (avpn & 0x1f) << 23;
vsid = avpn >> 5;
/* We can find more bits from the pteg value */
if (base_pg_shift < 23) {
offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift;
}
hash = vsid ^ (offset >> base_pg_shift);
} else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) {
uint64_t offset, vsid;
/* We only have 40 - 23 bits of seg_off in avpn */
offset = (avpn & 0x1ffff) << 23;
vsid = avpn >> 17;
if (base_pg_shift < 23) {
offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask)
<< base_pg_shift;
}
hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift);
} else {
error_report("rehash_pte: Bad segment size in HPTE");
return H_HARDWARE;
}
new_pteg = hash & new_hash_mask;
if (pte0 & HPTE64_V_SECONDARY) {
assert(~pteg == (hash & old_hash_mask));
new_pteg = ~new_pteg;
} else {
assert(pteg == (hash & old_hash_mask));
}
assert((oldsize != newsize) || (pteg == new_pteg));
replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot);
/*
* Strictly speaking, we don't need all these tests, since we only
* ever rehash bolted HPTEs. We might in future handle non-bolted
* HPTEs, though so make the logic correct for those cases as
* well.
*/
if (replace_pte0 & HPTE64_V_VALID) {
assert(newsize < oldsize);
if (replace_pte0 & HPTE64_V_BOLTED) {
if (pte0 & HPTE64_V_BOLTED) {
/* Bolted collision, nothing we can do */
return H_PTEG_FULL;
} else {
/* Discard this hpte */
return H_SUCCESS;
}
}
}
new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1);
return H_SUCCESS;
}
static int rehash_hpt(PowerPCCPU *cpu,
void *old_hpt, uint64_t oldsize,
void *new_hpt, uint64_t newsize)
{
uint64_t n_ptegs = oldsize >> 7;
uint64_t pteg;
int slot;
int rc;
for (pteg = 0; pteg < n_ptegs; pteg++) {
hwaddr ptex = pteg * HPTES_PER_GROUP;
const ppc_hash_pte64_t *hptes
= ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
if (!hptes) {
return H_HARDWARE;
}
for (slot = 0; slot < HPTES_PER_GROUP; slot++) {
rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize,
pteg, slot);
if (rc != H_SUCCESS) {
ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
return rc;
}
}
ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
}
return H_SUCCESS;
}
static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu,
sPAPRMachineState *spapr,
target_ulong opcode,
target_ulong *args)
{
target_ulong flags = args[0];
target_ulong shift = args[1];
sPAPRPendingHPT *pending = spapr->pending_hpt;
int rc;
size_t newsize;
if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
return H_AUTHORITY;
}
trace_spapr_h_resize_hpt_commit(flags, shift);
rc = kvmppc_resize_hpt_commit(cpu, flags, shift);
if (rc != -ENOSYS) {
return resize_hpt_convert_rc(rc);
}
if (flags != 0) {
return H_PARAMETER;
}
if (!pending || (pending->shift != shift)) {
/* no matching prepare */
return H_CLOSED;
}
if (!pending->complete) {
/* prepare has not completed */
return H_BUSY;
}
/* Shouldn't have got past PREPARE without an HPT */
g_assert(spapr->htab_shift);
newsize = 1ULL << pending->shift;
rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr),
pending->hpt, newsize);
if (rc == H_SUCCESS) {
qemu_vfree(spapr->htab);
spapr->htab = pending->hpt;
spapr->htab_shift = pending->shift;
if (kvm_enabled()) {
/* For KVM PR, update the HPT pointer */
target_ulong sdr1 = (target_ulong)(uintptr_t)spapr->htab
| (spapr->htab_shift - 18);
kvmppc_update_sdr1(sdr1);
}
pending->hpt = NULL; /* so it's not free()d */
}
/* Clean up */
spapr->pending_hpt = NULL;
free_pending_hpt(pending);
return rc;
}
static target_ulong h_set_sprg0(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
@@ -1133,6 +1529,45 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300);
spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300);
/*
* HPT resizing is a bit of a special case, because when enabled
* we assume an HPT guest will support it until it says it
* doesn't, instead of assuming it won't support it until it says
* it does. Strictly speaking that approach could break for
* guests which don't make a CAS call, but those are so old we
* don't care about them. Without that assumption we'd have to
* make at least a temporary allocation of an HPT sized for max
* memory, which could be impossibly difficult under KVM HV if
* maxram is large.
*/
if (!guest_radix && !spapr_ovec_test(ov5_guest, OV5_HPT_RESIZE)) {
int maxshift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
if (spapr->resize_hpt == SPAPR_RESIZE_HPT_REQUIRED) {
error_report(
"h_client_architecture_support: Guest doesn't support HPT resizing, but resize-hpt=required");
exit(1);
}
if (spapr->htab_shift < maxshift) {
CPUState *cs;
/* Guest doesn't know about HPT resizing, so we
* pre-emptively resize for the maximum permitted RAM. At
* the point this is called, nothing should have been
* entered into the existing HPT */
spapr_reallocate_hpt(spapr, maxshift, &error_fatal);
CPU_FOREACH(cs) {
if (kvm_enabled()) {
/* For KVM PR, update the HPT pointer */
target_ulong sdr1 = (target_ulong)(uintptr_t)spapr->htab
| (spapr->htab_shift - 18);
kvmppc_update_sdr1(sdr1);
}
}
}
}
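The maxshift used above comes from spapr_hpt_shift_for_ramsize(), declared in the spapr.h hunk below. A sketch of the sizing rule, assuming QEMU's usual target of an HPT at least 1/128th of RAM with the architectural 256 KiB (1 << 18) floor; the function name and body here are illustrative, the real one lives in hw/ppc/spapr.c:

    static int hpt_shift_for_ramsize_sketch(uint64_t ramsize)
    {
        /* Assumed heuristic: HPT >= RAM/128, rounded up to a power of two */
        uint64_t hpt_size = ramsize / 128;
        int shift = 18;    /* architectural minimum: 256 KiB table */

        while ((1ULL << shift) < hpt_size) {
            shift++;
        }
        return shift;    /* e.g. 256 GiB maxram -> 2 GiB HPT -> shift 31 */
    }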
/* NOTE: there are actually a number of ov5 bits where input from the
* guest is always zero, and the platform/QEMU enables them independently
* of guest input. To model these properly we'd want some sort of mask,
@@ -1246,6 +1681,10 @@ static void hypercall_register_types(void)
/* hcall-bulk */
spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
/* hcall-hpt-resize */
spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare);
spapr_register_hypercall(H_RESIZE_HPT_COMMIT, h_resize_hpt_commit);
/* hcall-splpar */
spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
spapr_register_hypercall(H_CEDE, h_cede);


@@ -1443,7 +1443,9 @@ static void spapr_pci_plug(HotplugHandler *plug_handler,
/* If this is function 0, signal hotplug for all the device functions.
* Otherwise defer sending the hotplug event.
*/
-if (plugged_dev->hotplugged && PCI_FUNC(pdev->devfn) == 0) {
+if (!spapr_drc_hotplugged(plugged_dev)) {
+spapr_drc_reset(drc);
+} else if (PCI_FUNC(pdev->devfn) == 0) {
int i;
for (i = 0; i < 8; i++) {
@@ -1474,9 +1476,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
{
sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
PCIDevice *pdev = PCI_DEVICE(plugged_dev);
-sPAPRDRConnectorClass *drck;
sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
-Error *local_err = NULL;
if (!phb->dr_enabled) {
error_setg(errp, QERR_BUS_NO_HOTPLUG,
@@ -1487,8 +1487,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
g_assert(drc);
g_assert(drc->dev == plugged_dev);
-drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-if (!drck->release_pending(drc)) {
+if (!spapr_drc_unplug_requested(drc)) {
PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
uint32_t slotnr = PCI_SLOT(pdev->devfn);
sPAPRDRConnector *func_drc;
@@ -1504,7 +1503,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
state = func_drck->dr_entity_sense(func_drc);
if (state == SPAPR_DR_ENTITY_SENSE_PRESENT
-&& !func_drck->release_pending(func_drc)) {
+&& !spapr_drc_unplug_requested(func_drc)) {
error_setg(errp,
"PCI: slot %d, function %d still present. "
"Must unplug all non-0 functions first.",
@@ -1514,11 +1513,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
}
}
-spapr_drc_detach(drc, DEVICE(pdev), &local_err);
-if (local_err) {
-error_propagate(errp, local_err);
-return;
-}
+spapr_drc_detach(drc);
/* if this isn't func 0, defer unplug event. otherwise signal removal
* for all present functions


@@ -16,6 +16,8 @@ spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes"
# hw/ppc/spapr_hcall.c
spapr_cas_pvr_try(uint32_t pvr) "%x"
spapr_cas_pvr(uint32_t cur_pvr, bool explicit_match, uint32_t new_pvr) "current=%x, explicit_match=%u, new=%x"
spapr_h_resize_hpt_prepare(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
spapr_h_resize_hpt_commit(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
# hw/ppc/spapr_iommu.c
spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64
@@ -46,8 +48,7 @@ spapr_drc_set_configured(uint32_t index) "drc: 0x%"PRIx32
spapr_drc_set_configured_skipping(uint32_t index) "drc: 0x%"PRIx32", isolated device"
spapr_drc_attach(uint32_t index) "drc: 0x%"PRIx32
spapr_drc_detach(uint32_t index) "drc: 0x%"PRIx32
-spapr_drc_awaiting_isolated(uint32_t index) "drc: 0x%"PRIx32
-spapr_drc_awaiting_unusable(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_awaiting_quiesce(uint32_t index) "drc: 0x%"PRIx32
spapr_drc_awaiting_allocation(uint32_t index) "drc: 0x%"PRIx32
spapr_drc_reset(uint32_t index) "drc: 0x%"PRIx32
spapr_drc_realize(uint32_t index) "drc: 0x%"PRIx32


@@ -28,8 +28,6 @@
#define PSIHB_XSCOM_MAX 0x20
typedef struct XICSState XICSState;
typedef struct PnvPsi {
SysBusDevice parent;


@@ -13,6 +13,7 @@ struct sPAPRPHBState;
struct sPAPRNVRAM;
typedef struct sPAPREventLogEntry sPAPREventLogEntry;
typedef struct sPAPREventSource sPAPREventSource;
typedef struct sPAPRPendingHPT sPAPRPendingHPT;
#define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL
#define SPAPR_ENTRY_POINT 0x100
@@ -42,6 +43,13 @@ typedef struct sPAPRMachineClass sPAPRMachineClass;
#define SPAPR_MACHINE_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE)
typedef enum {
SPAPR_RESIZE_HPT_DEFAULT = 0,
SPAPR_RESIZE_HPT_DISABLED,
SPAPR_RESIZE_HPT_ENABLED,
SPAPR_RESIZE_HPT_REQUIRED,
} sPAPRResizeHPT;
/**
* sPAPRMachineClass:
*/
@@ -58,6 +66,7 @@ struct sPAPRMachineClass {
uint64_t *buid, hwaddr *pio,
hwaddr *mmio32, hwaddr *mmio64,
unsigned n_dma, uint32_t *liobns, Error **errp);
sPAPRResizeHPT resize_hpt_default;
};
/**
@@ -73,9 +82,12 @@ struct sPAPRMachineState {
ICSState *ics;
sPAPRRTCState rtc;
sPAPRResizeHPT resize_hpt;
void *htab;
uint32_t htab_shift;
uint64_t patb_entry; /* Process tbl registered in H_REGISTER_PROCESS_TABLE */
sPAPRPendingHPT *pending_hpt; /* in-progress resize */
hwaddr rma_size;
int vrma_adjust;
ssize_t rtas_size;
@@ -367,6 +379,8 @@ struct sPAPRMachineState {
#define H_XIRR_X 0x2FC
#define H_RANDOM 0x300
#define H_SET_MODE 0x31C
#define H_RESIZE_HPT_PREPARE 0x36C
#define H_RESIZE_HPT_COMMIT 0x370
#define H_CLEAN_SLB 0x374
#define H_INVALIDATE_PID 0x378
#define H_REGISTER_PROC_TBL 0x37C
@@ -607,8 +621,9 @@ struct sPAPRTCETable {
sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn);
struct sPAPREventLogEntry {
-int log_type;
-void *data;
+uint32_t summary;
+uint32_t extended_length;
+void *extended_log;
QTAILQ_ENTRY(sPAPREventLogEntry) next;
};
@@ -644,6 +659,9 @@ void spapr_hotplug_req_add_by_count_indexed(sPAPRDRConnectorType drc_type,
void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type,
uint32_t count, uint32_t index);
void spapr_cpu_parse_features(sPAPRMachineState *spapr);
int spapr_hpt_shift_for_ramsize(uint64_t ramsize);
void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
Error **errp);
/* CPU and LMB DRC release callbacks. */
void spapr_core_release(DeviceState *dev);
@@ -684,4 +702,6 @@ int spapr_rng_populate_dt(void *fdt);
void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg);
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
#endif /* HW_SPAPR_H */


@@ -15,6 +15,7 @@
#include <libfdt.h>
#include "qom/object.h"
#include "sysemu/sysemu.h"
#include "hw/qdev.h"
#define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector"
@@ -32,7 +33,7 @@
#define SPAPR_DRC_PHYSICAL_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, \
TYPE_SPAPR_DRC_PHYSICAL)
-#define SPAPR_DRC_PHYSICAL(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \
+#define SPAPR_DRC_PHYSICAL(obj) OBJECT_CHECK(sPAPRDRCPhysical, (obj), \
TYPE_SPAPR_DRC_PHYSICAL)
#define TYPE_SPAPR_DRC_LOGICAL "spapr-drc-logical"
@@ -172,11 +173,23 @@ typedef enum {
SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE = -9003,
} sPAPRDRCCResponse;
-/* rtas-configure-connector state */
-typedef struct sPAPRConfigureConnectorState {
-int fdt_offset;
-int fdt_depth;
-} sPAPRConfigureConnectorState;
typedef enum {
/*
* Values come from Fig. 12 in LoPAPR section 13.4
*
* These are exposed in the migration stream, so don't change
* them.
*/
SPAPR_DRC_STATE_INVALID = 0,
SPAPR_DRC_STATE_LOGICAL_UNUSABLE = 1,
SPAPR_DRC_STATE_LOGICAL_AVAILABLE = 2,
SPAPR_DRC_STATE_LOGICAL_UNISOLATE = 3,
SPAPR_DRC_STATE_LOGICAL_CONFIGURED = 4,
SPAPR_DRC_STATE_PHYSICAL_AVAILABLE = 5,
SPAPR_DRC_STATE_PHYSICAL_POWERON = 6,
SPAPR_DRC_STATE_PHYSICAL_UNISOLATE = 7,
SPAPR_DRC_STATE_PHYSICAL_CONFIGURED = 8,
} sPAPRDRCState;
typedef struct sPAPRDRConnector {
/*< private >*/
@@ -185,29 +198,25 @@ typedef struct sPAPRDRConnector {
uint32_t id;
Object *owner;
-/* DR-indicator */
-uint32_t dr_indicator;
+uint32_t state;
-/* sensor/indicator states */
-uint32_t isolation_state;
-uint32_t allocation_state;
-/* configure-connector state */
-void *fdt;
-int fdt_start_offset;
-bool configured;
-sPAPRConfigureConnectorState *ccs;
-bool awaiting_release;
-bool awaiting_allocation;
+/* RTAS ibm,configure-connector state */
+/* (only valid in UNISOLATE state) */
+int ccs_offset;
+int ccs_depth;
/* device pointer, via link property */
DeviceState *dev;
+bool unplug_requested;
+void *fdt;
+int fdt_start_offset;
} sPAPRDRConnector;
typedef struct sPAPRDRConnectorClass {
/*< private >*/
DeviceClass parent;
+sPAPRDRCState empty_state;
+sPAPRDRCState ready_state;
/*< public >*/
sPAPRDRConnectorTypeShift typeshift;
@@ -218,11 +227,23 @@ typedef struct sPAPRDRConnectorClass {
uint32_t (*isolate)(sPAPRDRConnector *drc);
uint32_t (*unisolate)(sPAPRDRConnector *drc);
void (*release)(DeviceState *dev);
-/* QEMU interfaces for managing hotplug operations */
-bool (*release_pending)(sPAPRDRConnector *drc);
} sPAPRDRConnectorClass;
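The new empty_state/ready_state class fields replace the old ad-hoc booleans: each DRC type declares which consolidated state means "nothing attached" and which means "fully configured". A sketch of the per-type values this series presumably installs in the class_init routines of hw/ppc/spapr_drc.c; the names come from the enum above, but treat the exact assignments as an assumption:

    /* Logical DRCs (e.g. CPU, LMB): assumed values */
    drck->empty_state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE;
    drck->ready_state = SPAPR_DRC_STATE_LOGICAL_CONFIGURED;

    /* Physical DRCs (PCI): assumed values */
    drck->empty_state = SPAPR_DRC_STATE_PHYSICAL_POWERON;
    drck->ready_state = SPAPR_DRC_STATE_PHYSICAL_CONFIGURED;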
typedef struct sPAPRDRCPhysical {
/*< private >*/
sPAPRDRConnector parent;
/* DR-indicator */
uint32_t dr_indicator;
} sPAPRDRCPhysical;
static inline bool spapr_drc_hotplugged(DeviceState *dev)
{
return dev->hotplugged && !runstate_check(RUN_STATE_INMIGRATE);
}
void spapr_drc_reset(sPAPRDRConnector *drc);
uint32_t spapr_drc_index(sPAPRDRConnector *drc);
sPAPRDRConnectorType spapr_drc_type(sPAPRDRConnector *drc);
@@ -235,6 +256,11 @@ int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner,
void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
int fdt_start_offset, Error **errp);
-void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp);
+void spapr_drc_detach(sPAPRDRConnector *drc);
static inline bool spapr_drc_unplug_requested(sPAPRDRConnector *drc)
{
return drc->unplug_requested;
}
#endif /* HW_SPAPR_DRC_H */


@@ -50,6 +50,7 @@ typedef struct sPAPROptionVector sPAPROptionVector;
#define OV5_DRCONF_MEMORY OV_BIT(2, 2)
#define OV5_FORM1_AFFINITY OV_BIT(5, 0)
#define OV5_HP_EVT OV_BIT(6, 5)
#define OV5_HPT_RESIZE OV_BIT(6, 7)
#define OV5_XIVE_EXPLOIT OV_BIT(23, 7)
/* ISA 3.00 MMU features: */


@@ -22,6 +22,7 @@
#include <linux/kvm.h>
#include "qemu-common.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "cpu-models.h"
@@ -88,6 +89,7 @@ static int cap_fixup_hcalls;
static int cap_htm; /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static uint32_t debug_inst_opcode;
@@ -144,6 +146,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
if (!cap_interrupt_level) {
fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
@@ -2709,3 +2712,76 @@ int kvmppc_enable_hwrng(void)
return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}
void kvmppc_check_papr_resize_hpt(Error **errp)
{
if (!kvm_enabled()) {
return; /* No KVM, we're good */
}
if (cap_resize_hpt) {
return; /* Kernel has explicit support, we're good */
}
/* Otherwise fall back to looking for PR KVM */
if (kvmppc_is_pr(kvm_state)) {
return;
}
error_setg(errp,
"Hash page table resizing not available with this KVM version");
}
int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
CPUState *cs = CPU(cpu);
struct kvm_ppc_resize_hpt rhpt = {
.flags = flags,
.shift = shift,
};
if (!cap_resize_hpt) {
return -ENOSYS;
}
return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}
int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
CPUState *cs = CPU(cpu);
struct kvm_ppc_resize_hpt rhpt = {
.flags = flags,
.shift = shift,
};
if (!cap_resize_hpt) {
return -ENOSYS;
}
return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}
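Both wrappers return -ENOSYS when KVM lacks the capability, which is what lets h_resize_hpt_prepare/commit above fall back to the userspace rehash. On the guest side the extension is a two-phase protocol; a rough sketch of the Linux guest's calling sequence, using the kernel helpers plpar_hcall_norets() and H_IS_LONG_BUSY() for illustration (not QEMU code):

    /* Guest-side sketch: resize the HPT to 1 << new_shift bytes */
    unsigned long new_shift = 30;    /* illustrative target size */
    long rc;

    do {
        /* PREPARE works asynchronously; the real caller sleeps for the
         * hint carried by the H_LONG_BUSY return value before retrying */
        rc = plpar_hcall_norets(H_RESIZE_HPT_PREPARE, 0, new_shift);
    } while (H_IS_LONG_BUSY(rc));

    if (rc == H_SUCCESS) {
        /* Point of no return: pivot to the new table */
        rc = plpar_hcall_norets(H_RESIZE_HPT_COMMIT, 0, new_shift);
    }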
static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
{
target_ulong sdr1 = arg.target_ptr;
PowerPCCPU *cpu = POWERPC_CPU(cs);
CPUPPCState *env = &cpu->env;
/* This is just for the benefit of PR KVM */
cpu_synchronize_state(cs);
env->spr[SPR_SDR1] = sdr1;
if (kvmppc_put_books_sregs(cpu) < 0) {
error_report("Unable to update SDR1 in KVM");
exit(1);
}
}
void kvmppc_update_sdr1(target_ulong sdr1)
{
CPUState *cs;
CPU_FOREACH(cs) {
run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));
}
}


@@ -63,6 +63,10 @@ bool kvmppc_has_cap_mmu_hash_v3(void);
int kvmppc_enable_hwrng(void);
int kvmppc_put_books_sregs(PowerPCCPU *cpu);
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
void kvmppc_check_papr_resize_hpt(Error **errp);
int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift);
int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift);
void kvmppc_update_sdr1(target_ulong sdr1);
bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path);
@@ -297,6 +301,28 @@ static inline PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
return NULL;
}
static inline void kvmppc_check_papr_resize_hpt(Error **errp)
{
return;
}
static inline int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu,
target_ulong flags, int shift)
{
return -ENOSYS;
}
static inline int kvmppc_resize_hpt_commit(PowerPCCPU *cpu,
target_ulong flags, int shift)
{
return -ENOSYS;
}
static inline void kvmppc_update_sdr1(target_ulong sdr1)
{
abort();
}
#endif
#ifndef CONFIG_KVM


@@ -63,11 +63,15 @@ void ppc_hash64_update_rmls(CPUPPCState *env);
#define HASH_PTE_SIZE_64 16
#define HASH_PTEG_SIZE_64 (HASH_PTE_SIZE_64 * HPTES_PER_GROUP)
#define HPTE64_V_SSIZE SLB_VSID_B
#define HPTE64_V_SSIZE_256M SLB_VSID_B_256M
#define HPTE64_V_SSIZE_1T SLB_VSID_B_1T
#define HPTE64_V_SSIZE_SHIFT 62
#define HPTE64_V_AVPN_SHIFT 7
#define HPTE64_V_AVPN 0x3fffffffffffff80ULL
#define HPTE64_V_AVPN_VAL(x) (((x) & HPTE64_V_AVPN) >> HPTE64_V_AVPN_SHIFT)
#define HPTE64_V_COMPARE(x, y) (!(((x) ^ (y)) & 0xffffffffffffff83ULL))
#define HPTE64_V_BOLTED 0x0000000000000010ULL
#define HPTE64_V_LARGE 0x0000000000000004ULL
#define HPTE64_V_SECONDARY 0x0000000000000002ULL
#define HPTE64_V_VALID 0x0000000000000001ULL
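The HPTE64_V_COMPARE mask 0xffffffffffffff83 deserves a note: it keeps bits 63..7 (the AVPN together with the SSIZE field) plus SECONDARY (0x2) and VALID (0x1), while ignoring LARGE (0x4), BOLTED (0x10) and the software bits in between. So two pte0 words that differ only in bolting still compare equal; an illustrative example:

    uint64_t a = 0x4000000000000801ULL;    /* some AVPN bits | VALID */
    uint64_t b = a | HPTE64_V_BOLTED;      /* same entry, but bolted */

    assert(HPTE64_V_COMPARE(a, b));                       /* 0x10 masked out */
    assert(!HPTE64_V_COMPARE(a, a | HPTE64_V_SECONDARY)); /* 0x02 compared */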


@@ -9011,8 +9011,16 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
/* By default we choose legacy mode and switch to new hash or radix
* when a register process table hcall is made. So disable process
* tables and guest translation shootdown by default.
*
* Hot-plugged CPUs inherit the guest radix setting under KVM but
* not under TCG; update the default LPCR to keep new CPUs in sync
* when radix is enabled.
*/
-lpcr->default_value &= ~(LPCR_UPRT | LPCR_GTSE);
+if (ppc64_radix_guest(cpu)) {
+lpcr->default_value |= LPCR_UPRT | LPCR_GTSE;
+} else {
+lpcr->default_value &= ~(LPCR_UPRT | LPCR_GTSE);
+}
lpcr->default_value |= LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE |
LPCR_OEE;
break;