ppc patch queue 2020-09-08

This supersedes ppc-for-5.2-20200904, it fixes a couple of bugs in
 that PR and adds a few extra patches.
 
 Next pull request for qemu-5.2.  The biggest thing here is the
 generalization of ARM's start-powered-off machine property to all
 targets.  This can fix a number of odd little edge cases where KVM
 could run vcpus before they were properly initialized.  This does
 include changes to a number of files that aren't normally in my
 purview.  There are suitable Acked-by lines and Peter requested this
 come in via my tree, since the most pressing requirement for it is in
 pseries machines with the POWER secure virtual machine facility.
 
 In addition we have:
  * Daniel Barboza's rework and clean up of pseries machine NUMA handling
  * Correction to behaviour of the nvdimm= generic machine property on
    pseries
  * An optimization to the allocation of XIVE interrupts on KVM
  * Some fixes for confused behaviour with kernel_irqchip when both
    XICS and XIVE are in play
  * Add HIOMAP comamnd to pnv flash
  * Properly advertise the fact that spapr_vscsi doesn't handle
    hotplugged disks
  * Some assorted minor enhancements
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEdfRlhq5hpmzETofcbDjKyiDZs5IFAl9XFGYACgkQbDjKyiDZ
 s5JQHA/9HvOB3jIhqFsLrP0/qqcKMVrhLVT69Efv1GM7yO0oGs/xo8bQinoEnU7O
 OV52LOgYx+H7ELbzBZogDqWSMfAvraPHAkUYdb6MlKA6Tgkv3hXyIXkPt4V6bzGd
 0D5gE6V7SpoxQoLH8B3MDIozbZ/iZ6DRlxu3zS/Y308DbEr/06ANrTtA/CLgk0f7
 nZBBzulMwNPx8bxFeBPpCbm1giuLdbgq7FsRe9J2IJLUmB8QdtMW6tP42o4WGeCe
 Uc4UbSjLDrdmPW7nXJqE21GQ+icKv2E7Mq0aZkhjZfnu+qQaJ+bYVD0fTPIRIt4g
 /O7g0huTn1yV4csGZV9HDp+QbrJK331CBR9zQXjKjlfpuJxrGChuuwQpM/VU/HkM
 e0us4nh6h5BWe1Ird5J4+47MglUR+NFrzgryPwkGnGw4rvuDuGA0cBfn/Zvhb1fc
 qjR87NHYKhjGaE+uuIZHOESbKrPTOqHoVAr43AZnEg3uqh2NHUD4PozFVDC3CJ8G
 g3dimKgbZ/r+vbXByagvWuqzueHj7aXgqIkrHlJ6kkYylgDhMRhgt0TAV+fuiIjM
 x2ClBM7aMA0o8lZW4a/eDva5vGecuXBb7VRAYqS3IdFz7otm2q0b8Y2+Hn2/s7H/
 m5YtpelyFVg+ymYsVGdfTApnTCbs+d7sKtbR6O3KjjBuvXOMf1E=
 =Hm7N
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-5.2-20200908' into staging

ppc patch queue 2020-09-08

This supersedes ppc-for-5.2-20200904, it fixes a couple of bugs in
that PR and adds a few extra patches.

Next pull request for qemu-5.2.  The biggest thing here is the
generalization of ARM's start-powered-off machine property to all
targets.  This can fix a number of odd little edge cases where KVM
could run vcpus before they were properly initialized.  This does
include changes to a number of files that aren't normally in my
purview.  There are suitable Acked-by lines and Peter requested this
come in via my tree, since the most pressing requirement for it is in
pseries machines with the POWER secure virtual machine facility.

In addition we have:
 * Daniel Barboza's rework and clean up of pseries machine NUMA handling
 * Correction to behaviour of the nvdimm= generic machine property on
   pseries
 * An optimization to the allocation of XIVE interrupts on KVM
 * Some fixes for confused behaviour with kernel_irqchip when both
   XICS and XIVE are in play
 * Add HIOMAP comamnd to pnv flash
 * Properly advertise the fact that spapr_vscsi doesn't handle
   hotplugged disks
 * Some assorted minor enhancements

# gpg: Signature made Tue 08 Sep 2020 06:19:34 BST
# gpg:                using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full]
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full]
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full]
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown]
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-5.2-20200908: (33 commits)
  spapr_numa: use spapr_numa_get_vcpu_assoc() in home_node hcall
  spapr_numa: create a vcpu associativity helper
  spapr: move h_home_node_associativity to spapr_numa.c
  spapr_numa: move NVLink2 associativity handling to spapr_numa.c
  spapr, spapr_numa: move lookup-arrays handling to spapr_numa.c
  spapr, spapr_numa: handle vcpu ibm,associativity
  spapr: introduce SpaprMachineState::numa_assoc_array
  ppc/spapr_nvdimm: turn spapr_dt_nvdimm() static
  ppc: introducing spapr_numa.c NUMA code helper
  hw/ppc/ppc4xx_pci: Replace pointless warning by assert()
  hw/ppc/ppc4xx_pci: Use ARRAY_SIZE() instead of magic value
  target/s390x: Use start-powered-off CPUState property
  sparc/sun4m: Use start-powered-off CPUState property
  sparc/sun4m: Don't set cs->halted = 0 in main_cpu_reset()
  mips/cps: Use start-powered-off CPUState property
  ppc/e500: Use start-powered-off CPUState property
  ppc/spapr: Use start-powered-off CPUState property
  target/arm: Move setting of CPU halted state to generic code
  target/arm: Move start-powered-off property to generic CPUState
  ppc/spapr_nvdimm: do not enable support with 'nvdimm=off'
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2020-09-08 16:18:48 +01:00
commit b95ba83fc5
33 changed files with 548 additions and 313 deletions

1
exec.c
View File

@ -899,6 +899,7 @@ Property cpu_common_props[] = {
DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION,
MemoryRegion *),
#endif
DEFINE_PROP_BOOL("start-powered-off", CPUState, start_powered_off, false),
DEFINE_PROP_END_OF_LIST(),
};

View File

@ -258,7 +258,7 @@ static void cpu_common_reset(DeviceState *dev)
}
cpu->interrupt_request = 0;
cpu->halted = 0;
cpu->halted = cpu->start_powered_off;
cpu->mem_io_pc = 0;
cpu->icount_extra = 0;
atomic_set(&cpu->icount_decr_ptr->u32, 0);

View File

@ -309,6 +309,7 @@ static void adb_device_class_init(ObjectClass *oc, void *data)
static const TypeInfo adb_device_type_info = {
.name = TYPE_ADB_DEVICE,
.parent = TYPE_DEVICE,
.class_size = sizeof(ADBDeviceClass),
.instance_size = sizeof(ADBDevice),
.abstract = true,
.class_init = adb_device_class_init,

View File

@ -595,6 +595,7 @@ static Property spapr_xive_properties[] = {
DEFINE_PROP_UINT32("nr-ends", SpaprXive, nr_ends, 0),
DEFINE_PROP_UINT64("vc-base", SpaprXive, vc_base, SPAPR_XIVE_VC_BASE),
DEFINE_PROP_UINT64("tm-base", SpaprXive, tm_base, SPAPR_XIVE_TM_BASE),
DEFINE_PROP_UINT8("hv-prio", SpaprXive, hv_prio, 7),
DEFINE_PROP_END_OF_LIST(),
};
@ -692,12 +693,13 @@ static void spapr_xive_dt(SpaprInterruptController *intc, uint32_t nr_servers,
cpu_to_be32(16), /* 64K */
};
/*
* The following array is in sync with the reserved priorities
* defined by the 'spapr_xive_priority_is_reserved' routine.
* QEMU/KVM only needs to define a single range to reserve the
* escalation priority. A priority bitmask would have been more
* appropriate.
*/
uint32_t plat_res_int_priorities[] = {
cpu_to_be32(7), /* start */
cpu_to_be32(0xf8), /* count */
cpu_to_be32(xive->hv_prio), /* start */
cpu_to_be32(0xff - xive->hv_prio), /* count */
};
/* Thread Interrupt Management Area : User (ring 3) and OS (ring 2) */
@ -844,19 +846,12 @@ type_init(spapr_xive_register_types)
*/
/*
* Linux hosts under OPAL reserve priority 7 for their own escalation
* interrupts (DD2.X POWER9). So we only allow the guest to use
* priorities [0..6].
* On POWER9, the KVM XIVE device uses priority 7 for the escalation
* interrupts. So we only allow the guest to use priorities [0..6].
*/
static bool spapr_xive_priority_is_reserved(uint8_t priority)
static bool spapr_xive_priority_is_reserved(SpaprXive *xive, uint8_t priority)
{
switch (priority) {
case 0 ... 6:
return false;
case 7: /* OPAL escalation queue */
default:
return true;
}
return priority >= xive->hv_prio;
}
/*
@ -1053,7 +1048,7 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
new_eas.w = eas.w & cpu_to_be64(~EAS_MASKED);
}
if (spapr_xive_priority_is_reserved(priority)) {
if (spapr_xive_priority_is_reserved(xive, priority)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
" is reserved\n", priority);
return H_P4;
@ -1212,7 +1207,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
* This is not needed when running the emulation under QEMU
*/
if (spapr_xive_priority_is_reserved(priority)) {
if (spapr_xive_priority_is_reserved(xive, priority)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
" is reserved\n", priority);
return H_P3;
@ -1299,7 +1294,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
* This is not needed when running the emulation under QEMU
*/
if (spapr_xive_priority_is_reserved(priority)) {
if (spapr_xive_priority_is_reserved(xive, priority)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
" is reserved\n", priority);
return H_P3;
@ -1466,7 +1461,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
* This is not needed when running the emulation under QEMU
*/
if (spapr_xive_priority_is_reserved(priority)) {
if (spapr_xive_priority_is_reserved(xive, priority)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
" is reserved\n", priority);
return H_P3;

View File

@ -36,10 +36,9 @@ typedef struct KVMEnabledCPU {
static QLIST_HEAD(, KVMEnabledCPU)
kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);
static bool kvm_cpu_is_enabled(CPUState *cs)
static bool kvm_cpu_is_enabled(unsigned long vcpu_id)
{
KVMEnabledCPU *enabled_cpu;
unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
if (enabled_cpu->vcpu_id == vcpu_id) {
@ -147,6 +146,45 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
return s.ret;
}
/*
* Allocate the vCPU IPIs from the vCPU context. This will allocate
* the XIVE IPI interrupt on the chip on which the vCPU is running.
* This gives a better distribution of IPIs when the guest has a lot
* of vCPUs. When the vCPUs are pinned, this will make the IPI local
* to the chip of the vCPU. It will reduce rerouting between interrupt
* controllers and gives better performance.
*/
typedef struct {
SpaprXive *xive;
Error *err;
int rc;
} XiveInitIPI;
static void kvmppc_xive_reset_ipi_on_cpu(CPUState *cs, run_on_cpu_data arg)
{
unsigned long ipi = kvm_arch_vcpu_id(cs);
XiveInitIPI *s = arg.host_ptr;
uint64_t state = 0;
s->rc = kvm_device_access(s->xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi,
&state, true, &s->err);
}
static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp)
{
XiveInitIPI s = {
.xive = xive,
.err = NULL,
.rc = 0,
};
run_on_cpu(cs, kvmppc_xive_reset_ipi_on_cpu, RUN_ON_CPU_HOST_PTR(&s));
if (s.err) {
error_propagate(errp, s.err);
}
return s.rc;
}
int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
{
ERRP_GUARD();
@ -157,7 +195,7 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
assert(xive->fd != -1);
/* Check if CPU was hot unplugged and replugged. */
if (kvm_cpu_is_enabled(tctx->cs)) {
if (kvm_cpu_is_enabled(kvm_arch_vcpu_id(tctx->cs))) {
return 0;
}
@ -176,6 +214,12 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
return ret;
}
/* Create/reset the vCPU IPI */
ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp);
if (ret < 0) {
return ret;
}
kvm_cpu_enable(tctx->cs);
return 0;
}
@ -235,6 +279,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
assert(xive->fd != -1);
/*
* The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect()
* and not with all sources in kvmppc_xive_source_reset()
*/
assert(srcno >= SPAPR_XIRQ_BASE);
if (xive_source_irq_is_lsi(xsrc, srcno)) {
state |= KVM_XIVE_LEVEL_SENSITIVE;
if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
@ -246,12 +296,28 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
true, errp);
}
/*
* To be valid, a source must have been claimed by the machine (valid
* entry in the EAS table) and if it is a vCPU IPI, the vCPU should
* have been enabled, which means the IPI has been allocated in
* kvmppc_xive_cpu_connect().
*/
static bool xive_source_is_valid(SpaprXive *xive, int i)
{
return xive_eas_is_valid(&xive->eat[i]) &&
(i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i));
}
static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
{
SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
int i;
for (i = 0; i < xsrc->nr_irqs; i++) {
/*
* Skip the vCPU IPIs. These are created/reset when the vCPUs are
* connected in kvmppc_xive_cpu_connect()
*/
for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {
int ret;
if (!xive_eas_is_valid(&xive->eat[i])) {
@ -333,7 +399,7 @@ static void kvmppc_xive_source_get_state(XiveSource *xsrc)
for (i = 0; i < xsrc->nr_irqs; i++) {
uint8_t pq;
if (!xive_eas_is_valid(&xive->eat[i])) {
if (!xive_source_is_valid(xive, i)) {
continue;
}
@ -516,7 +582,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running,
uint8_t pq;
uint8_t old_pq;
if (!xive_eas_is_valid(&xive->eat[i])) {
if (!xive_source_is_valid(xive, i)) {
continue;
}
@ -544,7 +610,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running,
for (i = 0; i < xsrc->nr_irqs; i++) {
uint8_t pq;
if (!xive_eas_is_valid(&xive->eat[i])) {
if (!xive_source_is_valid(xive, i)) {
continue;
}
@ -647,22 +713,22 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
}
}
/*
* We can only restore the source config if the source has been
* previously set in KVM. Since we don't do that at reset time
* when restoring a VM, let's do it now.
*/
ret = kvmppc_xive_source_reset(&xive->source, &local_err);
if (ret < 0) {
goto fail;
}
/* Restore the EAT */
for (i = 0; i < xive->nr_irqs; i++) {
if (!xive_eas_is_valid(&xive->eat[i])) {
if (!xive_source_is_valid(xive, i)) {
continue;
}
/*
* We can only restore the source config if the source has been
* previously set in KVM. Since we don't do that for all interrupts
* at reset time anymore, let's do it now.
*/
ret = kvmppc_xive_source_reset_one(&xive->source, i, &local_err);
if (ret < 0) {
goto fail;
}
ret = kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
if (ret < 0) {
goto fail;

View File

@ -52,9 +52,6 @@ static void main_cpu_reset(void *opaque)
CPUState *cs = CPU(cpu);
cpu_reset(cs);
/* All VPs are halted on reset. Leave powering up to CPC. */
cs->halted = 1;
}
static bool cpu_mips_itu_supported(CPUMIPSState *env)
@ -76,7 +73,17 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
bool saar_present = false;
for (i = 0; i < s->num_vp; i++) {
cpu = MIPS_CPU(cpu_create(s->cpu_type));
cpu = MIPS_CPU(object_new(s->cpu_type));
/* All VPs are halted on reset. Leave powering up to CPC. */
if (!object_property_set_bool(OBJECT(cpu), "start-powered-off", true,
errp)) {
return;
}
if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) {
return;
}
/* Init internal devices */
cpu_mips_irq_init_cpu(cpu);

View File

@ -704,9 +704,6 @@ static void ppce500_cpu_reset_sec(void *opaque)
cpu_reset(cs);
/* Secondary CPU starts in halted state for now. Needs to change when
implementing non-kernel boot. */
cs->halted = 1;
cs->exception_index = EXCP_HLT;
}
@ -865,7 +862,7 @@ void ppce500_init(MachineState *machine)
CPUState *cs;
qemu_irq *input;
cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
cpu = POWERPC_CPU(object_new(machine->cpu_type));
env = &cpu->env;
cs = CPU(cpu);
@ -875,6 +872,14 @@ void ppce500_init(MachineState *machine)
exit(1);
}
/*
* Secondary CPU starts in halted state for now. Needs to change
* when implementing non-kernel boot.
*/
object_property_set_bool(OBJECT(cs), "start-powered-off", i != 0,
&error_fatal);
qdev_realize_and_unref(DEVICE(cs), NULL, &error_fatal);
if (!firstenv) {
firstenv = env;
}

View File

@ -25,7 +25,8 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files(
'spapr_irq.c',
'spapr_tpm_proxy.c',
'spapr_nvdimm.c',
'spapr_rtas_ddw.c'
'spapr_rtas_ddw.c',
'spapr_numa.c',
))
ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c'))
ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files(

View File

@ -140,6 +140,27 @@ static uint16_t bytes_to_blocks(uint32_t bytes)
return bytes >> BLOCK_SHIFT;
}
static uint32_t blocks_to_bytes(uint16_t blocks)
{
return blocks << BLOCK_SHIFT;
}
static int hiomap_erase(PnvPnor *pnor, uint32_t offset, uint32_t size)
{
MemTxResult result;
int i;
for (i = 0; i < size / 4; i++) {
result = memory_region_dispatch_write(&pnor->mmio, offset + i * 4,
0xFFFFFFFF, MO_32,
MEMTXATTRS_UNSPECIFIED);
if (result != MEMTX_OK) {
return -1;
}
}
return 0;
}
static void hiomap_cmd(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len,
RspBuffer *rsp)
{
@ -155,10 +176,16 @@ static void hiomap_cmd(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len,
switch (cmd[2]) {
case HIOMAP_C_MARK_DIRTY:
case HIOMAP_C_FLUSH:
case HIOMAP_C_ERASE:
case HIOMAP_C_ACK:
break;
case HIOMAP_C_ERASE:
if (hiomap_erase(pnor, blocks_to_bytes(cmd[5] << 8 | cmd[4]),
blocks_to_bytes(cmd[7] << 8 | cmd[6]))) {
rsp_buffer_set_error(rsp, IPMI_CC_UNSPECIFIED);
}
break;
case HIOMAP_C_GET_INFO:
rsp_buffer_push(rsp, 2); /* Version 2 */
rsp_buffer_push(rsp, BLOCK_SHIFT); /* block size */

View File

@ -646,7 +646,6 @@ static void pnv_lpc_power8_class_init(ObjectClass *klass, void *data)
static const TypeInfo pnv_lpc_power8_info = {
.name = TYPE_PNV8_LPC,
.parent = TYPE_PNV_LPC,
.instance_size = sizeof(PnvLpcController),
.class_init = pnv_lpc_power8_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_PNV_XSCOM_INTERFACE },
@ -687,7 +686,6 @@ static void pnv_lpc_power9_class_init(ObjectClass *klass, void *data)
static const TypeInfo pnv_lpc_power9_info = {
.name = TYPE_PNV9_LPC,
.parent = TYPE_PNV_LPC,
.instance_size = sizeof(PnvLpcController),
.class_init = pnv_lpc_power9_class_init,
};
@ -768,6 +766,7 @@ static void pnv_lpc_class_init(ObjectClass *klass, void *data)
static const TypeInfo pnv_lpc_info = {
.name = TYPE_PNV_LPC,
.parent = TYPE_DEVICE,
.instance_size = sizeof(PnvLpcController),
.class_init = pnv_lpc_class_init,
.class_size = sizeof(PnvLpcClass),
.abstract = true,

View File

@ -256,10 +256,7 @@ static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level)
qemu_irq *pci_irqs = opaque;
trace_ppc4xx_pci_set_irq(irq_num);
if (irq_num < 0) {
fprintf(stderr, "%s: PCI irq %d\n", __func__, irq_num);
return;
}
assert(irq_num >= 0);
qemu_set_irq(pci_irqs[irq_num], level);
}
@ -320,7 +317,8 @@ static void ppc4xx_pcihost_realize(DeviceState *dev, Error **errp)
b = pci_register_root_bus(dev, NULL, ppc4xx_pci_set_irq,
ppc4xx_pci_map_irq, s->irq, get_system_memory(),
get_system_io(), 0, 4, TYPE_PCI_BUS);
get_system_io(), 0, ARRAY_SIZE(s->irq),
TYPE_PCI_BUS);
h->bus = b;
pci_create_simple(b, 0, "ppc4xx-host-bridge");

View File

@ -81,6 +81,7 @@
#include "hw/mem/memory-device.h"
#include "hw/ppc/spapr_tpm_proxy.h"
#include "hw/ppc/spapr_nvdimm.h"
#include "hw/ppc/spapr_numa.h"
#include "monitor/monitor.h"
@ -201,21 +202,6 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
return ret;
}
static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu)
{
int index = spapr_get_vcpu_id(cpu);
uint32_t associativity[] = {cpu_to_be32(0x5),
cpu_to_be32(0x0),
cpu_to_be32(0x0),
cpu_to_be32(0x0),
cpu_to_be32(cpu->node_id),
cpu_to_be32(index)};
/* Advertise NUMA via ibm,associativity */
return fdt_setprop(fdt, offset, "ibm,associativity", associativity,
sizeof(associativity));
}
static void spapr_dt_pa_features(SpaprMachineState *spapr,
PowerPCCPU *cpu,
void *fdt, int offset)
@ -313,14 +299,9 @@ static void add_str(GString *s, const gchar *s1)
g_string_append_len(s, s1, strlen(s1) + 1);
}
static int spapr_dt_memory_node(void *fdt, int nodeid, hwaddr start,
hwaddr size)
static int spapr_dt_memory_node(SpaprMachineState *spapr, void *fdt, int nodeid,
hwaddr start, hwaddr size)
{
uint32_t associativity[] = {
cpu_to_be32(0x4), /* length */
cpu_to_be32(0x0), cpu_to_be32(0x0),
cpu_to_be32(0x0), cpu_to_be32(nodeid)
};
char mem_name[32];
uint64_t mem_reg_property[2];
int off;
@ -334,8 +315,7 @@ static int spapr_dt_memory_node(void *fdt, int nodeid, hwaddr start,
_FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
_FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
sizeof(mem_reg_property))));
_FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
sizeof(associativity))));
spapr_numa_write_associativity_dt(spapr, fdt, off, nodeid);
return off;
}
@ -555,13 +535,10 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr,
void *fdt)
{
MachineState *machine = MACHINE(spapr);
int nb_numa_nodes = machine->numa_state->num_nodes;
int ret, i, offset;
int ret, offset;
uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
uint32_t prop_lmb_size[] = {cpu_to_be32(lmb_size >> 32),
cpu_to_be32(lmb_size & 0xffffffff)};
uint32_t *int_buf, *cur_index, buf_len;
int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
MemoryDeviceInfoList *dimms = NULL;
/*
@ -602,25 +579,7 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr,
return ret;
}
/* ibm,associativity-lookup-arrays */
buf_len = (nr_nodes * 4 + 2) * sizeof(uint32_t);
cur_index = int_buf = g_malloc0(buf_len);
int_buf[0] = cpu_to_be32(nr_nodes);
int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */
cur_index += 2;
for (i = 0; i < nr_nodes; i++) {
uint32_t associativity[] = {
cpu_to_be32(0x0),
cpu_to_be32(0x0),
cpu_to_be32(0x0),
cpu_to_be32(i)
};
memcpy(cur_index, associativity, sizeof(associativity));
cur_index += 4;
}
ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
(cur_index - int_buf) * sizeof(uint32_t));
g_free(int_buf);
ret = spapr_numa_write_assoc_lookup_arrays(spapr, fdt, offset);
return ret;
}
@ -648,7 +607,7 @@ static int spapr_dt_memory(SpaprMachineState *spapr, void *fdt)
if (!mem_start) {
/* spapr_machine_init() checks for rma_size <= node0_size
* already */
spapr_dt_memory_node(fdt, i, 0, spapr->rma_size);
spapr_dt_memory_node(spapr, fdt, i, 0, spapr->rma_size);
mem_start += spapr->rma_size;
node_size -= spapr->rma_size;
}
@ -660,7 +619,7 @@ static int spapr_dt_memory(SpaprMachineState *spapr, void *fdt)
sizetmp = 1ULL << ctzl(mem_start);
}
spapr_dt_memory_node(fdt, i, mem_start, sizetmp);
spapr_dt_memory_node(spapr, fdt, i, mem_start, sizetmp);
node_size -= sizetmp;
mem_start += sizetmp;
}
@ -790,7 +749,7 @@ static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset,
pft_size_prop, sizeof(pft_size_prop))));
if (ms->numa_state->num_nodes > 1) {
_FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cpu));
_FDT(spapr_numa_fixup_cpu_dt(spapr, fdt, offset, cpu));
}
_FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
@ -891,16 +850,9 @@ static int spapr_dt_rng(void *fdt)
static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
{
MachineState *ms = MACHINE(spapr);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
int rtas;
GString *hypertas = g_string_sized_new(256);
GString *qemu_hypertas = g_string_sized_new(256);
uint32_t refpoints[] = {
cpu_to_be32(0x4),
cpu_to_be32(0x4),
cpu_to_be32(0x2),
};
uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
uint64_t max_device_addr = MACHINE(spapr)->device_memory->base +
memory_region_size(&MACHINE(spapr)->device_memory->mr);
uint32_t lrdr_capacity[] = {
@ -910,14 +862,6 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff),
cpu_to_be32(ms->smp.max_cpus / ms->smp.threads),
};
uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
uint32_t maxdomains[] = {
cpu_to_be32(4),
maxdomain,
maxdomain,
maxdomain,
cpu_to_be32(spapr->gpu_numa_id),
};
_FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
@ -953,15 +897,7 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
qemu_hypertas->str, qemu_hypertas->len));
g_string_free(qemu_hypertas, TRUE);
if (smc->pre_5_1_assoc_refpoints) {
nr_refpoints = 2;
}
_FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
refpoints, nr_refpoints * sizeof(refpoints[0])));
_FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
maxdomains, sizeof(maxdomains)));
spapr_numa_write_rtas_dt(spapr, fdt, rtas);
/*
* FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log,
@ -1297,7 +1233,7 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
/* NVDIMM devices */
if (mc->nvdimm_supported) {
spapr_dt_persistent_memory(fdt);
spapr_dt_persistent_memory(spapr, fdt);
}
return fdt;
@ -2832,6 +2768,9 @@ static void spapr_machine_init(MachineState *machine)
*/
spapr->gpu_numa_id = MAX(1, machine->numa_state->num_nodes);
/* Init numa_assoc_array */
spapr_numa_associativity_init(spapr, machine);
if ((!kvm_enabled() || kvmppc_has_cap_mmu_radix()) &&
ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0,
spapr->max_compat_pvr)) {
@ -3416,7 +3355,7 @@ int spapr_lmb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
addr = spapr_drc_index(drc) * SPAPR_MEMORY_BLOCK_SIZE;
node = object_property_get_uint(OBJECT(drc->dev), PC_DIMM_NODE_PROP,
&error_abort);
*fdt_start_offset = spapr_dt_memory_node(fdt, node, addr,
*fdt_start_offset = spapr_dt_memory_node(spapr, fdt, node, addr,
SPAPR_MEMORY_BLOCK_SIZE);
return 0;
}
@ -3520,7 +3459,6 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
{
const SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(hotplug_dev);
SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
const MachineClass *mc = MACHINE_CLASS(smc);
bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
PCDIMMDevice *dimm = PC_DIMM(dev);
Error *local_err = NULL;
@ -3533,27 +3471,22 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
return;
}
if (is_nvdimm && !mc->nvdimm_supported) {
error_setg(errp, "NVDIMM hotplug not supported for this machine");
return;
}
size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
if (!is_nvdimm && size % SPAPR_MEMORY_BLOCK_SIZE) {
error_setg(errp, "Hotplugged memory size must be a multiple of "
"%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB);
return;
} else if (is_nvdimm) {
spapr_nvdimm_validate_opts(NVDIMM(dev), size, &local_err);
if (is_nvdimm) {
spapr_nvdimm_validate(hotplug_dev, NVDIMM(dev), size, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
} else if (size % SPAPR_MEMORY_BLOCK_SIZE) {
error_setg(errp, "Hotplugged memory size must be a multiple of "
"%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB);
return;
}
memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP,

View File

@ -36,11 +36,6 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
cpu_reset(cs);
/* All CPUs start halted. CPU0 is unhalted from the machine level
* reset code and the rest are explicitly started up by the guest
* using an RTAS call */
cs->halted = 1;
env->spr[SPR_HIOR] = 0;
lpcr = env->spr[SPR_LPCR];
@ -274,6 +269,11 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
cs = CPU(obj);
cpu = POWERPC_CPU(obj);
/*
* All CPUs start halted. CPU0 is unhalted from the machine level reset code
* and the rest are explicitly started up by the guest using an RTAS call.
*/
cs->start_powered_off = true;
cs->cpu_index = cc->core_id + i;
spapr_set_vcpu_id(cpu, cs->cpu_index, &local_err);
if (local_err) {

View File

@ -1873,42 +1873,6 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
return ret;
}
static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong opcode,
target_ulong *args)
{
target_ulong flags = args[0];
target_ulong procno = args[1];
PowerPCCPU *tcpu;
int idx;
/* only support procno from H_REGISTER_VPA */
if (flags != 0x1) {
return H_FUNCTION;
}
tcpu = spapr_find_cpu(procno);
if (tcpu == NULL) {
return H_P2;
}
/* sequence is the same as in the "ibm,associativity" property */
idx = 0;
#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \
((uint64_t)(b) & 0xffffffff))
args[idx++] = ASSOCIATIVITY(0, 0);
args[idx++] = ASSOCIATIVITY(0, tcpu->node_id);
args[idx++] = ASSOCIATIVITY(procno, -1);
for ( ; idx < 6; idx++) {
args[idx] = -1;
}
#undef ASSOCIATIVITY
return H_SUCCESS;
}
static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong opcode,
@ -2139,10 +2103,6 @@ static void hypercall_register_types(void)
spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support);
spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt);
/* Virtual Processor Home Node */
spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
h_home_node_associativity);
}
type_init(hypercall_register_types)

View File

@ -172,7 +172,7 @@ static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
* To cover both and not confuse the OS, add an early failure in
* QEMU.
*/
if (spapr->irq == &spapr_irq_xive) {
if (!spapr->irq->xics) {
error_setg(errp, "XIVE-only machines require a POWER9 CPU");
return -1;
}

242
hw/ppc/spapr_numa.c Normal file
View File

@ -0,0 +1,242 @@
/*
* QEMU PowerPC pSeries Logical Partition NUMA associativity handling
*
* Copyright IBM Corp. 2020
*
* Authors:
* Daniel Henrique Barboza <danielhb413@gmail.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "hw/ppc/spapr_numa.h"
#include "hw/pci-host/spapr.h"
#include "hw/ppc/fdt.h"
/* Moved from hw/ppc/spapr_pci_nvlink2.c */
#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
void spapr_numa_associativity_init(SpaprMachineState *spapr,
MachineState *machine)
{
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
int nb_numa_nodes = machine->numa_state->num_nodes;
int i, j, max_nodes_with_gpus;
/*
* For all associativity arrays: first position is the size,
* position MAX_DISTANCE_REF_POINTS is always the numa_id,
* represented by the index 'i'.
*
* This will break on sparse NUMA setups, when/if QEMU starts
* to support it, because there will be no more guarantee that
* 'i' will be a valid node_id set by the user.
*/
for (i = 0; i < nb_numa_nodes; i++) {
spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
}
/*
* Initialize NVLink GPU associativity arrays. We know that
* the first GPU will take the first available NUMA id, and
* we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine.
* At this point we're not sure if there are GPUs or not, but
* let's initialize the associativity arrays and allow NVLink
* GPUs to be handled like regular NUMA nodes later on.
*/
max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;
for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
spapr->numa_assoc_array[i][j] = gpu_assoc;
}
spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
}
}
void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
int offset, int nodeid)
{
_FDT((fdt_setprop(fdt, offset, "ibm,associativity",
spapr->numa_assoc_array[nodeid],
sizeof(spapr->numa_assoc_array[nodeid]))));
}
static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr,
PowerPCCPU *cpu)
{
uint32_t *vcpu_assoc = g_new(uint32_t, VCPU_ASSOC_SIZE);
int index = spapr_get_vcpu_id(cpu);
/*
* VCPUs have an extra 'cpu_id' value in ibm,associativity
* compared to other resources. Increment the size at index
* 0, put cpu_id last, then copy the remaining associativity
* domains.
*/
vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1);
vcpu_assoc[VCPU_ASSOC_SIZE - 1] = cpu_to_be32(index);
memcpy(vcpu_assoc + 1, spapr->numa_assoc_array[cpu->node_id] + 1,
(VCPU_ASSOC_SIZE - 2) * sizeof(uint32_t));
return vcpu_assoc;
}
int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt,
int offset, PowerPCCPU *cpu)
{
g_autofree uint32_t *vcpu_assoc = NULL;
vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu);
/* Advertise NUMA via ibm,associativity */
return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc,
VCPU_ASSOC_SIZE * sizeof(uint32_t));
}
int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
int offset)
{
MachineState *machine = MACHINE(spapr);
int nb_numa_nodes = machine->numa_state->num_nodes;
int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
uint32_t *int_buf, *cur_index, buf_len;
int ret, i;
/* ibm,associativity-lookup-arrays */
buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t);
cur_index = int_buf = g_malloc0(buf_len);
int_buf[0] = cpu_to_be32(nr_nodes);
/* Number of entries per associativity list */
int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
cur_index += 2;
for (i = 0; i < nr_nodes; i++) {
/*
* For the lookup-array we use the ibm,associativity array,
* from numa_assoc_array. without the first element (size).
*/
uint32_t *associativity = spapr->numa_assoc_array[i];
memcpy(cur_index, ++associativity,
sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS);
cur_index += MAX_DISTANCE_REF_POINTS;
}
ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
(cur_index - int_buf) * sizeof(uint32_t));
g_free(int_buf);
return ret;
}
/*
* Helper that writes ibm,associativity-reference-points and
* max-associativity-domains in the RTAS pointed by @rtas
* in the DT @fdt.
*/
void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
{
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
uint32_t refpoints[] = {
cpu_to_be32(0x4),
cpu_to_be32(0x4),
cpu_to_be32(0x2),
};
uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
uint32_t maxdomains[] = {
cpu_to_be32(4),
maxdomain,
maxdomain,
maxdomain,
cpu_to_be32(spapr->gpu_numa_id),
};
if (smc->pre_5_1_assoc_refpoints) {
nr_refpoints = 2;
}
_FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
refpoints, nr_refpoints * sizeof(refpoints[0])));
_FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
maxdomains, sizeof(maxdomains)));
}
static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong opcode,
target_ulong *args)
{
g_autofree uint32_t *vcpu_assoc = NULL;
target_ulong flags = args[0];
target_ulong procno = args[1];
PowerPCCPU *tcpu;
int idx, assoc_idx;
/* only support procno from H_REGISTER_VPA */
if (flags != 0x1) {
return H_FUNCTION;
}
tcpu = spapr_find_cpu(procno);
if (tcpu == NULL) {
return H_P2;
}
/*
* Given that we want to be flexible with the sizes and indexes,
* we must consider that there is a hard limit of how many
* associativities domain we can fit in R4 up to R9, which would be
* 12 associativity domains for vcpus. Assert and bail if that's
* not the case.
*/
G_STATIC_ASSERT((VCPU_ASSOC_SIZE - 1) <= 12);
vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu);
/* assoc_idx starts at 1 to skip associativity size */
assoc_idx = 1;
#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \
((uint64_t)(b) & 0xffffffff))
for (idx = 0; idx < 6; idx++) {
int32_t a, b;
/*
* vcpu_assoc[] will contain the associativity domains for tcpu,
* including tcpu->node_id and procno, meaning that we don't
* need to use these variables here.
*
* We'll read 2 values at a time to fill up the ASSOCIATIVITY()
* macro. The ternary will fill the remaining registers with -1
* after we went through vcpu_assoc[].
*/
a = assoc_idx < VCPU_ASSOC_SIZE ?
be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
b = assoc_idx < VCPU_ASSOC_SIZE ?
be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
args[idx] = ASSOCIATIVITY(a, b);
}
#undef ASSOCIATIVITY
return H_SUCCESS;
}
static void spapr_numa_register_types(void)
{
/* Virtual Processor Home Node */
spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
h_home_node_associativity);
}
type_init(spapr_numa_register_types)

View File

@ -27,16 +27,41 @@
#include "hw/ppc/spapr_nvdimm.h"
#include "hw/mem/nvdimm.h"
#include "qemu/nvdimm-utils.h"
#include "qemu/option.h"
#include "hw/ppc/fdt.h"
#include "qemu/range.h"
#include "sysemu/sysemu.h"
#include "hw/ppc/spapr_numa.h"
void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size,
Error **errp)
void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
uint64_t size, Error **errp)
{
char *uuidstr = NULL;
const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
const MachineState *ms = MACHINE(hotplug_dev);
const char *nvdimm_opt = qemu_opt_get(qemu_get_machine_opts(), "nvdimm");
g_autofree char *uuidstr = NULL;
QemuUUID uuid;
int ret;
if (!mc->nvdimm_supported) {
error_setg(errp, "NVDIMM hotplug not supported for this machine");
return;
}
/*
* NVDIMM support went live in 5.1 without considering that, in
* other archs, the user needs to enable NVDIMM support with the
* 'nvdimm' machine option and the default behavior is NVDIMM
* support disabled. It is too late to roll back to the standard
* behavior without breaking 5.1 guests. What we can do is to
* ensure that, if the user sets nvdimm=off, we error out
* regardless of being 5.1 or newer.
*/
if (!ms->nvdimms_state->is_enabled && nvdimm_opt) {
error_setg(errp, "nvdimm device found but 'nvdimm=off' was set");
return;
}
if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP,
&error_abort) == 0) {
error_setg(errp, "PAPR requires NVDIMM devices to have label-size set");
@ -54,7 +79,6 @@ void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size,
&error_abort);
ret = qemu_uuid_parse(uuidstr, &uuid);
g_assert(!ret);
g_free(uuidstr);
if (qemu_uuid_is_null(&uuid)) {
error_setg(errp, "NVDIMM device requires the uuid to be set");
@ -83,16 +107,6 @@ void spapr_add_nvdimm(DeviceState *dev, uint64_t slot, Error **errp)
}
}
int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
void *fdt, int *fdt_start_offset, Error **errp)
{
NVDIMMDevice *nvdimm = NVDIMM(drc->dev);
*fdt_start_offset = spapr_dt_nvdimm(fdt, 0, nvdimm);
return 0;
}
void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr)
{
MachineState *machine = MACHINE(spapr);
@ -104,8 +118,8 @@ void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr)
}
int spapr_dt_nvdimm(void *fdt, int parent_offset,
NVDIMMDevice *nvdimm)
static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
int parent_offset, NVDIMMDevice *nvdimm)
{
int child_offset;
char *buf;
@ -115,11 +129,6 @@ int spapr_dt_nvdimm(void *fdt, int parent_offset,
&error_abort);
uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP,
&error_abort);
uint32_t associativity[] = {
cpu_to_be32(0x4), /* length */
cpu_to_be32(0x0), cpu_to_be32(0x0),
cpu_to_be32(0x0), cpu_to_be32(node)
};
uint64_t lsize = nvdimm->label_size;
uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
NULL);
@ -139,8 +148,7 @@ int spapr_dt_nvdimm(void *fdt, int parent_offset,
_FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory")));
_FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory")));
_FDT((fdt_setprop(fdt, child_offset, "ibm,associativity", associativity,
sizeof(associativity))));
spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node);
buf = qemu_uuid_unparse_strdup(&nvdimm->uuid);
_FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf)));
@ -161,7 +169,17 @@ int spapr_dt_nvdimm(void *fdt, int parent_offset,
return child_offset;
}
void spapr_dt_persistent_memory(void *fdt)
int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
void *fdt, int *fdt_start_offset, Error **errp)
{
NVDIMMDevice *nvdimm = NVDIMM(drc->dev);
*fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm);
return 0;
}
void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt)
{
int offset = fdt_subnode_offset(fdt, 0, "persistent-memory");
GSList *iter, *nvdimms = nvdimm_get_device_list();
@ -179,7 +197,7 @@ void spapr_dt_persistent_memory(void *fdt)
for (iter = nvdimms; iter; iter = iter->next) {
NVDIMMDevice *nvdimm = iter->data;
spapr_dt_nvdimm(fdt, offset, nvdimm);
spapr_dt_nvdimm(spapr, fdt, offset, nvdimm);
}
g_slist_free(nvdimms);

View File

@ -52,6 +52,7 @@
#include "sysemu/kvm.h"
#include "sysemu/hostmem.h"
#include "sysemu/numa.h"
#include "hw/ppc/spapr_numa.h"
/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
#define RTAS_QUERY_FN 0
@ -2321,11 +2322,6 @@ int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
cpu_to_be32(1),
cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
};
uint32_t associativity[] = {cpu_to_be32(0x4),
cpu_to_be32(0x0),
cpu_to_be32(0x0),
cpu_to_be32(0x0),
cpu_to_be32(phb->numa_node)};
SpaprTceTable *tcet;
SpaprDrc *drc;
Error *err = NULL;
@ -2358,8 +2354,7 @@ int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
/* Advertise NUMA via ibm,associativity */
if (phb->numa_node != -1) {
_FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
sizeof(associativity)));
spapr_numa_write_associativity_dt(spapr, fdt, bus_off, phb->numa_node);
}
/* Build the interrupt-map, this must matches what is done

View File

@ -26,6 +26,7 @@
#include "qemu-common.h"
#include "hw/pci/pci.h"
#include "hw/pci-host/spapr.h"
#include "hw/ppc/spapr_numa.h"
#include "qemu/error-report.h"
#include "hw/ppc/fdt.h"
#include "hw/pci/pci_bridge.h"
@ -37,8 +38,6 @@
#define PHANDLE_NVLINK(phb, gn, nn) (0x00130000 | (((phb)->index) << 8) | \
((gn) << 4) | (nn))
#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
typedef struct SpaprPhbPciNvGpuSlot {
uint64_t tgt;
uint64_t gpa;
@ -360,13 +359,6 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev),
"nvlink2-mr[0]",
&error_abort);
uint32_t associativity[] = {
cpu_to_be32(0x4),
cpu_to_be32(nvslot->numa_id),
cpu_to_be32(nvslot->numa_id),
cpu_to_be32(nvslot->numa_id),
cpu_to_be32(nvslot->numa_id)
};
uint64_t size = object_property_get_uint(nv_mrobj, "size", NULL);
uint64_t mem_reg[2] = { cpu_to_be64(nvslot->gpa), cpu_to_be64(size) };
char *mem_name = g_strdup_printf("memory@%"PRIx64, nvslot->gpa);
@ -376,14 +368,8 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
_FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
_FDT((fdt_setprop(fdt, off, "reg", mem_reg, sizeof(mem_reg))));
if (sphb->pre_5_1_assoc) {
associativity[1] = SPAPR_GPU_NUMA_ID;
associativity[2] = SPAPR_GPU_NUMA_ID;
associativity[3] = SPAPR_GPU_NUMA_ID;
}
_FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
sizeof(associativity))));
spapr_numa_write_associativity_dt(SPAPR_MACHINE(qdev_get_machine()),
fdt, off, nvslot->numa_id);
_FDT((fdt_setprop_string(fdt, off, "compatible",
"ibm,coherent-device-memory")));

View File

@ -1219,6 +1219,9 @@ static void spapr_vscsi_realize(SpaprVioDevice *dev, Error **errp)
scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev),
&vscsi_scsi_info, NULL);
/* ibmvscsi SCSI bus does not allow hotplug. */
qbus_set_hotplug_handler(BUS(&s->bus), NULL);
}
void spapr_vscsi_create(SpaprVioBus *bus)

View File

@ -218,22 +218,12 @@ static void dummy_cpu_set_irq(void *opaque, int irq, int level)
{
}
static void main_cpu_reset(void *opaque)
static void sun4m_cpu_reset(void *opaque)
{
SPARCCPU *cpu = opaque;
CPUState *cs = CPU(cpu);
cpu_reset(cs);
cs->halted = 0;
}
static void secondary_cpu_reset(void *opaque)
{
SPARCCPU *cpu = opaque;
CPUState *cs = CPU(cpu);
cpu_reset(cs);
cs->halted = 1;
}
static void cpu_halt_signal(void *opaque, int irq, int level)
@ -819,21 +809,17 @@ static const TypeInfo ram_info = {
static void cpu_devinit(const char *cpu_type, unsigned int id,
uint64_t prom_addr, qemu_irq **cpu_irqs)
{
CPUState *cs;
SPARCCPU *cpu;
CPUSPARCState *env;
cpu = SPARC_CPU(cpu_create(cpu_type));
cpu = SPARC_CPU(object_new(cpu_type));
env = &cpu->env;
cpu_sparc_set_id(env, id);
if (id == 0) {
qemu_register_reset(main_cpu_reset, cpu);
} else {
qemu_register_reset(secondary_cpu_reset, cpu);
cs = CPU(cpu);
cs->halted = 1;
}
qemu_register_reset(sun4m_cpu_reset, cpu);
object_property_set_bool(OBJECT(cpu), "start-powered-off", id != 0,
&error_fatal);
qdev_realize_and_unref(DEVICE(cpu), NULL, &error_fatal);
*cpu_irqs = qemu_allocate_irqs(cpu_set_irq, cpu, MAX_PILS);
env->prom_addr = prom_addr;
}

View File

@ -374,6 +374,10 @@ struct CPUState {
bool created;
bool stop;
bool stopped;
/* Should CPU start in powered-off state? */
bool start_powered_off;
bool unplug;
bool crash_occurred;
bool exit_request;

View File

@ -53,6 +53,7 @@ enum ipmi_op {
#define IPMI_CC_INVALID_DATA_FIELD 0xcc
#define IPMI_CC_BMC_INIT_IN_PROGRESS 0xd2
#define IPMI_CC_COMMAND_NOT_SUPPORTED 0xd5
#define IPMI_CC_UNSPECIFIED 0xff
#define IPMI_NETFN_APP 0x06
#define IPMI_NETFN_OEM 0x3a

View File

@ -105,6 +105,21 @@ typedef enum {
#define FDT_MAX_SIZE 0x100000
/*
* NUMA related macros. MAX_DISTANCE_REF_POINTS was taken
* from Linux kernel arch/powerpc/mm/numa.h. It represents the
* amount of associativity domains for non-CPU resources.
*
* NUMA_ASSOC_SIZE is the base array size of an ibm,associativity
* array for any non-CPU resource.
*
* VCPU_ASSOC_SIZE represents the size of ibm,associativity array
* for CPUs, which has an extra element (vcpu_id) in the end.
*/
#define MAX_DISTANCE_REF_POINTS 4
#define NUMA_ASSOC_SIZE (MAX_DISTANCE_REF_POINTS + 1)
#define VCPU_ASSOC_SIZE (NUMA_ASSOC_SIZE + 1)
typedef struct SpaprCapabilities SpaprCapabilities;
struct SpaprCapabilities {
uint8_t caps[SPAPR_CAP_NUM];
@ -231,6 +246,8 @@ struct SpaprMachineState {
unsigned gpu_numa_id;
SpaprTpmProxy *tpm_proxy;
uint32_t numa_assoc_array[MAX_NODES][NUMA_ASSOC_SIZE];
Error *fwnmi_migration_blocker;
};

View File

@ -29,62 +29,21 @@
TYPE_SPAPR_DR_CONNECTOR)
#define TYPE_SPAPR_DRC_PHYSICAL "spapr-drc-physical"
#define SPAPR_DRC_PHYSICAL_GET_CLASS(obj) \
OBJECT_GET_CLASS(SpaprDrcClass, obj, TYPE_SPAPR_DRC_PHYSICAL)
#define SPAPR_DRC_PHYSICAL_CLASS(klass) \
OBJECT_CLASS_CHECK(SpaprDrcClass, klass, \
TYPE_SPAPR_DRC_PHYSICAL)
#define SPAPR_DRC_PHYSICAL(obj) OBJECT_CHECK(SpaprDrcPhysical, (obj), \
TYPE_SPAPR_DRC_PHYSICAL)
#define TYPE_SPAPR_DRC_LOGICAL "spapr-drc-logical"
#define SPAPR_DRC_LOGICAL_GET_CLASS(obj) \
OBJECT_GET_CLASS(SpaprDrcClass, obj, TYPE_SPAPR_DRC_LOGICAL)
#define SPAPR_DRC_LOGICAL_CLASS(klass) \
OBJECT_CLASS_CHECK(SpaprDrcClass, klass, \
TYPE_SPAPR_DRC_LOGICAL)
#define SPAPR_DRC_LOGICAL(obj) OBJECT_CHECK(SpaprDrc, (obj), \
TYPE_SPAPR_DRC_LOGICAL)
#define TYPE_SPAPR_DRC_CPU "spapr-drc-cpu"
#define SPAPR_DRC_CPU_GET_CLASS(obj) \
OBJECT_GET_CLASS(SpaprDrcClass, obj, TYPE_SPAPR_DRC_CPU)
#define SPAPR_DRC_CPU_CLASS(klass) \
OBJECT_CLASS_CHECK(SpaprDrcClass, klass, TYPE_SPAPR_DRC_CPU)
#define SPAPR_DRC_CPU(obj) OBJECT_CHECK(SpaprDrc, (obj), \
TYPE_SPAPR_DRC_CPU)
#define TYPE_SPAPR_DRC_PCI "spapr-drc-pci"
#define SPAPR_DRC_PCI_GET_CLASS(obj) \
OBJECT_GET_CLASS(SpaprDrcClass, obj, TYPE_SPAPR_DRC_PCI)
#define SPAPR_DRC_PCI_CLASS(klass) \
OBJECT_CLASS_CHECK(SpaprDrcClass, klass, TYPE_SPAPR_DRC_PCI)
#define SPAPR_DRC_PCI(obj) OBJECT_CHECK(SpaprDrc, (obj), \
TYPE_SPAPR_DRC_PCI)
#define TYPE_SPAPR_DRC_LMB "spapr-drc-lmb"
#define SPAPR_DRC_LMB_GET_CLASS(obj) \
OBJECT_GET_CLASS(SpaprDrcClass, obj, TYPE_SPAPR_DRC_LMB)
#define SPAPR_DRC_LMB_CLASS(klass) \
OBJECT_CLASS_CHECK(SpaprDrcClass, klass, TYPE_SPAPR_DRC_LMB)
#define SPAPR_DRC_LMB(obj) OBJECT_CHECK(SpaprDrc, (obj), \
TYPE_SPAPR_DRC_LMB)
#define TYPE_SPAPR_DRC_PHB "spapr-drc-phb"
#define SPAPR_DRC_PHB_GET_CLASS(obj) \
OBJECT_GET_CLASS(SpaprDrcClass, obj, TYPE_SPAPR_DRC_PHB)
#define SPAPR_DRC_PHB_CLASS(klass) \
OBJECT_CLASS_CHECK(SpaprDrcClass, klass, TYPE_SPAPR_DRC_PHB)
#define SPAPR_DRC_PHB(obj) OBJECT_CHECK(SpaprDrc, (obj), \
TYPE_SPAPR_DRC_PHB)
#define TYPE_SPAPR_DRC_PMEM "spapr-drc-pmem"
#define SPAPR_DRC_PMEM_GET_CLASS(obj) \
OBJECT_GET_CLASS(SpaprDrcClass, obj, TYPE_SPAPR_DRC_PMEM)
#define SPAPR_DRC_PMEM_CLASS(klass) \
OBJECT_CLASS_CHECK(SpaprDrcClass, klass, TYPE_SPAPR_DRC_PMEM)
#define SPAPR_DRC_PMEM(obj) OBJECT_CHECK(SpaprDrc, (obj), \
TYPE_SPAPR_DRC_PMEM)
/*
* Various hotplug types managed by SpaprDrc
*

View File

@ -0,0 +1,35 @@
/*
* QEMU PowerPC pSeries Logical Partition NUMA associativity handling
*
* Copyright IBM Corp. 2020
*
* Authors:
* Daniel Henrique Barboza <danielhb413@gmail.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef HW_SPAPR_NUMA_H
#define HW_SPAPR_NUMA_H
#include "hw/boards.h"
#include "hw/ppc/spapr.h"
/*
* Having both SpaprMachineState and MachineState as arguments
* feels odd, but it will spare a MACHINE() call inside the
* function. spapr_machine_init() is the only caller for it, and
* it has both pointers resolved already.
*/
void spapr_numa_associativity_init(SpaprMachineState *spapr,
MachineState *machine);
void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas);
void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
int offset, int nodeid);
int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt,
int offset, PowerPCCPU *cpu);
int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
int offset);
#endif /* HW_SPAPR_NUMA_H */

View File

@ -27,10 +27,9 @@ QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE);
int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
void *fdt, int *fdt_start_offset, Error **errp);
int spapr_dt_nvdimm(void *fdt, int parent_offset, NVDIMMDevice *nvdimm);
void spapr_dt_persistent_memory(void *fdt);
void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size,
Error **errp);
void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt);
void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
uint64_t size, Error **errp);
void spapr_add_nvdimm(DeviceState *dev, uint64_t slot, Error **errp);
void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr);

View File

@ -49,6 +49,8 @@ typedef struct SpaprXive {
void *tm_mmap;
MemoryRegion tm_mmio_kvm;
VMChangeStateEntry *change;
uint8_t hv_prio;
} SpaprXive;
typedef struct SpaprXiveClass {

View File

@ -174,8 +174,7 @@ static void arm_cpu_reset(DeviceState *dev)
env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1;
env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2;
cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON;
s->halted = cpu->start_powered_off;
cpu->power_state = s->start_powered_off ? PSCI_OFF : PSCI_ON;
if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q';
@ -2186,7 +2185,6 @@ static const ARMCPUInfo arm_cpus[] = {
};
static Property arm_cpu_properties[] = {
DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false),
DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0),
DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0),
DEFINE_PROP_UINT64("mp-affinity", ARMCPU,

View File

@ -817,9 +817,6 @@ struct ARMCPU {
*/
uint32_t psci_version;
/* Should CPU start in PSCI powered-off state? */
bool start_powered_off;
/* Current power state, access guarded by BQL */
ARMPSCIState power_state;

View File

@ -218,7 +218,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
/* Determine init features for this CPU */
memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
if (cpu->start_powered_off) {
if (cs->start_powered_off) {
cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
}
if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {

View File

@ -774,7 +774,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
/* Determine init features for this CPU */
memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
if (cpu->start_powered_off) {
if (cs->start_powered_off) {
cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
}
if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {

View File

@ -291,9 +291,9 @@ static void s390_cpu_initfn(Object *obj)
S390CPU *cpu = S390_CPU(obj);
cpu_set_cpustate_pointers(cpu);
cs->halted = 1;
cs->exception_index = EXCP_HLT;
#if !defined(CONFIG_USER_ONLY)
cs->start_powered_off = true;
object_property_add(obj, "crash-information", "GuestPanicInformation",
s390_cpu_get_crash_info_qom, NULL, NULL, NULL);
cpu->env.tod_timer =