From cd523a4181b152004fc0aed18381aefe600ac38e Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 2 Nov 2021 16:51:57 +0100 Subject: [PATCH 01/20] net/vhost-vdpa: fix memory leak in vhost_vdpa_get_max_queue_pairs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use g_autofree to ensure that `config` is freed when vhost_vdpa_get_max_queue_pairs() returns. Reported-by: Coverity (CID 1465228: RESOURCE_LEAK) Fixes: 402378407d ("vhost-vdpa: multiqueue support") Signed-off-by: Stefano Garzarella Message-Id: <20211102155157.241034-1-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé Acked-by: Jason Wang --- net/vhost-vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 49ab322511..373b706b90 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -214,7 +214,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) { unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); - struct vhost_vdpa_config *config; + g_autofree struct vhost_vdpa_config *config = NULL; __virtio16 *max_queue_pairs; uint64_t features; int ret; From b66cecb238d065628b85a18357d28d21618a4580 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 Nov 2021 16:33:42 +0000 Subject: [PATCH 02/20] softmmu/qdev-monitor: fix use-after-free in qdev_set_id() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by Coverity (CID 1465222). Fixes: 4a1d937796de0fecd8b22d7dbebf87f38e8282fd ("softmmu/qdev-monitor: add error handling in qdev_set_id") Cc: Damien Hedde Cc: Kevin Wolf Cc: Michael S. Tsirkin Signed-off-by: Stefan Hajnoczi Message-Id: <20211102163342.31162-1-stefanha@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Kevin Wolf Reviewed-by: Marc-André Lureau Reviewed-by: Damien Hedde Reviewed-by: Markus Armbruster --- softmmu/qdev-monitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index f8b3a4cd82..588a62b88d 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -593,8 +593,8 @@ const char *qdev_set_id(DeviceState *dev, char *id, Error **errp) if (prop) { dev->id = id; } else { - g_free(id); error_setg(errp, "Duplicate device ID '%s'", id); + g_free(id); return NULL; } } else { From 245cf2c24eb0d5a4fe75151e1794aa5cd53b130d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 4 Nov 2021 09:56:24 +0100 Subject: [PATCH 03/20] vhost: Rename last_index to vq_index_end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The doc of this field pointed out that last_index is the last vq index. This is misleading, since it's actually one past the end of the vqs. Renaming and modifying comment. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20211104085625.2054959-2-eperezma@redhat.com> Reviewed-by: Juan Quintela Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 4 ++-- hw/virtio/vhost-vdpa.c | 2 +- include/hw/virtio/vhost.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 0d888f29a6..29f2c4212f 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -232,10 +232,10 @@ fail: } static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index, - int last_index) + int vq_index_end) { net->dev.vq_index = vq_index; - net->dev.last_index = last_index; + net->dev.vq_index_end = vq_index_end; } static int vhost_net_start_one(struct vhost_net *net, diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 0d8051426c..bcaf00e09f 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -645,7 +645,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); } - if (dev->vq_index + dev->nvqs != dev->last_index) { + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { return 0; } diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 3fa0b554ef..58a73e7b7a 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -74,8 +74,8 @@ struct vhost_dev { unsigned int nvqs; /* the first virtqueue which would be used by this vhost dev */ int vq_index; - /* the last vq index for the virtio device (not vhost) */ - int last_index; + /* one past the last vq index for the virtio device (not vhost) */ + int vq_index_end; /* if non-zero, minimum required value for max_queues */ int num_queues; uint64_t features; From 14c81b2191fffb61cd6d2a7a839d34ec2d5e09a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 4 Nov 2021 09:56:25 +0100 Subject: [PATCH 04/20] vhost: Fix last vq queue index of devices with no cvq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The -1 assumes that cvq device model is accounted in data_queue_pairs, if cvq does not exists, but it's actually the opposite: Devices with !cvq are ok but devices with cvq does not add the last queue to data_queue_pairs. This is not a problem to vhost-net, but it is to vhost-vdpa: * Devices with cvq gets initialized at last data vq device model, not at cvq one. * Devices with !cvq never gets initialized, since last_index is the first queue of the last device model. Because of that, the right change in last_index is to actually add the cvq, not to remove the missing one. This is not a problem to vhost-net, but it is to vhost-vdpa, which device model trust to reach the last index to finish starting the device. Also, as the previous commit, rename it to index_end. Tested with vp_vdpa with host's vhost=on and vhost=off, with ctrl_vq=on and ctrl_vq=off. Fixes: 049eb15b5fc9 ("vhost: record the last virtqueue index for the virtio device") Reviewed-by: Juan Quintela Signed-off-by: Eugenio Pérez Message-Id: <20211104085625.2054959-3-eperezma@redhat.com> Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 29f2c4212f..30379d2ca4 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -326,11 +326,11 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, VirtIONet *n = VIRTIO_NET(dev); int nvhosts = data_queue_pairs + cvq; struct vhost_net *net; - int r, e, i, last_index = data_queue_pairs * 2; + int r, e, i, index_end = data_queue_pairs * 2; NetClientState *peer; - if (!cvq) { - last_index -= 1; + if (cvq) { + index_end += 1; } if (!k->set_guest_notifiers) { @@ -347,7 +347,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } net = get_vhost_net(peer); - vhost_net_set_vq_index(net, i * 2, last_index); + vhost_net_set_vq_index(net, i * 2, index_end); /* Suppress the masking guest notifiers on vhost user * because vhost user doesn't interrupt masking/unmasking From be81ba604260f4beae1522241c579eeda1b891fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 6 Nov 2021 15:50:16 +0100 Subject: [PATCH 05/20] hw/mem/pc-dimm: Restrict NUMA-specific code to NUMA machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When trying to use the pc-dimm device on a non-NUMA machine, we get: $ qemu-system-arm -M none -cpu max -S \ -object memory-backend-file,id=mem1,size=1M,mem-path=/tmp/1m \ -device pc-dimm,id=dimm1,memdev=mem1 Segmentation fault (core dumped) (gdb) bt #0 pc_dimm_realize (dev=0x555556da3e90, errp=0x7fffffffcd10) at hw/mem/pc-dimm.c:184 #1 0x0000555555fe1f8f in device_set_realized (obj=0x555556da3e90, value=true, errp=0x7fffffffce18) at hw/core/qdev.c:531 #2 0x0000555555feb4a9 in property_set_bool (obj=0x555556da3e90, v=0x555556e54420, name=0x5555563c3c41 "realized", opaque=0x555556a704f0, errp=0x7fffffffce18) at qom/object.c:2257 To avoid that crash, restrict the pc-dimm NUMA check to machines supporting NUMA, and do not allow the use of 'node' property on non-NUMA machines. Suggested-by: Igor Mammedov Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20211106145016.611332-1-f4bug@amsat.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/pc-dimm.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index a3a2560301..48b913aba6 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -181,7 +181,21 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp) PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MachineState *ms = MACHINE(qdev_get_machine()); - int nb_numa_nodes = ms->numa_state->num_nodes; + + if (ms->numa_state) { + int nb_numa_nodes = ms->numa_state->num_nodes; + + if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || + (!nb_numa_nodes && dimm->node)) { + error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" + PRIu32 "' which exceeds the number of numa nodes: %d", + dimm->node, nb_numa_nodes ? nb_numa_nodes : 1); + return; + } + } else if (dimm->node > 0) { + error_setg(errp, "machine doesn't support NUMA"); + return; + } if (!dimm->hostmem) { error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set"); @@ -191,13 +205,6 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp) object_get_canonical_path_component(OBJECT(dimm->hostmem))); return; } - if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || - (!nb_numa_nodes && dimm->node)) { - error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" - PRIu32 "' which exceeds the number of numa nodes: %d", - dimm->node, nb_numa_nodes ? nb_numa_nodes : 1); - return; - } if (ddc->realize) { ddc->realize(dimm, errp); From 2aa1842d6d79dcd1b84c58eeb44591a99a9e56df Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 12 Nov 2021 06:08:53 -0500 Subject: [PATCH 06/20] pcie: rename 'native-hotplug' to 'x-native-hotplug' Mark property as experimental/internal adding 'x-' prefix. Property was introduced in 6.1 and it should have provided ability to turn on native PCIE hotplug on port even when ACPI PCI hotplug is in use is user explicitly sets property on CLI. However that never worked since slot is wired to ACPI hotplug controller. Another non-intended usecase: disable native hotplug on slot when APCI based hotplug is disabled, which works but slot has 'hotplug' property for this taks. It should be relatively safe to rename it to experimental as no users should exist for it and given that the property is broken we don't really want to leave it around for much longer lest users start using it. Signed-off-by: Igor Mammedov Reviewed-by: Ani Sinha Message-Id: <20211112110857.3116853-2-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_q35.c | 2 +- hw/pci/pcie_port.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 797e09500b..fc34b905ee 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -243,7 +243,7 @@ static void pc_q35_init(MachineState *machine) NULL); if (acpi_pcihp) { - object_register_sugar_prop(TYPE_PCIE_SLOT, "native-hotplug", + object_register_sugar_prop(TYPE_PCIE_SLOT, "x-native-hotplug", "false", true); } diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index da850e8dde..e95c1e5519 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -148,7 +148,7 @@ static Property pcie_slot_props[] = { DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true), - DEFINE_PROP_BOOL("native-hotplug", PCIESlot, native_hotplug, true), + DEFINE_PROP_BOOL("x-native-hotplug", PCIESlot, native_hotplug, true), DEFINE_PROP_END_OF_LIST() }; From c318bef76206c2ecb6016e8e68c4ac6ff9a4c8cb Mon Sep 17 00:00:00 2001 From: Julia Suvorova Date: Fri, 12 Nov 2021 06:08:54 -0500 Subject: [PATCH 07/20] hw/acpi/ich9: Add compat prop to keep HPC bit set for 6.1 machine type To solve issues [1-2] the Hot Plug Capable bit in PCIe Slots will be turned on, while the switch to ACPI Hot-plug will be done in the DSDT table. Introducing 'x-keep-native-hpc' property disables the HPC bit only in 6.1 and as a result keeps the forced 'reserve-io' on pcie-root-ports in 6.1 too. [1] https://gitlab.com/qemu-project/qemu/-/issues/641 [2] https://bugzilla.redhat.com/show_bug.cgi?id=2006409 Signed-off-by: Julia Suvorova Signed-off-by: Igor Mammedov Message-Id: <20211112110857.3116853-3-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ich9.c | 18 ++++++++++++++++++ hw/i386/pc.c | 2 ++ hw/i386/pc_q35.c | 7 ++++++- include/hw/acpi/ich9.h | 1 + 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 1ee2ba2c50..ebe08ed831 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -419,6 +419,20 @@ static void ich9_pm_set_acpi_pci_hotplug(Object *obj, bool value, Error **errp) s->pm.use_acpi_hotplug_bridge = value; } +static bool ich9_pm_get_keep_pci_slot_hpc(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.keep_pci_slot_hpc; +} + +static void ich9_pm_set_keep_pci_slot_hpc(Object *obj, bool value, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.keep_pci_slot_hpc = value; +} + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; @@ -428,6 +442,7 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) pm->disable_s4 = 0; pm->s4_val = 2; pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, &pm->pm_io_base, OBJ_PROP_FLAG_READ); @@ -454,6 +469,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) object_property_add_bool(obj, ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, ich9_pm_get_acpi_pci_hotplug, ich9_pm_set_acpi_pci_hotplug); + object_property_add_bool(obj, "x-keep-pci-slot-hpc", + ich9_pm_get_keep_pci_slot_hpc, + ich9_pm_set_keep_pci_slot_hpc); } void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 2592a82148..a2ef40ecbc 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -98,6 +98,7 @@ GlobalProperty pc_compat_6_1[] = { { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, + { "ICH9-LPC", "x-keep-pci-slot-hpc", "false" }, }; const size_t pc_compat_6_1_len = G_N_ELEMENTS(pc_compat_6_1); @@ -107,6 +108,7 @@ GlobalProperty pc_compat_6_0[] = { { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, + { "ICH9-LPC", "x-keep-pci-slot-hpc", "true" }, }; const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index fc34b905ee..e1e100316d 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -137,6 +137,7 @@ static void pc_q35_init(MachineState *machine) DriveInfo *hd[MAX_SATA_PORTS]; MachineClass *mc = MACHINE_GET_CLASS(machine); bool acpi_pcihp; + bool keep_pci_slot_hpc; /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping @@ -242,7 +243,11 @@ static void pc_q35_init(MachineState *machine) ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, NULL); - if (acpi_pcihp) { + keep_pci_slot_hpc = object_property_get_bool(OBJECT(lpc), + "x-keep-pci-slot-hpc", + NULL); + + if (!keep_pci_slot_hpc && acpi_pcihp) { object_register_sugar_prop(TYPE_PCIE_SLOT, "x-native-hotplug", "false", true); } diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index f04f1791bd..7ca92843c6 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -56,6 +56,7 @@ typedef struct ICH9LPCPMRegs { AcpiCpuHotplug gpe_cpu; CPUHotplugState cpuhp_state; + bool keep_pci_slot_hpc; bool use_acpi_hotplug_bridge; AcpiPciHpState acpi_pci_hotplug; MemHotplugState acpi_memory_hotplug; From be12e3a016f10f4daa1a248df97198f4b10ef8c4 Mon Sep 17 00:00:00 2001 From: Julia Suvorova Date: Fri, 12 Nov 2021 06:08:55 -0500 Subject: [PATCH 08/20] bios-tables-test: Allow changes in DSDT ACPI tables Prepare for changing the _OSC method in q35 DSDT. Signed-off-by: Julia Suvorova Signed-off-by: Igor Mammedov Acked-by: Ani Sinha Message-Id: <20211112110857.3116853-4-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..48e5634d4b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,17 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/q35/DSDT", +"tests/data/acpi/q35/DSDT.tis", +"tests/data/acpi/q35/DSDT.bridge", +"tests/data/acpi/q35/DSDT.mmio64", +"tests/data/acpi/q35/DSDT.ipmibt", +"tests/data/acpi/q35/DSDT.cphp", +"tests/data/acpi/q35/DSDT.memhp", +"tests/data/acpi/q35/DSDT.acpihmat", +"tests/data/acpi/q35/DSDT.numamem", +"tests/data/acpi/q35/DSDT.dimmpxm", +"tests/data/acpi/q35/DSDT.nohpet", +"tests/data/acpi/q35/DSDT.tis.tpm2", +"tests/data/acpi/q35/DSDT.tis.tpm12", +"tests/data/acpi/q35/DSDT.multi-bridge", +"tests/data/acpi/q35/DSDT.ivrs", +"tests/data/acpi/q35/DSDT.xapic", From 211afe5c69b597acf85fdd577eb497f5be1ffbd8 Mon Sep 17 00:00:00 2001 From: Julia Suvorova Date: Fri, 12 Nov 2021 06:08:56 -0500 Subject: [PATCH 09/20] hw/i386/acpi-build: Deny control on PCIe Native Hot-plug in _OSC There are two ways to enable ACPI PCI Hot-plug: * Disable the Hot-plug Capable bit on PCIe slots. This was the first approach which led to regression [1-2], as I/O space for a port is allocated only when it is hot-pluggable, which is determined by HPC bit. * Leave the HPC bit on and disable PCIe Native Hot-plug in _OSC method. This removes the (future) ability of hot-plugging switches with PCIe Native hotplug since ACPI PCI Hot-plug only works with cold-plugged bridges. If the user wants to explicitely use this feature, they can disable ACPI PCI Hot-plug with: --global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off Change the bit in _OSC method so that the OS selects ACPI PCI Hot-plug instead of PCIe Native. [1] https://gitlab.com/qemu-project/qemu/-/issues/641 [2] https://bugzilla.redhat.com/show_bug.cgi?id=2006409 Signed-off-by: Julia Suvorova Signed-off-by: Igor Mammedov Message-Id: <20211112110857.3116853-5-imammedo@redhat.com> Reviewed-by: Ani Sinha Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a3ad6abd33..a99c6e4fe3 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1337,7 +1337,7 @@ static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) aml_append(table, scope); } -static Aml *build_q35_osc_method(void) +static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug) { Aml *if_ctx; Aml *if_ctx2; @@ -1359,8 +1359,10 @@ static Aml *build_q35_osc_method(void) /* * Always allow native PME, AER (no dependencies) * Allow SHPC (PCI bridges can have SHPC controller) + * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled. */ - aml_append(if_ctx, aml_and(a_ctrl, aml_int(0x1F), a_ctrl)); + aml_append(if_ctx, aml_and(a_ctrl, + aml_int(0x1E | (enable_native_pcie_hotplug ? 0x1 : 0x0)), a_ctrl)); if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); /* Unknown revision */ @@ -1449,7 +1451,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(dev, build_q35_osc_method()); + aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en)); aml_append(sb_scope, dev); if (mcfg_valid) { aml_append(sb_scope, build_q35_dram_controller(&mcfg)); @@ -1565,7 +1567,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, if (pci_bus_is_express(bus)) { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); - aml_append(dev, build_q35_osc_method()); + + /* Expander bridges do not have ACPI PCI Hot-plug enabled */ + aml_append(dev, build_q35_osc_method(true)); } else { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); } From 7e6055c99f2f1f5e6a206e8d07a9f45508832611 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 12 Nov 2021 06:08:57 -0500 Subject: [PATCH 10/20] tests: bios-tables-test update expected blobs The changes are the result of 'hw/i386/acpi-build: Deny control on PCIe Native Hot-Plug in _OSC' which hides PCIE hotplug bit in host-bridge _OSC Method (_OSC, 4, NotSerialized) // _OSC: Operating System Capabilities { CreateDWordField (Arg3, Zero, CDW1) If ((Arg0 == ToUUID ("33db4d5b-1ff7-401c-9657-7441c03dd766") /* PCI Host Bridge Device */)) { CreateDWordField (Arg3, 0x04, CDW2) CreateDWordField (Arg3, 0x08, CDW3) Local0 = CDW3 /* \_SB_.PCI0._OSC.CDW3 */ - Local0 &= 0x1F + Local0 &= 0x1E Signed-off-by: Igor Mammedov Message-Id: <20211112110857.3116853-6-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/q35/DSDT | Bin 8289 -> 8289 bytes tests/data/acpi/q35/DSDT.acpihmat | Bin 9614 -> 9614 bytes tests/data/acpi/q35/DSDT.bridge | Bin 11003 -> 11003 bytes tests/data/acpi/q35/DSDT.cphp | Bin 8753 -> 8753 bytes tests/data/acpi/q35/DSDT.dimmpxm | Bin 9943 -> 9943 bytes tests/data/acpi/q35/DSDT.ipmibt | Bin 8364 -> 8364 bytes tests/data/acpi/q35/DSDT.ivrs | Bin 8306 -> 8306 bytes tests/data/acpi/q35/DSDT.memhp | Bin 9648 -> 9648 bytes tests/data/acpi/q35/DSDT.mmio64 | Bin 9419 -> 9419 bytes tests/data/acpi/q35/DSDT.multi-bridge | Bin 8583 -> 8583 bytes tests/data/acpi/q35/DSDT.nohpet | Bin 8147 -> 8147 bytes tests/data/acpi/q35/DSDT.numamem | Bin 8295 -> 8295 bytes tests/data/acpi/q35/DSDT.tis.tpm12 | Bin 8894 -> 8894 bytes tests/data/acpi/q35/DSDT.tis.tpm2 | Bin 8894 -> 8894 bytes tests/data/acpi/q35/DSDT.xapic | Bin 35652 -> 35652 bytes tests/qtest/bios-tables-test-allowed-diff.h | 16 ---------------- 16 files changed, 16 deletions(-) diff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT index 281fc82c03b2562d2e6b7caec0d817b034a47138..c1965f6051ef2af81dd8412abe169d87845bb033 100644 GIT binary patch delta 24 gcmaFp@X&$FCDET<0BnZ{w*UYD delta 24 gcmaFp@X&$FCDET<0BnK?w*UYD diff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat index 8c1e05a11a328ec1cc6f86e36e52c28f41f9744e..f24d4874bff8d327a165ed7c36de507aea114edd 100644 GIT binary patch delta 24 fcmeD4?(^ny33dtTQ)OUa+&+=(3ZvY{`|DKzU@Hhn delta 24 fcmeD4?(^ny33dtTQ)OUa+%}Qx3ZwkS`|DKzU?vDi diff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge index 6f1464b6c712d7f33cb4b891b7ce76fe228f44c9..424d51bd1cb39ea73501ef7d0044ee52cec5bdac 100644 GIT binary patch delta 24 gcmewz`a6`%CDF$oF>WH)6-K#@_k$DxTWtqt delta 24 fcmdn!veAXhCDF$oF?J%?6-N1u_k$DxTWAMo diff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm index fe5820d93d057ef09a001662369b15afbc5b87e2..76e451e829ec4c245315f7eed8731aa1be45a747 100644 GIT binary patch delta 24 gcmccad)=4ICDD~$3R?@yKo0BrFHn*aa+ diff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp index 9bc11518fc57687ca789dc70793b48b29a0d74ed..4e9cb3dc6896bb79ccac0fe342a404549f6610e8 100644 GIT binary patch delta 24 gcmdnsy}_HyCD7Sg;8$f{S^uTTRaEr delta 24 fcmZp7Zg=K#33dr-S7cyd?48JUg;9Rv{S^uTTQ>*m diff --git a/tests/data/acpi/q35/DSDT.nohpet b/tests/data/acpi/q35/DSDT.nohpet index e8202e6ddfbe96071f32f1ec05758f650569943e..83d1aa00ac5686df479673fb0d7830f946e25dea 100644 GIT binary patch delta 24 gcmca?f7zbPCDn+a delta 24 gcmaFv@Z5pRCDn+a diff --git a/tests/data/acpi/q35/DSDT.tis.tpm12 b/tests/data/acpi/q35/DSDT.tis.tpm12 index c96b5277a14ae98174408d690d6e0246bd932623..0ebdf6fbd77967f1ab5d5337b7b1fed314cfaca8 100644 GIT binary patch delta 24 gcmdnzy3du%CDyjp@iVCN7s?mk^h31_s6r6S=N1%5A)#+64f6_X&Rh delta 26 icmX>yjp@iVCN7s?mk^h31_s9U6S=N1%5S`%+64f6@(F(c diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index 48e5634d4b..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,17 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/q35/DSDT", -"tests/data/acpi/q35/DSDT.tis", -"tests/data/acpi/q35/DSDT.bridge", -"tests/data/acpi/q35/DSDT.mmio64", -"tests/data/acpi/q35/DSDT.ipmibt", -"tests/data/acpi/q35/DSDT.cphp", -"tests/data/acpi/q35/DSDT.memhp", -"tests/data/acpi/q35/DSDT.acpihmat", -"tests/data/acpi/q35/DSDT.numamem", -"tests/data/acpi/q35/DSDT.dimmpxm", -"tests/data/acpi/q35/DSDT.nohpet", -"tests/data/acpi/q35/DSDT.tis.tpm2", -"tests/data/acpi/q35/DSDT.tis.tpm12", -"tests/data/acpi/q35/DSDT.multi-bridge", -"tests/data/acpi/q35/DSDT.ivrs", -"tests/data/acpi/q35/DSDT.xapic", From f463e761a41ee71e59892121e1c74d9c25c985d2 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 11 Nov 2021 14:38:53 +0800 Subject: [PATCH 11/20] virtio: use virtio accessor to access packed descriptor flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to access packed descriptor flags via address_space_{write|read}_cached(). When we hit the cache, memcpy() is used which is not an atomic operation which may lead a wrong value is read or wrote. So this patch switches to use virito_{stw|lduw}_phys_cached() to make sure the aceess is atomic. Fixes: 86044b24e865f ("virtio: basic packed virtqueue support") Cc: qemu-stable@nongnu.org Signed-off-by: Jason Wang Message-Id: <20211111063854.29060-1-jasowang@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index cc69a9b881..939bcbfeb9 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -507,11 +507,9 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev, MemoryRegionCache *cache, int i) { - address_space_read_cached(cache, - i * sizeof(VRingPackedDesc) + - offsetof(VRingPackedDesc, flags), - flags, sizeof(*flags)); - virtio_tswap16s(vdev, flags); + hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); + + *flags = virtio_lduw_phys_cached(vdev, cache, off); } static void vring_packed_desc_read(VirtIODevice *vdev, @@ -564,8 +562,7 @@ static void vring_packed_desc_write_flags(VirtIODevice *vdev, { hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); - virtio_tswap16s(vdev, &desc->flags); - address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags)); + virtio_stw_phys_cached(vdev, cache, off, desc->flags); address_space_cache_invalidate(cache, off, sizeof(desc->flags)); } From d152cdd6f6fad381e804c8185f0ba938030ccac9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 11 Nov 2021 14:38:54 +0800 Subject: [PATCH 12/20] virtio: use virtio accessor to access packed event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to access packed descriptor event and off_wrap via address_space_{write|read}_cached(). When we hit the cache, memcpy() is used which is not atomic which may lead a wrong value to be read or wrote. This patch fixes this by switching to use virito_{stw|lduw}_phys_cached() to make sure the access is atomic. Fixes: 683f7665679c1 ("virtio: event suppression support for packed ring") Cc: qemu-stable@nongnu.org Signed-off-by: Jason Wang Message-Id: <20211111063854.29060-2-jasowang@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 939bcbfeb9..ea7c079fb0 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -247,13 +247,10 @@ static void vring_packed_event_read(VirtIODevice *vdev, hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap); hwaddr off_flags = offsetof(VRingPackedDescEvent, flags); - address_space_read_cached(cache, off_flags, &e->flags, - sizeof(e->flags)); + e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags); /* Make sure flags is seen before off_wrap */ smp_rmb(); - address_space_read_cached(cache, off_off, &e->off_wrap, - sizeof(e->off_wrap)); - virtio_tswap16s(vdev, &e->off_wrap); + e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off); virtio_tswap16s(vdev, &e->flags); } @@ -263,8 +260,7 @@ static void vring_packed_off_wrap_write(VirtIODevice *vdev, { hwaddr off = offsetof(VRingPackedDescEvent, off_wrap); - virtio_tswap16s(vdev, &off_wrap); - address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap)); + virtio_stw_phys_cached(vdev, cache, off, off_wrap); address_space_cache_invalidate(cache, off, sizeof(off_wrap)); } @@ -273,8 +269,7 @@ static void vring_packed_flags_write(VirtIODevice *vdev, { hwaddr off = offsetof(VRingPackedDescEvent, flags); - virtio_tswap16s(vdev, &flags); - address_space_write_cached(cache, off, &flags, sizeof(flags)); + virtio_stw_phys_cached(vdev, cache, off, flags); address_space_cache_invalidate(cache, off, sizeof(flags)); } From 0351152b6f16e23e65407b4c4b27e23b46aa3801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 12 Nov 2021 20:34:30 +0100 Subject: [PATCH 13/20] vdpa: Replace qemu_open_old by qemu_open at MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no reason to keep using the old one, since we neither use the variadics arguments nor open it with O_DIRECT. Also, net_client_init1, the caller of net_init_vhost_vdpa, wants all net_client_init_fun to use Error API, so it's a good step in that direction. Signed-off-by: Eugenio Pérez Message-Id: <20211112193431.2379298-2-eperezma@redhat.com> Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 373b706b90..1a7250b980 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -261,7 +261,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); opts = &netdev->u.vhost_vdpa; - vdpa_device_fd = qemu_open_old(opts->vhostdev, O_RDWR); + vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); if (vdpa_device_fd == -1) { return -errno; } From c829540401f280f0cad24131f4e7b92e81d506fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 12 Nov 2021 20:34:31 +0100 Subject: [PATCH 14/20] vdpa: Check for existence of opts.vhostdev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since net_init_vhost_vdpa is trying to open it. Not specifying it in the command line crash qemu. Fixes: 7327813d17 ("vhost-vdpa: open device fd in net_init_vhost_vdpa()") Signed-off-by: Eugenio Pérez Message-Id: <20211112193431.2379298-3-eperezma@redhat.com> Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 1a7250b980..2e3c22a8c7 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -260,6 +260,10 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); opts = &netdev->u.vhost_vdpa; + if (!opts->vhostdev) { + error_setg(errp, "vdpa character device not specified with vhostdev"); + return -1; + } vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); if (vdpa_device_fd == -1) { From 23786d13441d36e0efcfee94ba8ff218746bed6c Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:54 +0100 Subject: [PATCH 15/20] pci: implement power state This allows to power off pci devices. In "off" state the devices will not be visible. No pci config space access, no pci bar access, no dma. Default state is "on", so this patch (alone) should not change behavior. Use case: Allows hotplug controllers implement slot power. Hotplug controllers doing so should set the inital power state for devices in the ->plug callback. Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-2-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 25 +++++++++++++++++++++++-- hw/pci/pci_host.c | 6 ++++-- include/hw/pci/pci.h | 2 ++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 4a84e478ce..e5993c1ef5 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1380,6 +1380,9 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); + if (!d->has_power) { + new_addr = PCI_BAR_UNMAPPED; + } /* This bar isn't changed */ if (new_addr == r->addr) @@ -1464,8 +1467,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int if (range_covers_byte(addr, l, PCI_COMMAND)) { pci_update_irq_disabled(d, was_irq_disabled); memory_region_set_enabled(&d->bus_master_enable_region, - pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); } msi_write_config(d, addr, val_in, l); @@ -2182,6 +2185,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_qdev_unrealize(DEVICE(pci_dev)); return; } + + pci_set_power(pci_dev, true); } PCIDevice *pci_new_multifunction(int devfn, bool multifunction, @@ -2853,6 +2858,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) return msg; } +void pci_set_power(PCIDevice *d, bool state) +{ + if (d->has_power == state) { + return; + } + + d->has_power = state; + pci_update_mappings(d); + memory_region_set_enabled(&d->bus_master_enable_region, + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); + if (!d->has_power) { + pci_device_reset(d); + } +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index cf02f0d6a5..7beafd40a8 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -74,7 +74,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power) { return; } @@ -97,7 +98,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power) { return ~0x0; } diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 5c4016b995..e7cdf2d5ec 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -268,6 +268,7 @@ typedef struct PCIReqIDCache PCIReqIDCache; struct PCIDevice { DeviceState qdev; bool partially_hotplugged; + bool has_power; /* PCI config space */ uint8_t *config; @@ -908,5 +909,6 @@ extern const VMStateDescription vmstate_pci_device; } MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); +void pci_set_power(PCIDevice *pci_dev, bool state); #endif From d5daff7d312653b92f23c7a8e198090b32b8dae6 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:55 +0100 Subject: [PATCH 16/20] pcie: implement slot power control for pcie root ports With this patch hot-plugged pci devices will only be visible to the guest if the guests hotplug driver has enabled slot power. This should fix the hot-plug race which one can hit when hot-plugging a pci device at boot, while the guest is in the middle of the pci bus scan. Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-3-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 914a9bf3d1..13d11a57c7 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -366,6 +366,29 @@ static void hotplug_event_clear(PCIDevice *dev) } } +static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + bool *power = opaque; + + pci_set_power(dev, *power); +} + +static void pcie_cap_update_power(PCIDevice *hotplug_dev) +{ + uint8_t *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(hotplug_dev)); + uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); + bool power = true; + + if (sltcap & PCI_EXP_SLTCAP_PCP) { + power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON; + } + + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + pcie_set_power_device, &power); +} + /* * A PCI Express Hot-Plug Event has occurred, so update slot status register * and notify OS of the event if necessary. @@ -434,6 +457,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_DLLLA); } + pcie_cap_update_power(hotplug_pdev); return; } @@ -451,6 +475,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } pcie_cap_slot_event(hotplug_pdev, PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); + pcie_cap_update_power(hotplug_pdev); } } @@ -625,6 +650,7 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_ABP); + pcie_cap_update_power(dev); hotplug_event_update_event_status(dev); } @@ -705,6 +731,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC); } + pcie_cap_update_power(dev); hotplug_event_notify(dev); @@ -731,6 +758,7 @@ int pcie_cap_slot_post_load(void *opaque, int version_id) { PCIDevice *dev = opaque; hotplug_event_update_event_status(dev); + pcie_cap_update_power(dev); return 0; } From 81124b3c7a5dae34881cd8cd9631f125da81ef97 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:56 +0100 Subject: [PATCH 17/20] pcie: add power indicator blink check Refuse to push the attention button in case the guest is busy with some hotplug operation (as indicated by the power indicator blinking). Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-4-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 13d11a57c7..b92dbff118 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -506,6 +506,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); /* Check if hot-unplug is disabled on the slot */ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { @@ -521,6 +522,12 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK) { + error_setg(errp, "Hot-unplug failed: " + "guest is busy (power indicator blinking)"); + return; + } + dev->pending_deleted_event = true; /* In case user cancel the operation of multi-function hot-add, From 44242d4d3d082a28200fdbecaed5398122682e6e Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:57 +0100 Subject: [PATCH 18/20] pcie: factor out pcie_cap_slot_unplug() No functional change. Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-5-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index b92dbff118..959bf074b2 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -497,6 +497,25 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) object_unparent(OBJECT(dev)); } +static void pcie_cap_slot_do_unplug(PCIDevice *dev) +{ + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); + + pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL); + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || + (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDC); +} + void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -676,7 +695,6 @@ void pcie_cap_slot_write_config(PCIDevice *dev, uint32_t pos = dev->exp.exp_cap; uint8_t *exp_cap = dev->config + pos; uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); - uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { /* @@ -726,17 +744,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { - PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); - pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL); - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); - if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || - (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); - } - pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDC); + pcie_cap_slot_do_unplug(dev); } pcie_cap_update_power(dev); From 0d33415a4eafe532f3beef8272811d927236a353 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:58 +0100 Subject: [PATCH 19/20] pcie: fast unplug when slot power is off In case the slot is powered off (and the power indicator turned off too) we can unplug right away, without round-trip to the guest. Also clear pending attention button press, there is nothing to care about any more. Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-6-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 959bf074b2..a930ac738a 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -560,6 +560,16 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if (((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) && + ((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF)) { + /* slot is powered off -> unplug without round-trip to the guest */ + pcie_cap_slot_do_unplug(hotplug_pdev); + hotplug_event_notify(hotplug_pdev); + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_ABP); + return; + } + pcie_cap_slot_push_attention_button(hotplug_pdev); } From 18416c62e36a79823a9e28f6b2260aa13c25e1d9 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:59 +0100 Subject: [PATCH 20/20] pcie: expire pending delete Add an expire time for pending delete, once the time is over allow pressing the attention button again. This makes pcie hotplug behave more like acpi hotplug, where one can try sending an 'device_del' monitor command again in case the guest didn't respond to the first attempt. Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-7-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 2 ++ include/hw/qdev-core.h | 1 + softmmu/qdev-monitor.c | 4 +++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index a930ac738a..c5ed266337 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -548,6 +548,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, } dev->pending_deleted_event = true; + dev->pending_deleted_expires_ms = + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */ /* In case user cancel the operation of multi-function hot-add, * remove the function that is unexposed to guest individually, diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 72622bd337..20d3066595 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -181,6 +181,7 @@ struct DeviceState { char *canonical_path; bool realized; bool pending_deleted_event; + int64_t pending_deleted_expires_ms; QDict *opts; int hotplugged; bool allow_unplug_during_migration; diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index 588a62b88d..5925f1ae5f 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -943,7 +943,9 @@ void qmp_device_del(const char *id, Error **errp) { DeviceState *dev = find_device_state(id, errp); if (dev != NULL) { - if (dev->pending_deleted_event) { + if (dev->pending_deleted_event && + (dev->pending_deleted_expires_ms == 0 || + dev->pending_deleted_expires_ms > qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) { error_setg(errp, "Device %s is already in the " "process of unplug", id); return;