From 0c99d722e72dba458f4fa7876d96de5626780c94 Mon Sep 17 00:00:00 2001 From: Denis Plotnikov Date: Thu, 25 Mar 2021 18:12:15 +0300 Subject: [PATCH 1/9] vhost-user-blk: use different event handlers on initialization It is useful to use different connect/disconnect event handlers on device initialization and operation as seen from the further commit fixing a bug on device initialization. This patch refactors the code to make use of them: we don't rely any more on the VM state for choosing how to cleanup the device, instead we explicitly use the proper event handler depending on whether the device has been initialized. Signed-off-by: Denis Plotnikov Reviewed-by: Raphael Norwitz Message-Id: <20210325151217.262793-2-den-plotnikov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/vhost-user-blk.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index b870a50e6b..1af95ec6aa 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -362,7 +362,18 @@ static void vhost_user_blk_disconnect(DeviceState *dev) vhost_dev_cleanup(&s->dev); } -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event); +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, + bool realized); + +static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event) +{ + vhost_user_blk_event(opaque, event, false); +} + +static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event) +{ + vhost_user_blk_event(opaque, event, true); +} static void vhost_user_blk_chr_closed_bh(void *opaque) { @@ -371,11 +382,12 @@ static void vhost_user_blk_chr_closed_bh(void *opaque) VHostUserBlk *s = VHOST_USER_BLK(vdev); vhost_user_blk_disconnect(dev); - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, - NULL, opaque, NULL, true); + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, + vhost_user_blk_event_oper, NULL, opaque, NULL, true); } -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, + bool realized) { DeviceState *dev = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -406,7 +418,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) * TODO: maybe it is a good idea to make the same fix * for other vhost-user devices. */ - if (runstate_is_running()) { + if (realized) { AioContext *ctx = qemu_get_current_aio_context(); qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL, @@ -473,8 +485,9 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); s->connected = false; - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, - NULL, (void *)dev, NULL, true); + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, + vhost_user_blk_event_realize, NULL, (void *)dev, + NULL, true); reconnect: if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { @@ -494,6 +507,10 @@ reconnect: goto reconnect; } + /* we're fully initialized, now we can operate, so change the handler */ + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, + vhost_user_blk_event_oper, NULL, (void *)dev, + NULL, true); return; virtio_err: From bc79c87bcde6587a37347f81332fbb0cd6b14b85 Mon Sep 17 00:00:00 2001 From: Denis Plotnikov Date: Thu, 25 Mar 2021 18:12:16 +0300 Subject: [PATCH 2/9] vhost-user-blk: perform immediate cleanup if disconnect on initialization Commit 4bcad76f4c39 ("vhost-user-blk: delay vhost_user_blk_disconnect") introduced postponing vhost_dev cleanup aiming to eliminate qemu aborts because of connection problems with vhost-blk daemon. However, it introdues a new problem. Now, any communication errors during execution of vhost_dev_init() called by vhost_user_blk_device_realize() lead to qemu abort on assert in vhost_dev_get_config(). This happens because vhost_user_blk_disconnect() is postponed but it should have dropped s->connected flag by the time vhost_user_blk_device_realize() performs a new connection opening. On the connection opening, vhost_dev initialization in vhost_user_blk_connect() relies on s->connection flag and if it's not dropped, it skips vhost_dev initialization and returns with success. Then, vhost_user_blk_device_realize()'s execution flow goes to vhost_dev_get_config() where it's aborted on the assert. To fix the problem this patch adds immediate cleanup on device initialization(in vhost_user_blk_device_realize()) using different event handlers for initialization and operation introduced in the previous patch. On initialization (in vhost_user_blk_device_realize()) we fully control the initialization process. At that point, nobody can use the device since it isn't initialized and we don't need to postpone any cleanups, so we can do cleaup right away when there is a communication problem with the vhost-blk daemon. On operation we leave it as is, since the disconnect may happen when the device is in use, so the device users may want to use vhost_dev's data to do rollback before vhost_dev is re-initialized (e.g. in vhost_dev_set_log()). Signed-off-by: Denis Plotnikov Reviewed-by: Raphael Norwitz Message-Id: <20210325151217.262793-3-den-plotnikov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/vhost-user-blk.c | 48 +++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 1af95ec6aa..4e215f71f1 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -402,38 +402,38 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, break; case CHR_EVENT_CLOSED: /* - * A close event may happen during a read/write, but vhost - * code assumes the vhost_dev remains setup, so delay the - * stop & clear. There are two possible paths to hit this - * disconnect event: - * 1. When VM is in the RUN_STATE_PRELAUNCH state. The - * vhost_user_blk_device_realize() is a caller. - * 2. In tha main loop phase after VM start. - * - * For p2 the disconnect event will be delayed. We can't - * do the same for p1, because we are not running the loop - * at this moment. So just skip this step and perform - * disconnect in the caller function. - * - * TODO: maybe it is a good idea to make the same fix - * for other vhost-user devices. + * Closing the connection should happen differently on device + * initialization and operation stages. + * On initalization, we want to re-start vhost_dev initialization + * from the very beginning right away when the connection is closed, + * so we clean up vhost_dev on each connection closing. + * On operation, we want to postpone vhost_dev cleanup to let the + * other code perform its own cleanup sequence using vhost_dev data + * (e.g. vhost_dev_set_log). */ if (realized) { + /* + * A close event may happen during a read/write, but vhost + * code assumes the vhost_dev remains setup, so delay the + * stop & clear. + */ AioContext *ctx = qemu_get_current_aio_context(); qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL, NULL, NULL, false); aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque); - } - /* - * Move vhost device to the stopped state. The vhost-user device - * will be clean up and disconnected in BH. This can be useful in - * the vhost migration code. If disconnect was caught there is an - * option for the general vhost code to get the dev state without - * knowing its type (in this case vhost-user). - */ - s->dev.started = false; + /* + * Move vhost device to the stopped state. The vhost-user device + * will be clean up and disconnected in BH. This can be useful in + * the vhost migration code. If disconnect was caught there is an + * option for the general vhost code to get the dev state without + * knowing its type (in this case vhost-user). + */ + s->dev.started = false; + } else { + vhost_user_blk_disconnect(dev); + } break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: From 2b7d06c452014c88a13eec3a13b996aa3e9e2331 Mon Sep 17 00:00:00 2001 From: Denis Plotnikov Date: Thu, 25 Mar 2021 18:12:17 +0300 Subject: [PATCH 3/9] vhost-user-blk: add immediate cleanup on shutdown Qemu crashes on shutdown if the chardev used by vhost-user-blk has been finalized before the vhost-user-blk. This happens with char-socket chardev operating in the listening mode (server). The char-socket chardev emits "close" event at the end of finalizing when its internal data is destroyed. This calls vhost-user-blk event handler which in turn tries to manipulate with destroyed chardev by setting an empty event handler for vhost-user-blk cleanup postponing. This patch separates the shutdown case from the cleanup postponing removing the need to set an event handler. Signed-off-by: Denis Plotnikov Message-Id: <20210325151217.262793-4-den-plotnikov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/vhost-user-blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 4e215f71f1..0b5b9d44cd 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -411,7 +411,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, * other code perform its own cleanup sequence using vhost_dev data * (e.g. vhost_dev_set_log). */ - if (realized) { + if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) { /* * A close event may happen during a read/write, but vhost * code assumes the vhost_dev remains setup, so delay the From c3fd706165e9875a10606453ee2785dd51e987a5 Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Mon, 15 Mar 2021 13:59:36 +0200 Subject: [PATCH 4/9] virtio-pci: add check for vdev in virtio_pci_isr_read https://bugzilla.redhat.com/show_bug.cgi?id=1743098 This commit completes the solution of segfault in hot unplug flow (by commit ccec7e9603f446fe75c6c563ba335c00cfda6a06). Added missing check for vdev in virtio_pci_isr_read. Typical stack of crash: virtio_pci_isr_read ../hw/virtio/virtio-pci.c:1365 with proxy-vdev = 0 memory_region_read_accessor at ../softmmu/memory.c:442 access_with_adjusted_size at ../softmmu/memory.c:552 memory_region_dispatch_read1 at ../softmmu/memory.c:1420 memory_region_dispatch_read at ../softmmu/memory.c:1449 flatview_read_continue at ../softmmu/physmem.c:2822 flatview_read at ../softmmu/physmem.c:2862 address_space_read_full at ../softmmu/physmem.c:2875 Signed-off-by: Yuri Benditovich Message-Id: <20210315115937.14286-2-yuri.benditovich@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 883045a223..4a3dcee771 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1364,9 +1364,14 @@ static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr, { VirtIOPCIProxy *proxy = opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - uint64_t val = qatomic_xchg(&vdev->isr, 0); - pci_irq_deassert(&proxy->pci_dev); + uint64_t val; + if (vdev == NULL) { + return 0; + } + + val = qatomic_xchg(&vdev->isr, 0); + pci_irq_deassert(&proxy->pci_dev); return val; } From 51e0e42cabe86b1d99055d23f7b732d8f9662208 Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Mon, 15 Mar 2021 13:59:37 +0200 Subject: [PATCH 5/9] virtio-pci: remove explicit initialization of val The value is assigned later in this procedure. Signed-off-by: Yuri Benditovich Message-Id: <20210315115937.14286-3-yuri.benditovich@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 4a3dcee771..c1b67cf6fc 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1385,10 +1385,10 @@ static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr, { VirtIOPCIProxy *proxy = opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - uint64_t val = 0; + uint64_t val; if (vdev == NULL) { - return val; + return 0; } switch (size) { @@ -1401,6 +1401,9 @@ static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr, case 4: val = virtio_config_modern_readl(vdev, addr); break; + default: + val = 0; + break; } return val; } From 0fd7432533eea3d4d96c73f0393fcb82a6905f6d Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 23 Mar 2021 13:52:24 -0700 Subject: [PATCH 6/9] acpi/piix4: reinitialize acpi PM device on reset Commit 6be8cf56bc8b made sure that SCI is enabled in PM1.CNT on reset in acpi_only mode by modifying acpi_pm1_cnt_reset() and that worked for q35 as expected. The function was introduced by commit eaba51c573a (acpi, acpi_piix, vt82c686: factor out PM1_CNT logic) that forgot to actually call it at piix4 reset time and as result SCI_EN wasn't set as was expected by 6be8cf56bc8b in acpi_only mode. So Windows crashes when it notices that SCI_EN is not set and FADT is not providing information about how to enable it anymore. Reproducer: qemu-system-x86_64 -enable-kvm -M pc-i440fx-6.0,smm=off -cdrom any_windows_10x64.iso Fix it by calling acpi_pm1_cnt_reset() at piix4 reset time. Occasionally this patch adds reset acpi PM related registers on piix4 reset time and de-assert sci. piix4_pm_realize() initializes acpi pm tmr, evt, cnt and gpe. Reset them on device reset. pm_reset() in ich9.c correctly calls corresponding reset functions. Fixes: 6be8cf56bc8b (acpi/core: always set SCI_EN when SMM isn't supported) Reported-by: Reinoud Zandijk Co-developed-by: Igor Mammedov Signed-off-by: Igor Mammedov Signed-off-by: Isaku Yamahata Message-Id: <8a5bbd19727045ec863523830078dd4ca63f6a9a.1616532563.git.isaku.yamahata@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/piix4.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 6056d51667..8f8b0e95e5 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -326,6 +326,13 @@ static void piix4_pm_reset(DeviceState *dev) /* Mark SMM as already inited (until KVM supports SMM). */ pci_conf[0x5B] = 0x02; } + + acpi_pm1_evt_reset(&s->ar); + acpi_pm1_cnt_reset(&s->ar); + acpi_pm_tmr_reset(&s->ar); + acpi_gpe_reset(&s->ar); + acpi_update_sci(&s->ar, s->irq); + pm_io_space_update(s); acpi_pcihp_reset(&s->acpi_pci_hotplug, !s->use_acpi_root_pci_hotplug); } From 0fae92a3133f48f7fb06907c3ed2765266fad9c8 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 23 Mar 2021 13:52:25 -0700 Subject: [PATCH 7/9] vt82c686.c: don't raise SCI when PCI_INTERRUPT_PIN isn't setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this patch, the following patch will triger clan runtime sanitizer warnings as follows. This patch proactively works around it. I leave a correct fix to v582c686.c maintainerfix as I'm not sure about fuloong2e device model. > MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))} > QTEST_QEMU_IMG=./qemu-img > G_TEST_DBUS_DAEMON=/home/petmay01/linaro/qemu-for-merges/tests/dbus-vmstate-daemon.sh > QTEST_QEMU_BINARY=./qemu-system-mips64el tests/qtest/qom-test --tap -k > PASS 1 qtest-mips64el/qom-test /mips64el/qom/loongson3-virt > PASS 2 qtest-mips64el/qom-test /mips64el/qom/none > PASS 3 qtest-mips64el/qom-test /mips64el/qom/magnum > PASS 4 qtest-mips64el/qom-test /mips64el/qom/mipssim > PASS 5 qtest-mips64el/qom-test /mips64el/qom/malta > ../../hw/pci/pci.c:252:30: runtime error: shift exponent -1 is negative > PASS 6 qtest-mips64el/qom-test /mips64el/qom/fuloong2e > PASS 7 qtest-mips64el/qom-test /mips64el/qom/boston > PASS 8 qtest-mips64el/qom-test /mips64el/qom/pica61 > > and similarly for eg > > MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))} > QTEST_QEMU_IMG=./qemu-img > G_TEST_DBUS_DAEMON=/home/petmay01/linaro/qemu-for-merges/tests/dbus-vmstate-daemon.sh > QTEST_QEMU_BINARY=./qemu-system-mips64el tests/qtest/endianness-test > --tap -k > ../../hw/pci/pci.c:252:30: runtime error: shift exponent -1 is negative > PASS 1 qtest-mips64el/endianness-test /mips64el/endianness/fuloong2e > ../../hw/pci/pci.c:252:30: runtime error: shift exponent -1 is negative > PASS 2 qtest-mips64el/endianness-test /mips64el/endianness/split/fuloong2e > ../../hw/pci/pci.c:252:30: runtime error: shift exponent -1 is negative > PASS 3 qtest-mips64el/endianness-test /mips64el/endianness/combine/fuloong2e Cc: BALATON Zoltan Cc: Huacai Chen Cc: "Philippe Mathieu-Daudé" Cc: Jiaxun Yang Reported-by: Peter Maydell Signed-off-by: Isaku Yamahata Message-Id: <62a5fc69e453fb848bfd4794bae1852a75af73c5.1616532563.git.isaku.yamahata@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/isa/vt82c686.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c index 05d084f698..f0fb309f12 100644 --- a/hw/isa/vt82c686.c +++ b/hw/isa/vt82c686.c @@ -144,7 +144,18 @@ static void pm_update_sci(ViaPMState *s) ACPI_BITMASK_POWER_BUTTON_ENABLE | ACPI_BITMASK_GLOBAL_LOCK_ENABLE | ACPI_BITMASK_TIMER_ENABLE)) != 0); - pci_set_irq(&s->dev, sci_level); + if (pci_get_byte(s->dev.config + PCI_INTERRUPT_PIN)) { + /* + * FIXME: + * Fix device model that realizes this PM device and remove + * this work around. + * The device model should wire SCI and setup + * PCI_INTERRUPT_PIN properly. + * If PIN# = 0(interrupt pin isn't used), don't raise SCI as + * work around. + */ + pci_set_irq(&s->dev, sci_level); + } /* schedule a timer interruption if needed */ acpi_pm_tmr_update(&s->ar, (s->ar.pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) && !(pmsts & ACPI_BITMASK_TIMER_STATUS)); From 44421c60c93f78a6d83358e57f22e8f0c1993dba Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 23 Mar 2021 13:52:26 -0700 Subject: [PATCH 8/9] isa/v582c686: Reinitialize ACPI PM device on reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 6be8cf56bc8b made sure that SCI is enabled in PM1.CNT on reset in acpi_only mode by modifying acpi_pm1_cnt_reset() and that worked for q35 as expected. This patch adds reset ACPI PM related registers on vt82c686 reset time and de-assert sci. via_pm_realize() initializes acpi pm tmr, evt, cnt and gpe. Reset them on device reset. Cc: BALATON Zoltan Cc: Huacai Chen Cc: "Philippe Mathieu-Daudé" Cc: Jiaxun Yang Signed-off-by: Isaku Yamahata Message-Id: <0a3fe998525552860919a690ce83dab8f663ab99.1616532563.git.isaku.yamahata@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/isa/vt82c686.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c index f0fb309f12..98325bb32b 100644 --- a/hw/isa/vt82c686.c +++ b/hw/isa/vt82c686.c @@ -178,6 +178,11 @@ static void via_pm_reset(DeviceState *d) /* SMBus IO base */ pci_set_long(s->dev.config + 0x90, 1); + acpi_pm1_evt_reset(&s->ar); + acpi_pm1_cnt_reset(&s->ar); + acpi_pm_tmr_reset(&s->ar); + pm_update_sci(s); + pm_io_space_update(s); smb_io_space_update(s); } From 8ddf54324858ce5e35272efa449f27fc0a19f957 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 23 Mar 2021 13:52:27 -0700 Subject: [PATCH 9/9] pci: sprinkle assert in PCI pin number If a device model (a) doesn't set the value to a correct interrupt number and then (b) triggers an interrupt for itself, it's device model bug. Add assert on interrupt pin number to catch this kind of bug more obviously. Suggested-by: Peter Maydell Signed-off-by: Isaku Yamahata Message-Id: <9cf8ac3b17e162daac0971d7be32deb6a33ae6ec.1616532563.git.isaku.yamahata@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index ac9a24889c..8f35e13a0c 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1450,6 +1450,8 @@ static void pci_irq_handler(void *opaque, int irq_num, int level) PCIDevice *pci_dev = opaque; int change; + assert(0 <= irq_num && irq_num < PCI_NUM_PINS); + assert(level == 0 || level == 1); change = level - pci_irq_state(pci_dev, irq_num); if (!change) return; @@ -1469,6 +1471,7 @@ static inline int pci_intx(PCIDevice *pci_dev) qemu_irq pci_allocate_irq(PCIDevice *pci_dev) { int intx = pci_intx(pci_dev); + assert(0 <= intx && intx < PCI_NUM_PINS); return qemu_allocate_irq(pci_irq_handler, pci_dev, intx); }