2008-12-04 20:38:57 +01:00
|
|
|
/*
|
|
|
|
* Virtio Support
|
|
|
|
*
|
|
|
|
* Copyright IBM, Corp. 2007
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Anthony Liguori <aliguori@us.ibm.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
|
|
* the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2016-01-26 19:17:07 +01:00
|
|
|
#include "qemu/osdep.h"
|
include/qemu/osdep.h: Don't include qapi/error.h
Commit 57cb38b included qapi/error.h into qemu/osdep.h to get the
Error typedef. Since then, we've moved to include qemu/osdep.h
everywhere. Its file comment explains: "To avoid getting into
possible circular include dependencies, this file should not include
any other QEMU headers, with the exceptions of config-host.h,
compiler.h, os-posix.h and os-win32.h, all of which are doing a
similar job to this file and are under similar constraints."
qapi/error.h doesn't do a similar job, and it doesn't adhere to
similar constraints: it includes qapi-types.h. That's in excess of
100KiB of crap most .c files don't actually need.
Add the typedef to qemu/typedefs.h, and include that instead of
qapi/error.h. Include qapi/error.h in .c files that need it and don't
get it now. Include qapi-types.h in qom/object.h for uint16List.
Update scripts/clean-includes accordingly. Update it further to match
reality: replace config.h by config-target.h, add sysemu/os-posix.h,
sysemu/os-win32.h. Update the list of includes in the qemu/osdep.h
comment quoted above similarly.
This reduces the number of objects depending on qapi/error.h from "all
of them" to less than a third. Unfortunately, the number depending on
qapi-types.h shrinks only a little. More work is needed for that one.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
[Fix compilation without the spice devel packages. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-14 09:01:28 +01:00
|
|
|
#include "qapi/error.h"
|
2022-08-11 14:24:39 +02:00
|
|
|
#include "qapi/qapi-commands-virtio.h"
|
2010-05-24 14:19:21 +02:00
|
|
|
#include "trace.h"
|
2012-12-17 18:20:00 +01:00
|
|
|
#include "qemu/error-report.h"
|
2020-10-28 14:47:03 +01:00
|
|
|
#include "qemu/log.h"
|
Include qemu/main-loop.h less
In my "build everything" tree, changing qemu/main-loop.h triggers a
recompile of some 5600 out of 6600 objects (not counting tests and
objects that don't depend on qemu/osdep.h). It includes block/aio.h,
which in turn includes qemu/event_notifier.h, qemu/notify.h,
qemu/processor.h, qemu/qsp.h, qemu/queue.h, qemu/thread-posix.h,
qemu/thread.h, qemu/timer.h, and a few more.
Include qemu/main-loop.h only where it's needed. Touching it now
recompiles only some 1700 objects. For block/aio.h and
qemu/event_notifier.h, these numbers drop from 5600 to 2800. For the
others, they shrink only slightly.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20190812052359.30071-21-armbru@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2019-08-12 07:23:50 +02:00
|
|
|
#include "qemu/main-loop.h"
|
2019-05-23 16:35:07 +02:00
|
|
|
#include "qemu/module.h"
|
2022-08-11 14:24:39 +02:00
|
|
|
#include "qom/object_interfaces.h"
|
2022-12-13 12:17:02 +01:00
|
|
|
#include "hw/core/cpu.h"
|
2013-02-05 17:06:20 +01:00
|
|
|
#include "hw/virtio/virtio.h"
|
2022-12-22 13:08:10 +01:00
|
|
|
#include "hw/virtio/vhost.h"
|
2019-08-12 07:23:39 +02:00
|
|
|
#include "migration/qemu-file-types.h"
|
2012-12-17 18:20:00 +01:00
|
|
|
#include "qemu/atomic.h"
|
2013-02-05 17:06:20 +01:00
|
|
|
#include "hw/virtio/virtio-bus.h"
|
2019-08-12 07:23:51 +02:00
|
|
|
#include "hw/qdev-properties.h"
|
2014-06-24 19:40:16 +02:00
|
|
|
#include "hw/virtio/virtio-access.h"
|
2016-12-30 11:09:10 +01:00
|
|
|
#include "sysemu/dma.h"
|
2019-08-12 07:23:59 +02:00
|
|
|
#include "sysemu/runstate.h"
|
2022-12-13 12:17:07 +01:00
|
|
|
#include "virtio-qmp.h"
|
|
|
|
|
2020-07-07 12:54:45 +02:00
|
|
|
#include "standard-headers/linux/virtio_ids.h"
|
qmp: decode feature & status bits in virtio-status
Display feature names instead of bitmaps for host, guest, and
backend for VirtIODevices.
Display status names instead of bitmaps for VirtIODevices.
Display feature names instead of bitmaps for backend, protocol,
acked, and features (hdev->features) for vhost devices.
Decode features according to device ID. Decode statuses
according to configuration status bitmap (config_status_map).
Decode vhost user protocol features according to vhost user
protocol bitmap (vhost_user_protocol_map).
Transport features are on the first line. Undecoded bits (if
any) are stored in a separate field.
[Jonah: Several changes made to this patch from prev. version (v14):
- Moved all device features mappings to hw/virtio/virtio.c
- Renamed device features mappings (less generic)
- Generalized @FEATURE_ENTRY macro for all device mappings
- Virtio device feature map definitions include descriptions of
feature bits
- Moved @VHOST_USER_F_PROTOCOL_FEATURES feature bit from transport
feature map to vhost-user-supported device feature mappings
(blk, fs, i2c, rng, net, gpu, input, scsi, vsock)
- New feature bit added for virtio-vsock: @VIRTIO_VSOCK_F_SEQPACKET
- New feature bit added for virtio-iommu: @VIRTIO_IOMMU_F_BYPASS_CONFIG
- New feature bit added for virtio-mem: @VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE
- New virtio transport feature bit added: @VIRTIO_F_IN_ORDER
- Added device feature map definition for virtio-rng
]
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
Message-Id: <1660220684-24909-4-git-send-email-jonah.palmer@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-08-11 14:24:41 +02:00
|
|
|
#include "standard-headers/linux/vhost_types.h"
|
|
|
|
#include "standard-headers/linux/virtio_blk.h"
|
|
|
|
#include "standard-headers/linux/virtio_console.h"
|
|
|
|
#include "standard-headers/linux/virtio_gpu.h"
|
|
|
|
#include "standard-headers/linux/virtio_net.h"
|
|
|
|
#include "standard-headers/linux/virtio_scsi.h"
|
|
|
|
#include "standard-headers/linux/virtio_i2c.h"
|
|
|
|
#include "standard-headers/linux/virtio_balloon.h"
|
|
|
|
#include "standard-headers/linux/virtio_iommu.h"
|
|
|
|
#include "standard-headers/linux/virtio_mem.h"
|
|
|
|
#include "standard-headers/linux/virtio_vsock.h"
|
2008-12-04 20:38:57 +01:00
|
|
|
|
2022-12-22 09:00:05 +01:00
|
|
|
/* List of virtio devices for QMP introspection — presumably populated on
 * device realize elsewhere in this file; verify against registration sites. */
QmpVirtIODeviceList virtio_list;
|
2022-08-11 14:24:39 +02:00
|
|
|
|
qmp: decode feature & status bits in virtio-status
Display feature names instead of bitmaps for host, guest, and
backend for VirtIODevices.
Display status names instead of bitmaps for VirtIODevices.
Display feature names instead of bitmaps for backend, protocol,
acked, and features (hdev->features) for vhost devices.
Decode features according to device ID. Decode statuses
according to configuration status bitmap (config_status_map).
Decode vhost user protocol features according to vhost user
protocol bitmap (vhost_user_protocol_map).
Transport features are on the first line. Undecoded bits (if
any) are stored in a separate field.
[Jonah: Several changes made to this patch from prev. version (v14):
- Moved all device features mappings to hw/virtio/virtio.c
- Renamed device features mappings (less generic)
- Generalized @FEATURE_ENTRY macro for all device mappings
- Virtio device feature map definitions include descriptions of
feature bits
- Moved @VHOST_USER_F_PROTOCOL_FEATURES feature bit from transport
feature map to vhost-user-supported device feature mappings
(blk, fs, i2c, rng, net, gpu, input, scsi, vsock)
- New feature bit added for virtio-vsock: @VIRTIO_VSOCK_F_SEQPACKET
- New feature bit added for virtio-iommu: @VIRTIO_IOMMU_F_BYPASS_CONFIG
- New feature bit added for virtio-mem: @VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE
- New virtio transport feature bit added: @VIRTIO_F_IN_ORDER
- Added device feature map definition for virtio-rng
]
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
Message-Id: <1660220684-24909-4-git-send-email-jonah.palmer@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-08-11 14:24:41 +02:00
|
|
|
/*
|
|
|
|
* Maximum size of virtio device config space
|
|
|
|
*/
|
|
|
|
#define VHOST_USER_MAX_CONFIG_SIZE 256
|
|
|
|
|
2013-07-16 14:25:08 +02:00
|
|
|
/*
|
|
|
|
* The alignment to use between consumer and producer parts of vring.
|
|
|
|
* x86 pagesize again. This is the default, used by transports like PCI
|
|
|
|
* which don't provide a means for the guest to tell the host the alignment.
|
|
|
|
*/
|
2008-12-04 20:58:45 +01:00
|
|
|
#define VIRTIO_PCI_VRING_ALIGN 4096
|
|
|
|
|
2008-12-04 20:38:57 +01:00
|
|
|
/*
 * Split-virtqueue descriptor as it lives in guest memory.  Fields are in
 * guest endianness; vring_split_desc_read() loads and byte-swaps one entry.
 */
typedef struct VRingDesc
{
    uint64_t addr;   /* guest-physical address of the buffer */
    uint32_t len;    /* length of the buffer in bytes */
    uint16_t flags;  /* VRING_DESC_F_* descriptor flags */
    uint16_t next;   /* index of the chained descriptor (when F_NEXT is set) */
} VRingDesc;
|
|
|
|
|
2019-10-25 10:35:20 +02:00
|
|
|
/* Packed-virtqueue descriptor (VIRTIO_F_RING_PACKED ring layout). */
typedef struct VRingPackedDesc {
    uint64_t addr;   /* guest-physical address of the buffer */
    uint32_t len;    /* length of the buffer in bytes */
    uint16_t id;     /* buffer id, echoed back to the driver on completion */
    uint16_t flags;  /* descriptor flags, including avail/used wrap bits */
} VRingPackedDesc;
|
|
|
|
|
2008-12-04 20:38:57 +01:00
|
|
|
/* Split-ring "available" ring: written by the driver, read by the device. */
typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;     /* where the driver puts the next ring[] entry */
    uint16_t ring[];  /* flexible array of descriptor head indices */
} VRingAvail;
|
|
|
|
|
|
|
|
/* One entry of the split-ring "used" ring. */
typedef struct VRingUsedElem
{
    uint32_t id;   /* head index of the completed descriptor chain */
    uint32_t len;  /* number of bytes written into the buffers */
} VRingUsedElem;
|
|
|
|
|
|
|
|
/* Split-ring "used" ring: written by the device, read by the driver. */
typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;           /* where the device puts the next ring[] entry */
    VRingUsedElem ring[];   /* flexible array member (C99) */
} VRingUsed;
|
|
|
|
|
2017-01-27 16:40:17 +01:00
|
|
|
/*
 * RCU-managed bundle of MemoryRegionCaches covering the three vring areas.
 * Reclaimed via call_rcu() -> virtio_free_region_cache().
 */
typedef struct VRingMemoryRegionCaches {
    struct rcu_head rcu;     /* for deferred reclamation */
    MemoryRegionCache desc;  /* cache over the descriptor table */
    MemoryRegionCache avail; /* cache over the avail ring */
    MemoryRegionCache used;  /* cache over the used ring */
} VRingMemoryRegionCaches;
|
|
|
|
|
2008-12-04 20:38:57 +01:00
|
|
|
/* Guest-side layout description of one virtqueue's rings. */
typedef struct VRing
{
    unsigned int num;          /* current number of ring entries */
    unsigned int num_default;  /* default size before guest configuration */
    unsigned int align;        /* ring alignment (e.g. VIRTIO_PCI_VRING_ALIGN) */
    hwaddr desc;               /* guest-physical address of the descriptor table */
    hwaddr avail;              /* guest-physical address of the avail ring */
    hwaddr used;               /* guest-physical address of the used ring */
    VRingMemoryRegionCaches *caches;  /* RCU pointer; may be NULL when reset */
} VRing;
|
|
|
|
|
2019-10-25 10:35:20 +02:00
|
|
|
/* Packed-ring event suppression structure (driver/device event areas). */
typedef struct VRingPackedDescEvent {
    uint16_t off_wrap;  /* event offset plus wrap-counter bit */
    uint16_t flags;     /* event suppression mode flags */
} VRingPackedDescEvent ;
|
|
|
|
|
2008-12-04 20:38:57 +01:00
|
|
|
/*
 * Host-side state for a single virtqueue.  Holds both the guest-visible
 * ring description (vring) and the device-model bookkeeping that tracks
 * progress through the ring.
 */
struct VirtQueue
{
    VRing vring;

    /* Elements popped but not yet pushed (used for packed-ring bookkeeping) */
    VirtQueueElement *used_elems;

    /* Next head to pop */
    uint16_t last_avail_idx;
    bool last_avail_wrap_counter;   /* packed ring: wrap bit for last_avail_idx */

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;
    bool shadow_avail_wrap_counter; /* packed ring: wrap bit for shadow_avail_idx */

    uint16_t used_idx;              /* next slot we will fill in the used ring */
    bool used_wrap_counter;         /* packed ring: wrap bit for used_idx */

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used above holds a valid value */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;           /* index of this queue within the device */

    unsigned int inuse;             /* descriptors popped but not yet returned */

    uint16_t vector;                /* interrupt vector assigned to this queue */

    VirtIOHandleOutput handle_output;  /* callback invoked on guest kick */

    VirtIODevice *vdev;             /* owning device */
    EventNotifier guest_notifier;   /* notifies the guest (interrupt path) */
    EventNotifier host_notifier;    /* notified by the guest (kick path) */
    bool host_notifier_enabled;
    QLIST_ENTRY(VirtQueue) node;    /* linkage on a per-device queue list */
};
|
|
|
|
|
2022-04-01 15:23:18 +02:00
|
|
|
/*
 * Canonical QEMU device-name strings indexed by virtio device ID
 * (VIRTIO_ID_* from standard-headers/linux/virtio_ids.h).  Unassigned
 * IDs are NULL; look up through virtio_id_to_name(), which asserts on
 * out-of-range or unnamed IDs.
 */
const char *virtio_device_names[] = {
    [VIRTIO_ID_NET] = "virtio-net",
    [VIRTIO_ID_BLOCK] = "virtio-blk",
    [VIRTIO_ID_CONSOLE] = "virtio-serial",
    [VIRTIO_ID_RNG] = "virtio-rng",
    [VIRTIO_ID_BALLOON] = "virtio-balloon",
    [VIRTIO_ID_IOMEM] = "virtio-iomem",
    [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
    [VIRTIO_ID_SCSI] = "virtio-scsi",
    [VIRTIO_ID_9P] = "virtio-9p",
    [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
    [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
    [VIRTIO_ID_CAIF] = "virtio-caif",
    [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
    [VIRTIO_ID_GPU] = "virtio-gpu",
    [VIRTIO_ID_CLOCK] = "virtio-clk",
    [VIRTIO_ID_INPUT] = "virtio-input",
    [VIRTIO_ID_VSOCK] = "vhost-vsock",
    [VIRTIO_ID_CRYPTO] = "virtio-crypto",
    [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
    [VIRTIO_ID_PSTORE] = "virtio-pstore",
    [VIRTIO_ID_IOMMU] = "virtio-iommu",
    [VIRTIO_ID_MEM] = "virtio-mem",
    [VIRTIO_ID_SOUND] = "virtio-sound",
    [VIRTIO_ID_FS] = "virtio-user-fs",
    [VIRTIO_ID_PMEM] = "virtio-pmem",
    [VIRTIO_ID_RPMB] = "virtio-rpmb",
    [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
    [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
    [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
    [VIRTIO_ID_SCMI] = "virtio-scmi",
    [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
    [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
    [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
    [VIRTIO_ID_CAN] = "virtio-can",
    [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
    [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
    [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
    [VIRTIO_ID_BT] = "virtio-bluetooth",
    [VIRTIO_ID_GPIO] = "virtio-gpio"
};
|
|
|
|
|
|
|
|
static const char *virtio_id_to_name(uint16_t device_id)
|
|
|
|
{
|
|
|
|
assert(device_id < G_N_ELEMENTS(virtio_device_names));
|
|
|
|
const char *name = virtio_device_names[device_id];
|
|
|
|
assert(name != NULL);
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
2021-08-26 19:26:57 +02:00
|
|
|
/* Called within call_rcu(). */
|
2017-01-27 16:40:17 +01:00
|
|
|
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
|
|
|
|
{
|
2021-08-26 19:26:57 +02:00
|
|
|
assert(caches != NULL);
|
2017-01-27 16:40:17 +01:00
|
|
|
address_space_cache_destroy(&caches->desc);
|
|
|
|
address_space_cache_destroy(&caches->avail);
|
|
|
|
address_space_cache_destroy(&caches->used);
|
|
|
|
g_free(caches);
|
|
|
|
}
|
|
|
|
|
2018-05-15 15:25:31 +02:00
|
|
|
static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
|
|
|
|
{
|
|
|
|
VRingMemoryRegionCaches *caches;
|
|
|
|
|
2020-09-23 12:56:46 +02:00
|
|
|
caches = qatomic_read(&vq->vring.caches);
|
|
|
|
qatomic_rcu_set(&vq->vring.caches, NULL);
|
2018-05-15 15:25:31 +02:00
|
|
|
if (caches) {
|
|
|
|
call_rcu(caches, virtio_free_region_cache, rcu);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-17 01:27:51 +01:00
|
|
|
/*
 * (Re)build the MemoryRegionCaches for virtqueue @n of @vdev from the
 * current vring guest addresses, publish them with RCU, and retire any
 * previous set.  On any mapping failure the device is marked broken via
 * virtio_error() and the queue is left with no caches installed.
 */
void virtio_init_region_cache(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];
    VRingMemoryRegionCaches *old = vq->vring.caches;
    VRingMemoryRegionCaches *new = NULL;
    hwaddr addr, size;
    int64_t len;
    bool packed;


    addr = vq->vring.desc;
    if (!addr) {
        /* Queue not configured: just drop any stale caches. */
        goto out_no_cache;
    }
    new = g_new0(VRingMemoryRegionCaches, 1);
    size = virtio_queue_get_desc_size(vdev, n);
    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
                                   true : false;
    /* Packed rings: the device writes descriptors, so map writable. */
    len = address_space_cache_init(&new->desc, vdev->dma_as,
                                   addr, size, packed);
    if (len < size) {
        virtio_error(vdev, "Cannot map desc");
        goto err_desc;
    }

    /* Used ring is always written by the device -> writable mapping. */
    size = virtio_queue_get_used_size(vdev, n);
    len = address_space_cache_init(&new->used, vdev->dma_as,
                                   vq->vring.used, size, true);
    if (len < size) {
        virtio_error(vdev, "Cannot map used");
        goto err_used;
    }

    /* Avail ring is only read by the device -> read-only mapping. */
    size = virtio_queue_get_avail_size(vdev, n);
    len = address_space_cache_init(&new->avail, vdev->dma_as,
                                   vq->vring.avail, size, false);
    if (len < size) {
        virtio_error(vdev, "Cannot map avail");
        goto err_avail;
    }

    /* Publish the new caches, then free the old set after a grace period. */
    qatomic_rcu_set(&vq->vring.caches, new);
    if (old) {
        call_rcu(old, virtio_free_region_cache, rcu);
    }
    return;

err_avail:
    address_space_cache_destroy(&new->avail);
err_used:
    address_space_cache_destroy(&new->used);
err_desc:
    address_space_cache_destroy(&new->desc);
out_no_cache:
    g_free(new);
    virtio_virtqueue_reset_region_cache(vq);
}
|
|
|
|
|
2008-12-04 20:38:57 +01:00
|
|
|
/* virt queue functions */
|
2015-06-04 12:34:12 +02:00
|
|
|
/*
 * Derive the guest addresses of the avail and used rings from the
 * descriptor table base (split-ring layout) and refresh the region
 * caches.  A queue that is not fully configured is left untouched.
 */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vr = &vdev->vq[n].vring;
    hwaddr avail_end;

    if (vr->num == 0 || vr->desc == 0 || vr->align == 0) {
        /* not yet setup -> nothing to do */
        return;
    }

    vr->avail = vr->desc + vr->num * sizeof(VRingDesc);
    avail_end = vr->avail + offsetof(VRingAvail, ring[vr->num]);
    vr->used = vring_align(avail_end, vr->align);
    virtio_init_region_cache(vdev, n);
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2019-10-25 10:35:24 +02:00
|
|
|
static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
|
|
|
|
MemoryRegionCache *cache, int i)
|
2008-12-04 20:38:57 +01:00
|
|
|
{
|
2017-01-27 16:40:16 +01:00
|
|
|
address_space_read_cached(cache, i * sizeof(VRingDesc),
|
|
|
|
desc, sizeof(VRingDesc));
|
2016-01-31 11:29:03 +01:00
|
|
|
virtio_tswap64s(vdev, &desc->addr);
|
|
|
|
virtio_tswap32s(vdev, &desc->len);
|
|
|
|
virtio_tswap16s(vdev, &desc->flags);
|
|
|
|
virtio_tswap16s(vdev, &desc->next);
|
2008-12-04 20:38:57 +01:00
|
|
|
}
|
|
|
|
|
2019-10-25 10:35:25 +02:00
|
|
|
static void vring_packed_event_read(VirtIODevice *vdev,
|
|
|
|
MemoryRegionCache *cache,
|
|
|
|
VRingPackedDescEvent *e)
|
|
|
|
{
|
|
|
|
hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
|
|
|
|
hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
|
|
|
|
|
2021-11-11 07:38:54 +01:00
|
|
|
e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
|
2019-10-25 10:35:25 +02:00
|
|
|
/* Make sure flags is seen before off_wrap */
|
|
|
|
smp_rmb();
|
2021-11-11 07:38:54 +01:00
|
|
|
e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
|
2019-10-25 10:35:25 +02:00
|
|
|
virtio_tswap16s(vdev, &e->flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vring_packed_off_wrap_write(VirtIODevice *vdev,
|
|
|
|
MemoryRegionCache *cache,
|
|
|
|
uint16_t off_wrap)
|
|
|
|
{
|
|
|
|
hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
|
|
|
|
|
2021-11-11 07:38:54 +01:00
|
|
|
virtio_stw_phys_cached(vdev, cache, off, off_wrap);
|
2019-10-25 10:35:25 +02:00
|
|
|
address_space_cache_invalidate(cache, off, sizeof(off_wrap));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vring_packed_flags_write(VirtIODevice *vdev,
|
|
|
|
MemoryRegionCache *cache, uint16_t flags)
|
|
|
|
{
|
|
|
|
hwaddr off = offsetof(VRingPackedDescEvent, flags);
|
|
|
|
|
2021-11-11 07:38:54 +01:00
|
|
|
virtio_stw_phys_cached(vdev, cache, off, flags);
|
2019-10-25 10:35:25 +02:00
|
|
|
address_space_cache_invalidate(cache, off, sizeof(flags));
|
|
|
|
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/* Called within rcu_read_lock(). */
|
2017-03-15 12:48:31 +01:00
|
|
|
/*
 * Fetch the virtqueue's region caches under RCU.  May return NULL when
 * the caches have been reset; callers must check before dereferencing.
 * Called within rcu_read_lock().
 */
static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
{
    return qatomic_rcu_read(&vq->vring.caches);
}
|
virtio: gracefully handle invalid region caches
The virtqueue code sets up MemoryRegionCaches to access the virtqueue
guest RAM data structures. The code currently assumes that
VRingMemoryRegionCaches is initialized before device emulation code
accesses the virtqueue. An assertion will fail in
vring_get_region_caches() when this is not true. Device fuzzing found a
case where this assumption is false (see below).
Virtqueue guest RAM addresses can also be changed from a vCPU thread
while an IOThread is accessing the virtqueue. This breaks the same
assumption but this time the caches could become invalid partway through
the virtqueue code. The code fetches the caches RCU pointer multiple
times so we will need to validate the pointer every time it is fetched.
Add checks each time we call vring_get_region_caches() and treat invalid
caches as a nop: memory stores are ignored and memory reads return 0.
The fuzz test failure is as follows:
$ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \
-drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \
-drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \
-display none \
-qtest stdio
endianness
outl 0xcf8 0x80002020
outl 0xcfc 0xe0000000
outl 0xcf8 0x80002004
outw 0xcfc 0x7
write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001
inb 0x4
writew 0xe000001c 0x1
write 0xe0000014 0x1 0x0d
The following error message is produced:
qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed.
The backtrace looks like this:
#0 0x00007ffff5520625 in raise () at /lib64/libc.so.6
#1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6
#2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6
#3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6
#4 0x00005555559073da in vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:286
#5 vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:283
#6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451
#9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444
#10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775
#11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244
#12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429
#13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460
#14 0x0000555555cb307e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:260
#15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219
#17 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242
#18 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518
#19 0x00005555559b20c9 in main_loop () at vl.c:1683
#20 0x0000555555838115 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4441
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Cc: Michael Tsirkin <mst@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200207104619.164892-1-stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-02-07 11:46:19 +01:00
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2008-12-04 20:38:57 +01:00
|
|
|
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    VRingMemoryRegionCaches *vrc = vring_get_region_caches(vq);

    /*
     * Region caches may be absent (device not started yet, or caches
     * invalidated by a vring address change): treat the read as a nop
     * and return 0 rather than touching guest memory.
     */
    if (!vrc) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &vrc->avail,
                                   offsetof(VRingAvail, flags));
}
/* Called within rcu_read_lock(). */
|
2008-12-04 20:38:57 +01:00
|
|
|
static inline uint16_t vring_avail_idx(VirtQueue *vq)
|
|
|
|
{
|
2017-03-15 12:48:31 +01:00
|
|
|
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
|
2017-01-27 16:40:20 +01:00
|
|
|
hwaddr pa = offsetof(VRingAvail, idx);
|
virtio: gracefully handle invalid region caches
The virtqueue code sets up MemoryRegionCaches to access the virtqueue
guest RAM data structures. The code currently assumes that
VRingMemoryRegionCaches is initialized before device emulation code
accesses the virtqueue. An assertion will fail in
vring_get_region_caches() when this is not true. Device fuzzing found a
case where this assumption is false (see below).
Virtqueue guest RAM addresses can also be changed from a vCPU thread
while an IOThread is accessing the virtqueue. This breaks the same
assumption but this time the caches could become invalid partway through
the virtqueue code. The code fetches the caches RCU pointer multiple
times so we will need to validate the pointer every time it is fetched.
Add checks each time we call vring_get_region_caches() and treat invalid
caches as a nop: memory stores are ignored and memory reads return 0.
The fuzz test failure is as follows:
$ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \
-drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \
-drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \
-display none \
-qtest stdio
endianness
outl 0xcf8 0x80002020
outl 0xcfc 0xe0000000
outl 0xcf8 0x80002004
outw 0xcfc 0x7
write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001
inb 0x4
writew 0xe000001c 0x1
write 0xe0000014 0x1 0x0d
The following error message is produced:
qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed.
The backtrace looks like this:
#0 0x00007ffff5520625 in raise () at /lib64/libc.so.6
#1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6
#2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6
#3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6
#4 0x00005555559073da in vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:286
#5 vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:283
#6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451
#9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444
#10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775
#11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244
#12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429
#13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460
#14 0x0000555555cb307e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:260
#15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219
#17 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242
#18 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518
#19 0x00005555559b20c9 in main_loop () at vl.c:1683
#20 0x0000555555838115 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4441
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Cc: Michael Tsirkin <mst@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200207104619.164892-1-stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-02-07 11:46:19 +01:00
|
|
|
|
|
|
|
if (!caches) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
|
2016-01-31 11:29:05 +01:00
|
|
|
return vq->shadow_avail_idx;
|
2008-12-04 20:38:57 +01:00
|
|
|
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2008-12-04 20:38:57 +01:00
|
|
|
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
|
|
|
|
{
|
2017-03-15 12:48:31 +01:00
|
|
|
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
|
2017-01-27 16:40:20 +01:00
|
|
|
hwaddr pa = offsetof(VRingAvail, ring[i]);
|
virtio: gracefully handle invalid region caches
The virtqueue code sets up MemoryRegionCaches to access the virtqueue
guest RAM data structures. The code currently assumes that
VRingMemoryRegionCaches is initialized before device emulation code
accesses the virtqueue. An assertion will fail in
vring_get_region_caches() when this is not true. Device fuzzing found a
case where this assumption is false (see below).
Virtqueue guest RAM addresses can also be changed from a vCPU thread
while an IOThread is accessing the virtqueue. This breaks the same
assumption but this time the caches could become invalid partway through
the virtqueue code. The code fetches the caches RCU pointer multiple
times so we will need to validate the pointer every time it is fetched.
Add checks each time we call vring_get_region_caches() and treat invalid
caches as a nop: memory stores are ignored and memory reads return 0.
The fuzz test failure is as follows:
$ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \
-drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \
-drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \
-display none \
-qtest stdio
endianness
outl 0xcf8 0x80002020
outl 0xcfc 0xe0000000
outl 0xcf8 0x80002004
outw 0xcfc 0x7
write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001
inb 0x4
writew 0xe000001c 0x1
write 0xe0000014 0x1 0x0d
The following error message is produced:
qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed.
The backtrace looks like this:
#0 0x00007ffff5520625 in raise () at /lib64/libc.so.6
#1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6
#2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6
#3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6
#4 0x00005555559073da in vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:286
#5 vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:283
#6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451
#9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444
#10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775
#11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244
#12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429
#13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460
#14 0x0000555555cb307e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:260
#15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219
#17 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242
#18 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518
#19 0x00005555559b20c9 in main_loop () at vl.c:1683
#20 0x0000555555838115 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4441
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Cc: Michael Tsirkin <mst@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200207104619.164892-1-stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-02-07 11:46:19 +01:00
|
|
|
|
|
|
|
if (!caches) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
|
2008-12-04 20:38:57 +01:00
|
|
|
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2015-02-16 22:35:46 +01:00
|
|
|
static inline uint16_t vring_get_used_event(VirtQueue *vq)
|
2011-06-12 15:21:57 +02:00
|
|
|
{
|
|
|
|
return vring_avail_ring(vq, vq->vring.num);
|
|
|
|
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2016-01-31 11:29:06 +01:00
|
|
|
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
|
|
|
|
int i)
|
2008-12-04 20:38:57 +01:00
|
|
|
{
|
2017-03-15 12:48:31 +01:00
|
|
|
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
|
2017-01-27 16:40:20 +01:00
|
|
|
hwaddr pa = offsetof(VRingUsed, ring[i]);
|
virtio: gracefully handle invalid region caches
The virtqueue code sets up MemoryRegionCaches to access the virtqueue
guest RAM data structures. The code currently assumes that
VRingMemoryRegionCaches is initialized before device emulation code
accesses the virtqueue. An assertion will fail in
vring_get_region_caches() when this is not true. Device fuzzing found a
case where this assumption is false (see below).
Virtqueue guest RAM addresses can also be changed from a vCPU thread
while an IOThread is accessing the virtqueue. This breaks the same
assumption but this time the caches could become invalid partway through
the virtqueue code. The code fetches the caches RCU pointer multiple
times so we will need to validate the pointer every time it is fetched.
Add checks each time we call vring_get_region_caches() and treat invalid
caches as a nop: memory stores are ignored and memory reads return 0.
The fuzz test failure is as follows:
$ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \
-drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \
-drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \
-display none \
-qtest stdio
endianness
outl 0xcf8 0x80002020
outl 0xcfc 0xe0000000
outl 0xcf8 0x80002004
outw 0xcfc 0x7
write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001
inb 0x4
writew 0xe000001c 0x1
write 0xe0000014 0x1 0x0d
The following error message is produced:
qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed.
The backtrace looks like this:
#0 0x00007ffff5520625 in raise () at /lib64/libc.so.6
#1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6
#2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6
#3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6
#4 0x00005555559073da in vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:286
#5 vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:283
#6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451
#9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444
#10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775
#11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244
#12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429
#13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460
#14 0x0000555555cb307e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:260
#15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219
#17 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242
#18 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518
#19 0x00005555559b20c9 in main_loop () at vl.c:1683
#20 0x0000555555838115 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4441
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Cc: Michael Tsirkin <mst@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200207104619.164892-1-stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-02-07 11:46:19 +01:00
|
|
|
|
|
|
|
if (!caches) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2016-01-31 11:29:06 +01:00
|
|
|
virtio_tswap32s(vq->vdev, &uelem->id);
|
|
|
|
virtio_tswap32s(vq->vdev, &uelem->len);
|
2017-01-27 16:40:20 +01:00
|
|
|
address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
|
|
|
|
address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
|
2008-12-04 20:38:57 +01:00
|
|
|
}
|
|
|
|
|
2022-08-11 14:24:43 +02:00
|
|
|
/* Called within rcu_read_lock(). */
|
|
|
|
static inline uint16_t vring_used_flags(VirtQueue *vq)
|
|
|
|
{
|
|
|
|
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
|
|
|
|
hwaddr pa = offsetof(VRingUsed, flags);
|
|
|
|
|
|
|
|
if (!caches) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
|
|
|
|
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2008-12-04 20:38:57 +01:00
|
|
|
static uint16_t vring_used_idx(VirtQueue *vq)
|
|
|
|
{
|
2017-03-15 12:48:31 +01:00
|
|
|
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
|
2017-01-27 16:40:20 +01:00
|
|
|
hwaddr pa = offsetof(VRingUsed, idx);
|
virtio: gracefully handle invalid region caches
The virtqueue code sets up MemoryRegionCaches to access the virtqueue
guest RAM data structures. The code currently assumes that
VRingMemoryRegionCaches is initialized before device emulation code
accesses the virtqueue. An assertion will fail in
vring_get_region_caches() when this is not true. Device fuzzing found a
case where this assumption is false (see below).
Virtqueue guest RAM addresses can also be changed from a vCPU thread
while an IOThread is accessing the virtqueue. This breaks the same
assumption but this time the caches could become invalid partway through
the virtqueue code. The code fetches the caches RCU pointer multiple
times so we will need to validate the pointer every time it is fetched.
Add checks each time we call vring_get_region_caches() and treat invalid
caches as a nop: memory stores are ignored and memory reads return 0.
The fuzz test failure is as follows:
$ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \
-drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \
-drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \
-display none \
-qtest stdio
endianness
outl 0xcf8 0x80002020
outl 0xcfc 0xe0000000
outl 0xcf8 0x80002004
outw 0xcfc 0x7
write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001
inb 0x4
writew 0xe000001c 0x1
write 0xe0000014 0x1 0x0d
The following error message is produced:
qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed.
The backtrace looks like this:
#0 0x00007ffff5520625 in raise () at /lib64/libc.so.6
#1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6
#2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6
#3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6
#4 0x00005555559073da in vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:286
#5 vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:283
#6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451
#9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444
#10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775
#11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244
#12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429
#13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460
#14 0x0000555555cb307e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:260
#15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219
#17 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242
#18 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518
#19 0x00005555559b20c9 in main_loop () at vl.c:1683
#20 0x0000555555838115 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4441
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Cc: Michael Tsirkin <mst@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200207104619.164892-1-stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-02-07 11:46:19 +01:00
|
|
|
|
|
|
|
if (!caches) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
|
2008-12-04 20:38:57 +01:00
|
|
|
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2011-06-12 15:21:57 +02:00
|
|
|
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
|
2008-12-04 20:38:57 +01:00
|
|
|
{
|
2017-03-15 12:48:31 +01:00
|
|
|
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
|
2017-01-27 16:40:20 +01:00
|
|
|
hwaddr pa = offsetof(VRingUsed, idx);
|
virtio: gracefully handle invalid region caches
The virtqueue code sets up MemoryRegionCaches to access the virtqueue
guest RAM data structures. The code currently assumes that
VRingMemoryRegionCaches is initialized before device emulation code
accesses the virtqueue. An assertion will fail in
vring_get_region_caches() when this is not true. Device fuzzing found a
case where this assumption is false (see below).
Virtqueue guest RAM addresses can also be changed from a vCPU thread
while an IOThread is accessing the virtqueue. This breaks the same
assumption but this time the caches could become invalid partway through
the virtqueue code. The code fetches the caches RCU pointer multiple
times so we will need to validate the pointer every time it is fetched.
Add checks each time we call vring_get_region_caches() and treat invalid
caches as a nop: memory stores are ignored and memory reads return 0.
The fuzz test failure is as follows:
$ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \
-drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \
-drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \
-display none \
-qtest stdio
endianness
outl 0xcf8 0x80002020
outl 0xcfc 0xe0000000
outl 0xcf8 0x80002004
outw 0xcfc 0x7
write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001
inb 0x4
writew 0xe000001c 0x1
write 0xe0000014 0x1 0x0d
The following error message is produced:
qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed.
The backtrace looks like this:
#0 0x00007ffff5520625 in raise () at /lib64/libc.so.6
#1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6
#2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6
#3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6
#4 0x00005555559073da in vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:286
#5 vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:283
#6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451
#9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444
#10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775
#11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244
#12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429
#13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460
#14 0x0000555555cb307e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:260
#15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219
#17 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242
#18 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518
#19 0x00005555559b20c9 in main_loop () at vl.c:1683
#20 0x0000555555838115 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4441
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Cc: Michael Tsirkin <mst@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200207104619.164892-1-stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-02-07 11:46:19 +01:00
|
|
|
|
|
|
|
if (caches) {
|
|
|
|
virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
|
|
|
|
address_space_cache_invalidate(&caches->used, pa, sizeof(val));
|
|
|
|
}
|
|
|
|
|
2016-01-31 11:29:04 +01:00
|
|
|
vq->used_idx = val;
|
2008-12-04 20:38:57 +01:00
|
|
|
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2008-12-04 20:38:57 +01:00
|
|
|
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
|
|
|
|
{
|
2017-03-15 12:48:31 +01:00
|
|
|
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
|
2014-06-24 19:40:16 +02:00
|
|
|
VirtIODevice *vdev = vq->vdev;
|
2017-01-27 16:40:20 +01:00
|
|
|
hwaddr pa = offsetof(VRingUsed, flags);
|
virtio: gracefully handle invalid region caches
The virtqueue code sets up MemoryRegionCaches to access the virtqueue
guest RAM data structures. The code currently assumes that
VRingMemoryRegionCaches is initialized before device emulation code
accesses the virtqueue. An assertion will fail in
vring_get_region_caches() when this is not true. Device fuzzing found a
case where this assumption is false (see below).
Virtqueue guest RAM addresses can also be changed from a vCPU thread
while an IOThread is accessing the virtqueue. This breaks the same
assumption but this time the caches could become invalid partway through
the virtqueue code. The code fetches the caches RCU pointer multiple
times so we will need to validate the pointer every time it is fetched.
Add checks each time we call vring_get_region_caches() and treat invalid
caches as a nop: memory stores are ignored and memory reads return 0.
The fuzz test failure is as follows:
$ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \
-drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \
-drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \
-display none \
-qtest stdio
endianness
outl 0xcf8 0x80002020
outl 0xcfc 0xe0000000
outl 0xcf8 0x80002004
outw 0xcfc 0x7
write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001
inb 0x4
writew 0xe000001c 0x1
write 0xe0000014 0x1 0x0d
The following error message is produced:
qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed.
The backtrace looks like this:
#0 0x00007ffff5520625 in raise () at /lib64/libc.so.6
#1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6
#2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6
#3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6
#4 0x00005555559073da in vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:286
#5 vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:283
#6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451
#9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444
#10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775
#11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244
#12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429
#13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460
#14 0x0000555555cb307e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:260
#15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219
#17 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242
#18 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518
#19 0x00005555559b20c9 in main_loop () at vl.c:1683
#20 0x0000555555838115 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4441
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Cc: Michael Tsirkin <mst@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200207104619.164892-1-stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-02-07 11:46:19 +01:00
|
|
|
uint16_t flags;
|
2017-01-27 16:40:20 +01:00
|
|
|
|
virtio: gracefully handle invalid region caches
The virtqueue code sets up MemoryRegionCaches to access the virtqueue
guest RAM data structures. The code currently assumes that
VRingMemoryRegionCaches is initialized before device emulation code
accesses the virtqueue. An assertion will fail in
vring_get_region_caches() when this is not true. Device fuzzing found a
case where this assumption is false (see below).
Virtqueue guest RAM addresses can also be changed from a vCPU thread
while an IOThread is accessing the virtqueue. This breaks the same
assumption but this time the caches could become invalid partway through
the virtqueue code. The code fetches the caches RCU pointer multiple
times so we will need to validate the pointer every time it is fetched.
Add checks each time we call vring_get_region_caches() and treat invalid
caches as a nop: memory stores are ignored and memory reads return 0.
The fuzz test failure is as follows:
$ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \
-drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \
-drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \
-display none \
-qtest stdio
endianness
outl 0xcf8 0x80002020
outl 0xcfc 0xe0000000
outl 0xcf8 0x80002004
outw 0xcfc 0x7
write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001
inb 0x4
writew 0xe000001c 0x1
write 0xe0000014 0x1 0x0d
The following error message is produced:
qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed.
The backtrace looks like this:
#0 0x00007ffff5520625 in raise () at /lib64/libc.so.6
#1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6
#2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6
#3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6
#4 0x00005555559073da in vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:286
#5 vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:283
#6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451
#9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444
#10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775
#11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244
#12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429
#13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460
#14 0x0000555555cb307e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:260
#15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219
#17 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242
#18 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518
#19 0x00005555559b20c9 in main_loop () at vl.c:1683
#20 0x0000555555838115 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4441
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Cc: Michael Tsirkin <mst@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200207104619.164892-1-stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-02-07 11:46:19 +01:00
|
|
|
if (!caches) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
|
2017-01-27 16:40:20 +01:00
|
|
|
virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
|
|
|
|
address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
|
2008-12-04 20:38:57 +01:00
|
|
|
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2008-12-04 20:38:57 +01:00
|
|
|
/* Called within rcu_read_lock().  */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags;

    /* Region caches may not be set up yet (or were invalidated by a guest
     * address change); in that case the store is silently dropped. */
    if (!caches) {
        return;
    }

    flags = virtio_lduw_phys_cached(vdev, &caches->used, pa);
    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2015-02-16 22:35:46 +01:00
|
|
|
/* Called within rcu_read_lock().  */
static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    VRingMemoryRegionCaches *caches;
    hwaddr pa;

    /* Nothing to publish while notifications are disabled. */
    if (!vq->notification) {
        return;
    }

    /* Treat missing/invalid region caches as a nop store. */
    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    /* avail_event lives right after the used ring entries. */
    pa = offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
}
|
|
|
|
|
2019-10-25 10:35:25 +02:00
|
|
|
/* Enable/disable guest->host notifications for a split virtqueue. */
static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
{
    RCU_READ_LOCK_GUARD();

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        /* With EVENT_IDX the avail event field is used instead of the
         * NO_NOTIFY flag. */
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
|
|
|
|
|
2019-10-25 10:35:25 +02:00
|
|
|
/* Enable/disable guest->host notifications for a packed virtqueue. */
static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
{
    uint16_t off_wrap;
    VRingPackedDescEvent e;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();
    /* Treat missing/invalid region caches as a nop. */
    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    vring_packed_event_read(vq->vdev, &caches->used, &e);

    if (!enable) {
        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        /* Encode the next expected avail index plus the wrap counter in
         * bit 15, as required by the packed ring event format. */
        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
        /* Make sure off_wrap is written before flags. */
        smp_wmb();
        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
    } else {
        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
    }

    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
|
|
|
|
|
2019-12-09 22:09:57 +01:00
|
|
|
/* Report whether guest->host notifications are currently enabled. */
bool virtio_queue_get_notification(VirtQueue *vq)
{
    return vq->notification;
}
|
|
|
|
|
2019-10-25 10:35:25 +02:00
|
|
|
/* Enable/disable guest->host notifications, dispatching on ring layout. */
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;

    /* Queue not set up yet: remember the setting, touch no guest memory. */
    if (!vq->vring.desc) {
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_set_notification(vq, enable);
    } else {
        virtio_queue_split_set_notification(vq, enable);
    }
}
|
|
|
|
|
2008-12-04 20:38:57 +01:00
|
|
|
/* A queue is ready once the guest has supplied an avail ring address. */
int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
static void vring_packed_desc_read_flags(VirtIODevice *vdev,
|
|
|
|
uint16_t *flags,
|
|
|
|
MemoryRegionCache *cache,
|
|
|
|
int i)
|
|
|
|
{
|
2021-11-11 07:38:53 +01:00
|
|
|
hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
|
|
|
|
|
|
|
|
*flags = virtio_lduw_phys_cached(vdev, cache, off);
|
2019-10-25 10:35:24 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void vring_packed_desc_read(VirtIODevice *vdev,
|
|
|
|
VRingPackedDesc *desc,
|
|
|
|
MemoryRegionCache *cache,
|
|
|
|
int i, bool strict_order)
|
|
|
|
{
|
|
|
|
hwaddr off = i * sizeof(VRingPackedDesc);
|
|
|
|
|
|
|
|
vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
|
|
|
|
|
|
|
|
if (strict_order) {
|
|
|
|
/* Make sure flags is read before the rest fields. */
|
|
|
|
smp_rmb();
|
|
|
|
}
|
|
|
|
|
|
|
|
address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
|
|
|
|
&desc->addr, sizeof(desc->addr));
|
|
|
|
address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
|
|
|
|
&desc->id, sizeof(desc->id));
|
|
|
|
address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
|
|
|
|
&desc->len, sizeof(desc->len));
|
|
|
|
virtio_tswap64s(vdev, &desc->addr);
|
|
|
|
virtio_tswap16s(vdev, &desc->id);
|
|
|
|
virtio_tswap32s(vdev, &desc->len);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vring_packed_desc_write_data(VirtIODevice *vdev,
|
|
|
|
VRingPackedDesc *desc,
|
|
|
|
MemoryRegionCache *cache,
|
|
|
|
int i)
|
|
|
|
{
|
|
|
|
hwaddr off_id = i * sizeof(VRingPackedDesc) +
|
|
|
|
offsetof(VRingPackedDesc, id);
|
|
|
|
hwaddr off_len = i * sizeof(VRingPackedDesc) +
|
|
|
|
offsetof(VRingPackedDesc, len);
|
|
|
|
|
|
|
|
virtio_tswap32s(vdev, &desc->len);
|
|
|
|
virtio_tswap16s(vdev, &desc->id);
|
|
|
|
address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
|
|
|
|
address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
|
|
|
|
address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
|
|
|
|
address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vring_packed_desc_write_flags(VirtIODevice *vdev,
|
|
|
|
VRingPackedDesc *desc,
|
|
|
|
MemoryRegionCache *cache,
|
|
|
|
int i)
|
|
|
|
{
|
|
|
|
hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
|
|
|
|
|
2021-11-11 07:38:53 +01:00
|
|
|
virtio_stw_phys_cached(vdev, cache, off, desc->flags);
|
2019-10-25 10:35:24 +02:00
|
|
|
address_space_cache_invalidate(cache, off, sizeof(desc->flags));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vring_packed_desc_write(VirtIODevice *vdev,
|
|
|
|
VRingPackedDesc *desc,
|
|
|
|
MemoryRegionCache *cache,
|
|
|
|
int i, bool strict_order)
|
|
|
|
{
|
|
|
|
vring_packed_desc_write_data(vdev, desc, cache, i);
|
|
|
|
if (strict_order) {
|
|
|
|
/* Make sure data is wrote before flags. */
|
|
|
|
smp_wmb();
|
|
|
|
}
|
|
|
|
vring_packed_desc_write_flags(vdev, desc, cache, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
|
|
|
|
{
|
|
|
|
bool avail, used;
|
|
|
|
|
|
|
|
avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
|
|
|
|
used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
|
|
|
|
return (avail != used) && (avail == wrap_counter);
|
|
|
|
}
|
|
|
|
|
2016-01-31 11:29:05 +01:00
|
|
|
/* Fetch avail_idx from VQ memory only when we really need to know if
|
2017-01-27 16:40:20 +01:00
|
|
|
* guest has added some buffers.
|
|
|
|
* Called within rcu_read_lock(). */
|
|
|
|
static int virtio_queue_empty_rcu(VirtQueue *vq)
|
2008-12-04 20:38:57 +01:00
|
|
|
{
|
virtio-pci: disable vring processing when bus-mastering is disabled
Currently the SLOF firmware for pseries guests will disable/re-enable
a PCI device multiple times via IO/MEM/MASTER bits of PCI_COMMAND
register after the initial probe/feature negotiation, as it tends to
work with a single device at a time at various stages like probing
and running block/network bootloaders without doing a full reset
in-between.
In QEMU, when PCI_COMMAND_MASTER is disabled we disable the
corresponding IOMMU memory region, so DMA accesses (including to vring
fields like idx/flags) will no longer undergo the necessary
translation. Normally we wouldn't expect this to happen since it would
be misbehavior on the driver side to continue driving DMA requests.
However, in the case of pseries, with iommu_platform=on, we trigger the
following sequence when tearing down the virtio-blk dataplane ioeventfd
in response to the guest unsetting PCI_COMMAND_MASTER:
#2 0x0000555555922651 in virtqueue_map_desc (vdev=vdev@entry=0x555556dbcfb0, p_num_sg=p_num_sg@entry=0x7fffe657e1a8, addr=addr@entry=0x7fffe657e240, iov=iov@entry=0x7fffe6580240, max_num_sg=max_num_sg@entry=1024, is_write=is_write@entry=false, pa=0, sz=0)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:757
#3 0x0000555555922a89 in virtqueue_pop (vq=vq@entry=0x555556dc8660, sz=sz@entry=184)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:950
#4 0x00005555558d3eca in virtio_blk_get_request (vq=0x555556dc8660, s=0x555556dbcfb0)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:255
#5 0x00005555558d3eca in virtio_blk_handle_vq (s=0x555556dbcfb0, vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:776
#6 0x000055555591dd66 in virtio_queue_notify_aio_vq (vq=vq@entry=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1550
#7 0x000055555591ecef in virtio_queue_notify_aio_vq (vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1546
#8 0x000055555591ecef in virtio_queue_host_notifier_aio_poll (opaque=0x555556dc86c8)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:2527
#9 0x0000555555d02164 in run_poll_handlers_once (ctx=ctx@entry=0x55555688bfc0, timeout=timeout@entry=0x7fffe65844a8)
at /home/mdroth/w/qemu.git/util/aio-posix.c:520
#10 0x0000555555d02d1b in try_poll_mode (timeout=0x7fffe65844a8, ctx=0x55555688bfc0)
at /home/mdroth/w/qemu.git/util/aio-posix.c:607
#11 0x0000555555d02d1b in aio_poll (ctx=ctx@entry=0x55555688bfc0, blocking=blocking@entry=true)
at /home/mdroth/w/qemu.git/util/aio-posix.c:639
#12 0x0000555555d0004d in aio_wait_bh_oneshot (ctx=0x55555688bfc0, cb=cb@entry=0x5555558d5130 <virtio_blk_data_plane_stop_bh>, opaque=opaque@entry=0x555556de86f0)
at /home/mdroth/w/qemu.git/util/aio-wait.c:71
#13 0x00005555558d59bf in virtio_blk_data_plane_stop (vdev=<optimized out>)
at /home/mdroth/w/qemu.git/hw/block/dataplane/virtio-blk.c:288
#14 0x0000555555b906a1 in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:245
#15 0x0000555555b90dbb in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:237
#16 0x0000555555b92a8e in virtio_pci_stop_ioeventfd (proxy=0x555556db4e40)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:292
#17 0x0000555555b92a8e in virtio_write_config (pci_dev=0x555556db4e40, address=<optimized out>, val=1048832, len=<optimized out>)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:613
I.e. the calling code is only scheduling a one-shot BH for
virtio_blk_data_plane_stop_bh, but somehow we end up trying to process
an additional virtqueue entry before we get there. This is likely due
to the following check in virtio_queue_host_notifier_aio_poll:
static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
EventNotifier *n = opaque;
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
bool progress;
if (!vq->vring.desc || virtio_queue_empty(vq)) {
return false;
}
progress = virtio_queue_notify_aio_vq(vq);
namely the call to virtio_queue_empty(). In this case, since no new
requests have actually been issued, shadow_avail_idx == last_avail_idx,
so we actually try to access the vring via vring_avail_idx() to get
the latest non-shadowed idx:
int virtio_queue_empty(VirtQueue *vq)
{
bool empty;
...
if (vq->shadow_avail_idx != vq->last_avail_idx) {
return 0;
}
rcu_read_lock();
empty = vring_avail_idx(vq) == vq->last_avail_idx;
rcu_read_unlock();
return empty;
but since the IOMMU region has been disabled we get a bogus value (0
usually), which causes virtio_queue_empty() to falsely report that
there are entries to be processed, which causes errors such as:
"virtio: zero sized buffers are not allowed"
or
"virtio-blk missing headers"
and puts the device in an error state.
This patch works around the issue by introducing virtio_set_disabled(),
which sets a 'disabled' flag to bypass checks like virtio_queue_empty()
when bus-mastering is disabled. Since we'd check this flag at all the
same sites as vdev->broken, we replace those checks with an inline
function which checks for either vdev->broken or vdev->disabled.
The 'disabled' flag is only migrated when set, which should be fairly
rare, but to maintain migration compatibility we disable it's use for
older machine types. Users requiring the use of the flag in conjunction
with older machine types can set it explicitly as a virtio-device
option.
NOTES:
- This leaves some other oddities in play, like the fact that
DRIVER_OK also gets unset in response to bus-mastering being
disabled, but not restored (however the device seems to continue
working)
- Similarly, we disable the host notifier via
virtio_bus_stop_ioeventfd(), which seems to move the handling out
of virtio-blk dataplane and back into the main IO thread, and it
ends up staying there till a reset (but otherwise continues working
normally)
Cc: David Gibson <david@gibson.dropbear.id.au>,
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Message-Id: <20191120005003.27035-1-mdroth@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-11-20 01:50:03 +01:00
|
|
|
if (virtio_device_disabled(vq->vdev)) {
|
virtio: Return true from virtio_queue_empty if broken
Both virtio-blk and virtio-scsi use virtio_queue_empty() as the
loop condition in VQ handlers (virtio_blk_handle_vq,
virtio_scsi_handle_cmd_vq). When a device is marked broken in
virtqueue_pop, for example if a vIOMMU address translation failed, we
want to break out of the loop.
This fixes a hanging problem when booting a CentOS 3.10.0-862.el7.x86_64
kernel with ATS enabled:
$ qemu-system-x86_64 \
... \
-device intel-iommu,intremap=on,caching-mode=on,eim=on,device-iotlb=on \
-device virtio-scsi-pci,iommu_platform=on,ats=on,id=scsi0,bus=pci.4,addr=0x0
The dead loop happens immediately when the kernel boots and initializes
the device, where virtio_scsi_data_plane_handle_cmd will not return:
> ...
> #13 0x00005586602b7793 in virtio_scsi_handle_cmd_vq
> #14 0x00005586602b8d66 in virtio_scsi_data_plane_handle_cmd
> #15 0x00005586602ddab7 in virtio_queue_notify_aio_vq
> #16 0x00005586602dfc9f in virtio_queue_host_notifier_aio_poll
> #17 0x00005586607885da in run_poll_handlers_once
> #18 0x000055866078880e in try_poll_mode
> #19 0x00005586607888eb in aio_poll
> #20 0x0000558660784561 in aio_wait_bh_oneshot
> #21 0x00005586602b9582 in virtio_scsi_dataplane_stop
> #22 0x00005586605a7110 in virtio_bus_stop_ioeventfd
> #23 0x00005586605a9426 in virtio_pci_stop_ioeventfd
> #24 0x00005586605ab808 in virtio_pci_common_write
> #25 0x0000558660242396 in memory_region_write_accessor
> #26 0x00005586602425ab in access_with_adjusted_size
> #27 0x0000558660245281 in memory_region_dispatch_write
> #28 0x00005586601e008e in flatview_write_continue
> #29 0x00005586601e01d8 in flatview_write
> #30 0x00005586601e04de in address_space_write
> #31 0x00005586601e052f in address_space_rw
> #32 0x00005586602607f2 in kvm_cpu_exec
> #33 0x0000558660227148 in qemu_kvm_cpu_thread_fn
> #34 0x000055866078bde7 in qemu_thread_start
> #35 0x00007f5784906594 in start_thread
> #36 0x00007f5784639e6f in clone
With this patch, virtio_queue_empty will now return 1 as soon as the
vdev is marked as broken, after a "virtio: zero sized buffers are not
allowed" error.
To be consistent, update virtio_queue_empty_rcu as well.
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20180910145616.8598-2-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-09-10 16:56:15 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2017-03-15 12:48:30 +01:00
|
|
|
if (unlikely(!vq->vring.avail)) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2016-01-31 11:29:05 +01:00
|
|
|
if (vq->shadow_avail_idx != vq->last_avail_idx) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-12-04 20:38:57 +01:00
|
|
|
return vring_avail_idx(vq) == vq->last_avail_idx;
|
|
|
|
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Split-ring variant of virtio_queue_empty(): returns non-zero when no
 * buffers are pending.  Takes the RCU read lock itself before reading
 * guest memory.
 */
static int virtio_queue_split_empty(VirtQueue *vq)
{
    bool empty;

    /* Disabled devices (e.g. bus mastering off) report "empty". */
    if (virtio_device_disabled(vq->vdev)) {
        return 1;
    }

    /* Rings not set up yet. */
    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    /*
     * If our shadow of the guest avail index already differs from
     * last_avail_idx, there are pending buffers — no need to touch
     * guest memory.
     */
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    /* Re-read avail_idx from the vring under the RCU read lock. */
    RCU_READ_LOCK_GUARD();
    empty = vring_avail_idx(vq) == vq->last_avail_idx;
    return empty;
}
|
|
|
|
|
2021-08-26 19:26:56 +02:00
|
|
|
/* Called within rcu_read_lock(). */
static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
{
    struct VRingPackedDesc desc;
    VRingMemoryRegionCaches *cache;

    /* Rings not set up yet. */
    if (unlikely(!vq->vring.desc)) {
        return 1;
    }

    /*
     * The region caches can be invalid (e.g. reconfigured by the guest
     * or not yet initialized); treat that as an empty queue rather than
     * dereferencing a stale cache.
     */
    cache = vring_get_region_caches(vq);
    if (!cache) {
        return 1;
    }

    /* Only the flags word of the next descriptor is needed. */
    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
                                 vq->last_avail_idx);

    /* Empty unless that descriptor is marked available for us. */
    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
}
|
|
|
|
|
|
|
|
/*
 * Packed-ring emptiness check for callers that do not already hold the
 * RCU read lock; takes it for the duration of the check.
 */
static int virtio_queue_packed_empty(VirtQueue *vq)
{
    RCU_READ_LOCK_GUARD();
    return virtio_queue_packed_empty_rcu(vq);
}
|
|
|
|
|
|
|
|
/* Public emptiness check: dispatch on the negotiated ring layout. */
int virtio_queue_empty(VirtQueue *vq)
{
    return virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
           virtio_queue_packed_empty(vq) :
           virtio_queue_split_empty(vq);
}
|
|
|
|
|
2015-09-25 07:21:28 +02:00
|
|
|
/*
 * Unmap a popped element's scatter/gather lists from the device's DMA
 * address space.
 *
 * @len: number of bytes the device wrote into the element.  The
 * device-writable (in) iovecs are unmapped with their access length
 * capped to the written prefix; the device-readable (out) iovecs are
 * unmapped with their full length.
 */
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    AddressSpace *dma_as = vq->vdev->dma_as;
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
                         elem->in_sg[i].iov_len,
                         DMA_DIRECTION_FROM_DEVICE, size);

        offset += size;
    }

    /* Braces added: QEMU coding style requires them on all loop bodies. */
    for (i = 0; i < elem->out_num; i++) {
        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
                         elem->out_sg[i].iov_len,
                         DMA_DIRECTION_TO_DEVICE,
                         elem->out_sg[i].iov_len);
    }
}
|
|
|
|
|
2016-09-19 15:28:03 +02:00
|
|
|
/* virtqueue_detach_element:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Detach the element from the virtqueue. This function is suitable for device
 * reset or other situations where a #VirtQueueElement is simply freed and will
 * not be pushed or discarded.
 */
void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
                              unsigned int len)
{
    /* Release the element's descriptors from the in-use accounting. */
    vq->inuse -= elem->ndescs;
    virtqueue_unmap_sg(vq, elem, len);
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Step last_avail_idx back by @num for a split ring.
 * NOTE(review): plain subtraction assumes last_avail_idx is a
 * free-running unsigned index that wraps naturally — confirm against
 * the VirtQueue field definition.
 */
static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
{
    vq->last_avail_idx -= num;
}
|
|
|
|
|
|
|
|
/*
 * Step last_avail_idx back by @num for a packed ring.  Packed-ring
 * indices stay within [0, vring.num), so wrapping past the start must
 * be done explicitly, and each wrap flips the avail wrap counter.
 */
static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
{
    if (vq->last_avail_idx < num) {
        /* Wrap backwards around the ring and toggle the wrap counter. */
        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
        vq->last_avail_wrap_counter ^= 1;
    } else {
        vq->last_avail_idx -= num;
    }
}
|
|
|
|
|
2016-11-03 09:55:49 +01:00
|
|
|
/* virtqueue_unpop:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 * call to virtqueue_pop() will refetch the element.
 */
void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len)
{
    /* Rewind one element using the layout-specific helper. */
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_rewind(vq, 1);
    } else {
        virtqueue_split_rewind(vq, 1);
    }

    /* Drop in-use accounting and unmap the element's buffers. */
    virtqueue_detach_element(vq, elem, len);
}
|
|
|
|
|
2016-09-07 17:20:48 +02:00
|
|
|
/* virtqueue_rewind:
 * @vq: The #VirtQueue
 * @num: Number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
{
    /* Cannot rewind past what is actually in flight. */
    if (num > vq->inuse) {
        return false;
    }

    vq->inuse -= num;
    /* Layout-specific index rewind (packed also handles wrap counter). */
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_rewind(vq, num);
    } else {
        virtqueue_split_rewind(vq, num);
    }
    return true;
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Record a used-ring entry for @elem in a split virtqueue, @idx slots
 * past the current used index.  The used index itself is not updated
 * here.
 */
static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
                                 unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;
    unsigned int slot;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Translate the caller-relative offset into a ring slot. */
    slot = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, slot);
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Stage a completed element for a packed virtqueue: stash its identity
 * and written length in used_elems[idx] for a later descriptor write.
 */
static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
                                  unsigned int len, unsigned int idx)
{
    VirtQueueElement *slot = &vq->used_elems[idx];

    slot->index = elem->index;
    slot->len = len;
    slot->ndescs = elem->ndescs;
}
|
|
|
|
|
|
|
|
/*
 * Write one used descriptor for @elem into the packed ring, @idx slots
 * past used_idx.  Computes the AVAIL/USED flag bits from the (possibly
 * wrapped) used wrap counter and delegates the ordered store to
 * vring_packed_desc_write().
 */
static void virtqueue_packed_fill_desc(VirtQueue *vq,
                                       const VirtQueueElement *elem,
                                       unsigned int idx,
                                       bool strict_order)
{
    uint16_t head;
    VRingMemoryRegionCaches *caches;
    VRingPackedDesc desc = {
        .id = elem->index,
        .len = elem->len,
    };
    bool wrap_counter = vq->used_wrap_counter;

    /* Rings not set up yet. */
    if (unlikely(!vq->vring.desc)) {
        return;
    }

    /* Wrap the target slot around the ring, flipping the counter. */
    head = vq->used_idx + idx;
    if (head >= vq->vring.num) {
        head -= vq->vring.num;
        wrap_counter ^= 1;
    }
    /*
     * A used descriptor has AVAIL == USED == wrap counter; set or clear
     * both bits together accordingly.
     */
    if (wrap_counter) {
        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
    } else {
        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
    }

    /* Region caches may be invalid (see vring_get_region_caches). */
    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Record a completed element in the used ring without publishing the new
 * used index (that is virtqueue_flush()'s job).  Caller holds
 * rcu_read_lock().
 *
 * @vq:   queue the element came from
 * @elem: element being returned to the guest
 * @len:  number of bytes the device wrote into the element's in-buffers
 * @idx:  offset from the current used index at which to place the element
 */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VirtIODevice *vdev = vq->vdev;

    trace_virtqueue_fill(vq, elem, len, idx);

    /* Tear down the element's guest-memory mappings before handing it back. */
    virtqueue_unmap_sg(vq, elem, len);

    /* A disabled (or broken) device must not touch the vring. */
    if (virtio_device_disabled(vdev)) {
        return;
    }

    /* Dispatch on the ring layout negotiated with the guest. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_split_fill(vq, elem, len, idx);
    } else {
        virtqueue_packed_fill(vq, elem, len, idx);
    }
}
|
|
|
|
|
|
|
|
/* Called within rcu_read_lock(). */
|
|
|
|
/*
 * Publish @count previously filled elements to the guest by bumping the
 * split ring's used index.  Called within rcu_read_lock().
 */
static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t prev, next;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);

    prev = vq->used_idx;
    next = prev + count;
    vring_used_idx_set(vq, next);
    vq->inuse -= count;

    /*
     * If the index advance wrapped past the cached signalled_used value,
     * that cache can no longer be used to suppress notifications.
     */
    if (unlikely((int16_t)(next - vq->signalled_used) < (uint16_t)(next - prev))) {
        vq->signalled_used_valid = false;
    }
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Publish @count previously filled elements to the guest on a packed ring
 * and advance the used index / wrap counter accordingly.
 */
static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
{
    unsigned int slot;
    unsigned int ndescs = 0;

    if (unlikely(!vq->vring.desc)) {
        return;
    }

    /*
     * Write back slots 1..count-1 first; slot 0 is written last with strict
     * ordering so the guest observes the whole batch only once the first
     * descriptor is published.
     */
    for (slot = 1; slot < count; slot++) {
        virtqueue_packed_fill_desc(vq, &vq->used_elems[slot], slot, false);
        ndescs += vq->used_elems[slot].ndescs;
    }
    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
    ndescs += vq->used_elems[0].ndescs;

    vq->inuse -= ndescs;
    vq->used_idx += ndescs;
    if (vq->used_idx >= vq->vring.num) {
        /* Wrapped around the ring: flip the wrap counter and drop the
         * stale signalled_used cache. */
        vq->used_idx -= vq->vring.num;
        vq->used_wrap_counter ^= 1;
        vq->signalled_used_valid = false;
    }
}
|
|
|
|
|
|
|
|
/*
 * Make @count elements previously recorded with virtqueue_fill() visible
 * to the guest.  When the device is disabled (e.g. bus-mastering turned
 * off) we must not touch guest memory, but the in-flight accounting still
 * has to be balanced.
 */
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    VirtIODevice *vdev = vq->vdev;

    if (virtio_device_disabled(vdev)) {
        vq->inuse -= count;
        return;
    }

    /* Dispatch on the ring layout negotiated with the guest. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_split_flush(vq, count);
    } else {
        virtqueue_packed_flush(vq, count);
    }
}
|
|
|
|
|
2008-12-04 20:38:57 +01:00
|
|
|
/*
 * Return a single element to the guest: fill it into the used ring and
 * immediately publish it.  Convenience wrapper around virtqueue_fill() +
 * virtqueue_flush() for the common one-element case.
 *
 * @vq:   queue the element came from
 * @elem: completed element
 * @len:  number of bytes the device wrote into the element's in-buffers
 */
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    /* fill/flush access the vring caches, which are RCU-protected. */
    RCU_READ_LOCK_GUARD();
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2008-12-04 20:38:57 +01:00
|
|
|
/*
 * Number of available heads the guest has published past @idx.
 * Called within rcu_read_lock().
 *
 * Returns the count (possibly 0), or -EINVAL after marking the device
 * broken if the guest advertises more heads than the ring can hold.
 */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    /* 16-bit arithmetic so the free-running avail index wraps correctly. */
    uint16_t pending = vring_avail_idx(vq) - idx;

    /* More pending heads than ring slots means the guest is misbehaving. */
    if (pending > vq->vring.num) {
        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        return -EINVAL;
    }

    /*
     * On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read.
     */
    if (pending) {
        smp_rmb();
    }

    return pending;
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/* Called within rcu_read_lock(). */
|
2016-09-21 17:52:26 +02:00
|
|
|
/*
 * Fetch the head descriptor index the guest advertised at avail-ring slot
 * @idx.  Called within rcu_read_lock().
 *
 * Returns true and stores the head in *head on success; returns false
 * after marking the device broken if the index is out of range.
 */
static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
                               unsigned int *head)
{
    /* The avail ring is a circular buffer of vq->vring.num slots. */
    unsigned int h = vring_avail_ring(vq, idx % vq->vring.num);

    *head = h;

    /* A head beyond the descriptor table is a fatal guest bug. */
    if (h >= vq->vring.num) {
        virtio_error(vq->vdev, "Guest says index %u is available", h);
        return false;
    }

    return true;
}
|
|
|
|
|
2016-09-21 17:52:24 +02:00
|
|
|
/* Result codes for the *_read_next_desc() descriptor-chain walkers. */
enum {
    VIRTQUEUE_READ_DESC_ERROR = -1, /* guest gave a bogus link; device broken */
    VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
};
|
2008-12-04 20:38:57 +01:00
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Advance to the next descriptor in a split-ring chain.
 *
 * @vdev:       device owning the ring
 * @desc:       in: current descriptor; out: the next one on MORE
 * @desc_cache: memory region cache covering the descriptor table
 * @max:        number of descriptors in that table (bounds desc->next)
 * @next:       out: index of the descriptor that was read
 *
 * Returns VIRTQUEUE_READ_DESC_DONE at the end of the chain,
 * VIRTQUEUE_READ_DESC_MORE when *desc now holds the next descriptor, or
 * VIRTQUEUE_READ_DESC_ERROR if the guest supplied an out-of-range link
 * (the device is marked broken via virtio_error()).
 */
static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                          MemoryRegionCache *desc_cache,
                                          unsigned int max, unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing!
     * NOTE(review): the barrier pins down *next before the range check so
     * a concurrently-rewriting guest can't bypass validation — confirm a
     * write barrier (vs. read barrier) is the intended primitive here. */
    smp_wmb();

    if (*next >= max) {
        virtio_error(vdev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_split_desc_read(vdev, desc, desc_cache, *next);
    return VIRTQUEUE_READ_DESC_MORE;
}
|
|
|
|
|
2021-09-06 12:43:18 +02:00
|
|
|
/* Called within rcu_read_lock(). */
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Sum the readable (out) and writable (in) byte counts of all buffers the
 * guest currently has available on a split ring, walking both direct and
 * indirect descriptor chains.  Called within rcu_read_lock().
 *
 * @in_bytes/@out_bytes:         out: totals (either may be NULL)
 * @max_in_bytes/@max_out_bytes: stop counting early once both limits are met
 * @caches:                      validated vring region caches
 *
 * On any guest-provided inconsistency the device is marked broken via
 * virtio_error() and both totals are reported as 0.
 */
static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
                            unsigned int *in_bytes, unsigned int *out_bytes,
                            unsigned max_in_bytes, unsigned max_out_bytes,
                            VRingMemoryRegionCaches *caches)
{
    VirtIODevice *vdev = vq->vdev;
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    int64_t len = 0;
    int rc;

    idx = vq->last_avail_idx;
    total_bufs = in_total = out_total = 0;

    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
        MemoryRegionCache *desc_cache = &caches->desc;
        unsigned int num_bufs;
        VRingDesc desc;
        unsigned int i;
        /* Re-derived every head: an indirect chain shrinks/grows `max`
         * below, and letting that value leak into the next head allowed a
         * stale bound past the cached region (see stale-`max` fix). */
        unsigned int max = vq->vring.num;

        num_bufs = total_bufs;

        if (!virtqueue_get_head(vq, idx++, &i)) {
            goto err;
        }

        vring_split_desc_read(vdev, &desc, desc_cache, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            len = address_space_cache_init(&indirect_desc_cache,
                                           vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                virtio_error(vdev, "Cannot map indirect buffer");
                goto err;
            }

            /* Restart the walk inside the indirect table. */
            max = desc.len / sizeof(VRingDesc);
            num_bufs = i = 0;
            vring_split_desc_read(vdev, &desc, desc_cache, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            /* Both callers' limits satisfied: no need to keep walking. */
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        /* An indirect chain counts as a single buffer toward the loop
         * check; a direct chain contributes every descriptor walked. */
        if (desc_cache == &indirect_desc_cache) {
            address_space_cache_destroy(&indirect_desc_cache);
            total_bufs++;
        } else {
            total_bufs = num_bufs;
        }
    }

    if (rc < 0) {
        goto err;
    }

done:
    /* Safe even if the cache was never initialized (INVALID sentinel). */
    address_space_cache_destroy(&indirect_desc_cache);
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}
|
|
|
|
|
|
|
|
/*
 * Advance to the next descriptor in a packed-ring chain.
 *
 * @desc:       in: current descriptor; out: the next one on MORE
 * @desc_cache: cache covering the (direct or indirect) descriptor area
 * @max:        number of descriptors in that area
 * @next:       in/out: descriptor index, advanced by one (wrapping for
 *              the direct ring, terminating for an indirect table)
 * @indirect:   true when walking an indirect descriptor table, whose
 *              entries are consumed in order without NEXT flags
 *
 * Returns VIRTQUEUE_READ_DESC_DONE at the end of the chain or
 * VIRTQUEUE_READ_DESC_MORE when *desc now holds the next descriptor.
 */
static int virtqueue_packed_read_next_desc(VirtQueue *vq,
                                           VRingPackedDesc *desc,
                                           MemoryRegionCache
                                           *desc_cache,
                                           unsigned int max,
                                           unsigned int *next,
                                           bool indirect)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    ++*next;
    if (*next == max) {
        if (indirect) {
            /* An indirect table ends when its entries are exhausted. */
            return VIRTQUEUE_READ_DESC_DONE;
        } else {
            /* The direct ring wraps back to slot 0. */
            (*next) -= vq->vring.num;
        }
    }

    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
    return VIRTQUEUE_READ_DESC_MORE;
}
|
|
|
|
|
2021-09-06 12:43:18 +02:00
|
|
|
/* Called within rcu_read_lock(). */
|
2019-10-25 10:35:24 +02:00
|
|
|
static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
|
|
|
|
unsigned int *in_bytes,
|
|
|
|
unsigned int *out_bytes,
|
|
|
|
unsigned max_in_bytes,
|
2021-09-06 12:43:18 +02:00
|
|
|
unsigned max_out_bytes,
|
|
|
|
VRingMemoryRegionCaches *caches)
|
2019-10-25 10:35:24 +02:00
|
|
|
{
|
|
|
|
VirtIODevice *vdev = vq->vdev;
|
virtio: fix reachable assertion due to stale value of cached region size
In virtqueue_{split,packed}_get_avail_bytes() descriptors are read
in a loop via MemoryRegionCache regions and calls to
vring_{split,packed}_desc_read() - these take a region cache and the
index of the descriptor to be read.
For direct descriptors we use a cache provided by the caller, whose
size matches that of the virtqueue vring. We limit the number of
descriptors we can read by the size of that vring:
max = vq->vring.num;
...
MemoryRegionCache *desc_cache = &caches->desc;
For indirect descriptors, we initialize a new cache and limit the
number of descriptors by the size of the intermediate descriptor:
len = address_space_cache_init(&indirect_desc_cache,
vdev->dma_as,
desc.addr, desc.len, false);
desc_cache = &indirect_desc_cache;
...
max = desc.len / sizeof(VRingDesc);
However, the first initialization of `max` is done outside the loop
where we process guest descriptors, while the second one is done
inside. This means that a sequence of an indirect descriptor followed
by a direct one will leave a stale value in `max`. If the second
descriptor's `next` field is smaller than the stale value, but
greater than the size of the virtqueue ring (and thus the cached
region), a failed assertion will be triggered in
address_space_read_cached() down the call chain.
Fix this by initializing `max` inside the loop in both functions.
Fixes: 9796d0ac8fb0 ("virtio: use address_space_map/unmap to access descriptors")
Signed-off-by: Carlos López <clopez@suse.de>
Message-Id: <20230302100358.3613-1-clopez@suse.de>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-03-02 11:03:59 +01:00
|
|
|
unsigned int idx;
|
2019-10-25 10:35:24 +02:00
|
|
|
unsigned int total_bufs, in_total, out_total;
|
|
|
|
MemoryRegionCache *desc_cache;
|
|
|
|
MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
|
|
|
|
int64_t len = 0;
|
|
|
|
VRingPackedDesc desc;
|
|
|
|
bool wrap_counter;
|
|
|
|
|
|
|
|
idx = vq->last_avail_idx;
|
|
|
|
wrap_counter = vq->last_avail_wrap_counter;
|
|
|
|
total_bufs = in_total = out_total = 0;
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
unsigned int num_bufs = total_bufs;
|
|
|
|
unsigned int i = idx;
|
|
|
|
int rc;
|
virtio: fix reachable assertion due to stale value of cached region size
In virtqueue_{split,packed}_get_avail_bytes() descriptors are read
in a loop via MemoryRegionCache regions and calls to
vring_{split,packed}_desc_read() - these take a region cache and the
index of the descriptor to be read.
For direct descriptors we use a cache provided by the caller, whose
size matches that of the virtqueue vring. We limit the number of
descriptors we can read by the size of that vring:
max = vq->vring.num;
...
MemoryRegionCache *desc_cache = &caches->desc;
For indirect descriptors, we initialize a new cache and limit the
number of descriptors by the size of the intermediate descriptor:
len = address_space_cache_init(&indirect_desc_cache,
vdev->dma_as,
desc.addr, desc.len, false);
desc_cache = &indirect_desc_cache;
...
max = desc.len / sizeof(VRingDesc);
However, the first initialization of `max` is done outside the loop
where we process guest descriptors, while the second one is done
inside. This means that a sequence of an indirect descriptor followed
by a direct one will leave a stale value in `max`. If the second
descriptor's `next` field is smaller than the stale value, but
greater than the size of the virtqueue ring (and thus the cached
region), a failed assertion will be triggered in
address_space_read_cached() down the call chain.
Fix this by initializing `max` inside the loop in both functions.
Fixes: 9796d0ac8fb0 ("virtio: use address_space_map/unmap to access descriptors")
Signed-off-by: Carlos López <clopez@suse.de>
Message-Id: <20230302100358.3613-1-clopez@suse.de>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-03-02 11:03:59 +01:00
|
|
|
unsigned int max = vq->vring.num;
|
2019-10-25 10:35:24 +02:00
|
|
|
|
|
|
|
desc_cache = &caches->desc;
|
virtio: fix reachable assertion due to stale value of cached region size
In virtqueue_{split,packed}_get_avail_bytes() descriptors are read
in a loop via MemoryRegionCache regions and calls to
vring_{split,packed}_desc_read() - these take a region cache and the
index of the descriptor to be read.
For direct descriptors we use a cache provided by the caller, whose
size matches that of the virtqueue vring. We limit the number of
descriptors we can read by the size of that vring:
max = vq->vring.num;
...
MemoryRegionCache *desc_cache = &caches->desc;
For indirect descriptors, we initialize a new cache and limit the
number of descriptors by the size of the intermediate descriptor:
len = address_space_cache_init(&indirect_desc_cache,
vdev->dma_as,
desc.addr, desc.len, false);
desc_cache = &indirect_desc_cache;
...
max = desc.len / sizeof(VRingDesc);
However, the first initialization of `max` is done outside the loop
where we process guest descriptors, while the second one is done
inside. This means that a sequence of an indirect descriptor followed
by a direct one will leave a stale value in `max`. If the second
descriptor's `next` field is smaller than the stale value, but
greater than the size of the virtqueue ring (and thus the cached
region), a failed assertion will be triggered in
address_space_read_cached() down the call chain.
Fix this by initializing `max` inside the loop in both functions.
Fixes: 9796d0ac8fb0 ("virtio: use address_space_map/unmap to access descriptors")
Signed-off-by: Carlos López <clopez@suse.de>
Message-Id: <20230302100358.3613-1-clopez@suse.de>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-03-02 11:03:59 +01:00
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
|
|
|
|
if (!is_desc_avail(desc.flags, wrap_counter)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (desc.flags & VRING_DESC_F_INDIRECT) {
|
|
|
|
if (desc.len % sizeof(VRingPackedDesc)) {
|
|
|
|
virtio_error(vdev, "Invalid size for indirect buffer table");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we've got too many, that implies a descriptor loop. */
|
|
|
|
if (num_bufs >= max) {
|
|
|
|
virtio_error(vdev, "Looped descriptor");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* loop over the indirect descriptor table */
|
|
|
|
len = address_space_cache_init(&indirect_desc_cache,
|
|
|
|
vdev->dma_as,
|
|
|
|
desc.addr, desc.len, false);
|
|
|
|
desc_cache = &indirect_desc_cache;
|
|
|
|
if (len < desc.len) {
|
|
|
|
virtio_error(vdev, "Cannot map indirect buffer");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
max = desc.len / sizeof(VRingPackedDesc);
|
|
|
|
num_bufs = i = 0;
|
|
|
|
vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
do {
|
|
|
|
/* If we've got too many, that implies a descriptor loop. */
|
|
|
|
if (++num_bufs > max) {
|
|
|
|
virtio_error(vdev, "Looped descriptor");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (desc.flags & VRING_DESC_F_WRITE) {
|
|
|
|
in_total += desc.len;
|
|
|
|
} else {
|
|
|
|
out_total += desc.len;
|
|
|
|
}
|
|
|
|
if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
|
|
|
|
&i, desc_cache ==
|
|
|
|
&indirect_desc_cache);
|
|
|
|
} while (rc == VIRTQUEUE_READ_DESC_MORE);
|
|
|
|
|
|
|
|
if (desc_cache == &indirect_desc_cache) {
|
|
|
|
address_space_cache_destroy(&indirect_desc_cache);
|
|
|
|
total_bufs++;
|
|
|
|
idx++;
|
|
|
|
} else {
|
|
|
|
idx += num_bufs - total_bufs;
|
|
|
|
total_bufs = num_bufs;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (idx >= vq->vring.num) {
|
|
|
|
idx -= vq->vring.num;
|
|
|
|
wrap_counter ^= 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Record the index and wrap counter for a kick we want */
|
|
|
|
vq->shadow_avail_idx = idx;
|
|
|
|
vq->shadow_avail_wrap_counter = wrap_counter;
|
|
|
|
done:
|
|
|
|
address_space_cache_destroy(&indirect_desc_cache);
|
|
|
|
if (in_bytes) {
|
|
|
|
*in_bytes = in_total;
|
|
|
|
}
|
|
|
|
if (out_bytes) {
|
|
|
|
*out_bytes = out_total;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
|
|
|
|
err:
|
|
|
|
in_total = out_total = 0;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Report how many bytes of device-writable (*in_bytes) and device-readable
 * (*out_bytes) buffer space are currently available on @vq.  Counting may
 * stop early once both @max_in_bytes and @max_out_bytes are reached.  On
 * any error (no ring, missing region caches, undersized descriptor cache)
 * both counts are reported as zero.
 */
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    uint16_t desc_size;
    VRingMemoryRegionCaches *caches;
    bool packed;

    RCU_READ_LOCK_GUARD();

    if (unlikely(!vq->vring.desc)) {
        goto err;
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        /* Caches not (yet) set up: treat as if no buffers are available. */
        goto err;
    }

    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
    desc_size = packed ? sizeof(VRingPackedDesc) : sizeof(VRingDesc);

    /* The cached region must cover the whole descriptor ring. */
    if (caches->desc.len < vq->vring.num * desc_size) {
        virtio_error(vq->vdev, "Cannot map descriptor ring");
        goto err;
    }

    if (packed) {
        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
                                         max_in_bytes, max_out_bytes,
                                         caches);
    } else {
        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
                                        max_in_bytes, max_out_bytes,
                                        caches);
    }

    return;
err:
    if (in_bytes) {
        *in_bytes = 0;
    }
    if (out_bytes) {
        *out_bytes = 0;
    }
}
|
2008-12-04 20:38:57 +01:00
|
|
|
|
2012-09-24 20:35:15 +02:00
|
|
|
/*
 * Return nonzero iff @vq currently has at least @in_bytes of device-writable
 * and @out_bytes of device-readable buffer space available.
 */
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int avail_in, avail_out;

    /* Counting may stop as soon as both requested amounts are reached. */
    virtqueue_get_avail_bytes(vq, &avail_in, &avail_out, in_bytes, out_bytes);

    return in_bytes <= avail_in && out_bytes <= avail_out;
}
|
|
|
|
|
2016-09-21 17:52:21 +02:00
|
|
|
/*
 * Map the guest buffer [pa, pa + sz) for DMA, appending entries to iov[] and
 * addr[] starting at index *p_num_sg.  A single guest-contiguous buffer may
 * occupy several iovec entries when dma_memory_map() maps less than the
 * requested length at a time.
 *
 * Returns true on success.  On failure a virtio error is reported and false
 * is returned.  In either case *p_num_sg is updated to the number of entries
 * actually filled in, so the caller can unmap exactly what was mapped.
 */
static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
                               hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    bool ok = false;
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    /* Zero-length buffers are rejected outright. */
    if (!sz) {
        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
        goto out;
    }

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            virtio_error(vdev, "virtio: too many write descriptors in "
                               "indirect table");
            goto out;
        }

        /*
         * dma_memory_map() may shorten len; the remainder is mapped on
         * the next loop iteration as an additional iovec entry.
         */
        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
                                              is_write ?
                                              DMA_DIRECTION_FROM_DEVICE :
                                              DMA_DIRECTION_TO_DEVICE,
                                              MEMTXATTRS_UNSPECIFIED);
        if (!iov[num_sg].iov_base) {
            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
            goto out;
        }

        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    ok = true;

out:
    /* Always report how many entries were filled, even on failure. */
    *p_num_sg = num_sg;
    return ok;
}
|
|
|
|
|
|
|
|
/* Only used by error code paths before we have a VirtQueueElement (therefore
|
|
|
|
* virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to
|
|
|
|
* yet.
|
|
|
|
*/
|
|
|
|
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
|
|
|
|
struct iovec *iov)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i < out_num + in_num; i++) {
|
|
|
|
int is_write = i >= out_num;
|
|
|
|
|
|
|
|
cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
|
|
|
|
iov++;
|
|
|
|
}
|
2016-01-31 11:29:01 +01:00
|
|
|
}
|
|
|
|
|
2016-12-30 11:09:10 +01:00
|
|
|
static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
|
2018-11-02 07:16:36 +01:00
|
|
|
hwaddr *addr, unsigned int num_sg,
|
2020-02-19 19:19:53 +01:00
|
|
|
bool is_write)
|
2010-08-03 16:54:38 +02:00
|
|
|
{
|
|
|
|
unsigned int i;
|
2012-10-23 12:30:10 +02:00
|
|
|
hwaddr len;
|
2010-08-03 16:54:38 +02:00
|
|
|
|
2018-11-02 07:16:36 +01:00
|
|
|
for (i = 0; i < num_sg; i++) {
|
2010-08-03 16:54:38 +02:00
|
|
|
len = sg[i].iov_len;
|
2016-12-30 11:09:10 +01:00
|
|
|
sg[i].iov_base = dma_memory_map(vdev->dma_as,
|
|
|
|
addr[i], &len, is_write ?
|
|
|
|
DMA_DIRECTION_FROM_DEVICE :
|
dma: Let dma_memory_map() take MemTxAttrs argument
Let devices specify transaction attributes when calling
dma_memory_map().
Patch created mechanically using spatch with this script:
@@
expression E1, E2, E3, E4;
@@
- dma_memory_map(E1, E2, E3, E4)
+ dma_memory_map(E1, E2, E3, E4, MEMTXATTRS_UNSPECIFIED)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Li Qiang <liq3ea@gmail.com>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20211223115554.3155328-7-philmd@redhat.com>
2020-09-03 11:00:47 +02:00
|
|
|
DMA_DIRECTION_TO_DEVICE,
|
|
|
|
MEMTXATTRS_UNSPECIFIED);
|
2015-10-27 09:01:44 +01:00
|
|
|
if (!sg[i].iov_base) {
|
2014-06-10 17:56:27 +02:00
|
|
|
error_report("virtio: error trying to map MMIO memory");
|
2010-08-03 16:54:38 +02:00
|
|
|
exit(1);
|
|
|
|
}
|
2016-01-31 11:29:01 +01:00
|
|
|
if (len != sg[i].iov_len) {
|
|
|
|
error_report("virtio: unexpected memory split");
|
2015-10-27 09:01:44 +01:00
|
|
|
exit(1);
|
|
|
|
}
|
2010-08-03 16:54:38 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-30 11:09:10 +01:00
|
|
|
/*
 * Re-establish host mappings for every buffer of @elem, e.g. after the
 * element was restored from migration state.  Failures are fatal (see
 * virtqueue_map_iovec()).
 */
void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
{
    /* Device-writable buffers first, then device-readable ones. */
    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr,
                        elem->in_num, true);
    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr,
                        elem->out_num, false);
}
|
|
|
|
|
2016-11-03 09:55:50 +01:00
|
|
|
/*
 * Allocate a VirtQueueElement (possibly embedded in a larger, caller-defined
 * structure of size @sz) together with its four variable-length arrays in a
 * single g_malloc() block.
 *
 * Layout of the allocation:
 *   [0, sz)           caller's structure, starting with VirtQueueElement
 *   in_addr[in_num]   aligned up from sz
 *   out_addr[out_num] immediately after in_addr
 *   in_sg[in_num]     aligned up from the end of out_addr
 *   out_sg[out_num]   immediately after in_sg
 *
 * The array pointers in the returned element point into this same block,
 * so a single g_free() releases everything.
 */
static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    /* The caller's structure must at least contain a VirtQueueElement. */
    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
    elem->out_num = out_num;
    elem->in_num = in_num;
    /* Point the array members at their slots inside the same allocation. */
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Pop the next available descriptor chain from a split-layout virtqueue.
 *
 * Walks the chain starting at the next available head (following an
 * indirect table if VRING_DESC_F_INDIRECT is set), maps every buffer for
 * DMA, and returns a freshly allocated VirtQueueElement of size @sz
 * describing the chain.  Returns NULL if the queue is empty or on any
 * error; errors are reported via virtio_error() and already-created
 * mappings are undone.
 */
static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    /* Consume the next avail-ring entry; this advances last_avail_idx. */
    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    /* The cached region must cover the whole descriptor ring. */
    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        /* From here on the chain bound is the indirect table's size. */
        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            /* The spec requires all readable descriptors before writable. */
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    /* Safe even if the indirect cache was never initialized. */
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
|
|
|
|
{
|
|
|
|
unsigned int i, max;
|
|
|
|
VRingMemoryRegionCaches *caches;
|
|
|
|
MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
|
|
|
|
MemoryRegionCache *desc_cache;
|
|
|
|
int64_t len;
|
|
|
|
VirtIODevice *vdev = vq->vdev;
|
|
|
|
VirtQueueElement *elem = NULL;
|
|
|
|
unsigned out_num, in_num, elem_entries;
|
|
|
|
hwaddr addr[VIRTQUEUE_MAX_SIZE];
|
|
|
|
struct iovec iov[VIRTQUEUE_MAX_SIZE];
|
|
|
|
VRingPackedDesc desc;
|
|
|
|
uint16_t id;
|
|
|
|
int rc;
|
|
|
|
|
2019-10-28 17:11:09 +01:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2019-10-25 10:35:24 +02:00
|
|
|
if (virtio_queue_packed_empty_rcu(vq)) {
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* When we start there are none of either input nor output. */
|
|
|
|
out_num = in_num = elem_entries = 0;
|
|
|
|
|
|
|
|
max = vq->vring.num;
|
|
|
|
|
|
|
|
if (vq->inuse >= vq->vring.num) {
|
|
|
|
virtio_error(vdev, "Virtqueue size exceeded");
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
i = vq->last_avail_idx;
|
|
|
|
|
|
|
|
caches = vring_get_region_caches(vq);
|
virtio: gracefully handle invalid region caches
The virtqueue code sets up MemoryRegionCaches to access the virtqueue
guest RAM data structures. The code currently assumes that
VRingMemoryRegionCaches is initialized before device emulation code
accesses the virtqueue. An assertion will fail in
vring_get_region_caches() when this is not true. Device fuzzing found a
case where this assumption is false (see below).
Virtqueue guest RAM addresses can also be changed from a vCPU thread
while an IOThread is accessing the virtqueue. This breaks the same
assumption but this time the caches could become invalid partway through
the virtqueue code. The code fetches the caches RCU pointer multiple
times so we will need to validate the pointer every time it is fetched.
Add checks each time we call vring_get_region_caches() and treat invalid
caches as a nop: memory stores are ignored and memory reads return 0.
The fuzz test failure is as follows:
$ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \
-drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \
-drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \
-display none \
-qtest stdio
endianness
outl 0xcf8 0x80002020
outl 0xcfc 0xe0000000
outl 0xcf8 0x80002004
outw 0xcfc 0x7
write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001
inb 0x4
writew 0xe000001c 0x1
write 0xe0000014 0x1 0x0d
The following error message is produced:
qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed.
The backtrace looks like this:
#0 0x00007ffff5520625 in raise () at /lib64/libc.so.6
#1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6
#2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6
#3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6
#4 0x00005555559073da in vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:286
#5 vring_get_region_caches (vq=<optimized out>) at qemu/hw/virtio/virtio.c:283
#6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398
#8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451
#9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444
#10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775
#11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244
#12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429
#13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460
#14 0x0000555555cb307e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:260
#15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219
#17 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242
#18 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518
#19 0x00005555559b20c9 in main_loop () at vl.c:1683
#20 0x0000555555838115 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4441
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Cc: Michael Tsirkin <mst@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200207104619.164892-1-stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-02-07 11:46:19 +01:00
|
|
|
if (!caches) {
|
|
|
|
virtio_error(vdev, "Region caches not initialized");
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
if (caches->desc.len < max * sizeof(VRingDesc)) {
|
|
|
|
virtio_error(vdev, "Cannot map descriptor ring");
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
desc_cache = &caches->desc;
|
|
|
|
vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
|
|
|
|
id = desc.id;
|
|
|
|
if (desc.flags & VRING_DESC_F_INDIRECT) {
|
|
|
|
if (desc.len % sizeof(VRingPackedDesc)) {
|
|
|
|
virtio_error(vdev, "Invalid size for indirect buffer table");
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* loop over the indirect descriptor table */
|
|
|
|
len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
|
|
|
|
desc.addr, desc.len, false);
|
|
|
|
desc_cache = &indirect_desc_cache;
|
|
|
|
if (len < desc.len) {
|
|
|
|
virtio_error(vdev, "Cannot map indirect buffer");
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
max = desc.len / sizeof(VRingPackedDesc);
|
|
|
|
i = 0;
|
|
|
|
vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Collect all the descriptors */
|
|
|
|
do {
|
|
|
|
bool map_ok;
|
|
|
|
|
|
|
|
if (desc.flags & VRING_DESC_F_WRITE) {
|
|
|
|
map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
|
|
|
|
iov + out_num,
|
|
|
|
VIRTQUEUE_MAX_SIZE - out_num, true,
|
|
|
|
desc.addr, desc.len);
|
|
|
|
} else {
|
|
|
|
if (in_num) {
|
|
|
|
virtio_error(vdev, "Incorrect order for descriptors");
|
|
|
|
goto err_undo_map;
|
|
|
|
}
|
|
|
|
map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
|
|
|
|
VIRTQUEUE_MAX_SIZE, false,
|
|
|
|
desc.addr, desc.len);
|
|
|
|
}
|
|
|
|
if (!map_ok) {
|
|
|
|
goto err_undo_map;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we've got too many, that implies a descriptor loop. */
|
|
|
|
if (++elem_entries > max) {
|
|
|
|
virtio_error(vdev, "Looped descriptor");
|
|
|
|
goto err_undo_map;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
|
|
|
|
desc_cache ==
|
|
|
|
&indirect_desc_cache);
|
|
|
|
} while (rc == VIRTQUEUE_READ_DESC_MORE);
|
|
|
|
|
|
|
|
/* Now copy what we have collected and mapped */
|
|
|
|
elem = virtqueue_alloc_element(sz, out_num, in_num);
|
|
|
|
for (i = 0; i < out_num; i++) {
|
|
|
|
elem->out_addr[i] = addr[i];
|
|
|
|
elem->out_sg[i] = iov[i];
|
|
|
|
}
|
|
|
|
for (i = 0; i < in_num; i++) {
|
|
|
|
elem->in_addr[i] = addr[out_num + i];
|
|
|
|
elem->in_sg[i] = iov[out_num + i];
|
|
|
|
}
|
|
|
|
|
|
|
|
elem->index = id;
|
|
|
|
elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
|
|
|
|
vq->last_avail_idx += elem->ndescs;
|
|
|
|
vq->inuse += elem->ndescs;
|
|
|
|
|
|
|
|
if (vq->last_avail_idx >= vq->vring.num) {
|
|
|
|
vq->last_avail_idx -= vq->vring.num;
|
|
|
|
vq->last_avail_wrap_counter ^= 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
vq->shadow_avail_idx = vq->last_avail_idx;
|
|
|
|
vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
|
|
|
|
|
|
|
|
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
|
|
|
|
done:
|
|
|
|
address_space_cache_destroy(&indirect_desc_cache);
|
|
|
|
|
|
|
|
return elem;
|
|
|
|
|
|
|
|
err_undo_map:
|
|
|
|
virtqueue_undo_map_desc(out_num, in_num, iov);
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * virtqueue_pop:
 * @vq: the virtqueue
 * @sz: allocation size forwarded to the ring-specific pop routine
 *
 * Fetch the next available element from @vq, dispatching to the
 * packed- or split-ring implementation depending on whether the
 * VIRTIO_F_RING_PACKED feature was negotiated.  Returns NULL right
 * away when the device is disabled (or broken — see
 * virtio_device_disabled).
 */
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    VirtIODevice *vdev = vq->vdev;

    if (virtio_device_disabled(vdev)) {
        return NULL;
    }

    return virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED) ?
           virtqueue_packed_pop(vq, sz) :
           virtqueue_split_pop(vq, sz);
}
|
|
|
|
|
|
|
|
/*
 * virtqueue_packed_drop_all:
 * @vq: the #VirtQueue, packed ring layout
 *
 * Complete every outstanding available buffer without mapping or
 * processing it, pushing each one back to the guest as "done"
 * (in_num and out_num stay 0, so there is nothing to unmap).
 * Returns the number of buffers dropped.
 */
static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache *desc_cache;
    unsigned int dropped = 0;
    VirtQueueElement elem = {};
    VirtIODevice *vdev = vq->vdev;
    VRingPackedDesc desc;

    /* Protects the region caches fetched below for the whole drain. */
    RCU_READ_LOCK_GUARD();

    caches = vring_get_region_caches(vq);
    /* Caches may not be set up yet (or were invalidated); treat as a nop. */
    if (!caches) {
        return 0;
    }

    desc_cache = &caches->desc;

    /* Disable guest->host notifications while draining the ring. */
    virtio_queue_set_notification(vq, 0);

    while (vq->inuse < vq->vring.num) {
        unsigned int idx = vq->last_avail_idx;
        /*
         * works similar to virtqueue_pop but does not map buffers
         * and does not allocate any memory.
         */
        vring_packed_desc_read(vdev, &desc, desc_cache,
                               vq->last_avail_idx , true);
        /* Stop once the next descriptor is not yet made available. */
        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
            break;
        }
        elem.index = desc.id;
        elem.ndescs = 1;
        /* Walk the rest of the chain just to count its descriptors. */
        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
                                               vq->vring.num, &idx, false)) {
            ++elem.ndescs;
        }
        /*
         * immediately push the element, nothing to unmap
         * as both in_num and out_num are set to 0.
         */
        virtqueue_push(vq, &elem, 0);
        dropped++;
        vq->last_avail_idx += elem.ndescs;
        /* Packed rings wrap by flipping the wrap counter, not modulo. */
        if (vq->last_avail_idx >= vq->vring.num) {
            vq->last_avail_idx -= vq->vring.num;
            vq->last_avail_wrap_counter ^= 1;
        }
    }

    return dropped;
}
|
|
|
|
|
|
|
|
/*
 * virtqueue_split_drop_all:
 * @vq: the #VirtQueue, split ring layout
 *
 * Complete every queued buffer without mapping or processing it and
 * hand it back to the guest as used.  Returns how many buffers were
 * dropped.
 */
static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
{
    unsigned int dropped = 0;
    VirtQueueElement elem = {};
    VirtIODevice *vdev = vq->vdev;
    bool event_idx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

    for (;;) {
        if (virtio_queue_empty(vq) || vq->inuse >= vq->vring.num) {
            break;
        }
        /*
         * Mirrors virtqueue_pop, except that buffers are neither mapped
         * nor allocated.
         */
        smp_rmb();
        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
            break;
        }
        vq->inuse++;
        vq->last_avail_idx++;
        if (event_idx) {
            vring_set_avail_event(vq, vq->last_avail_idx);
        }
        /*
         * Push the element right away: in_num and out_num are both 0,
         * so there is nothing to unmap.
         */
        virtqueue_push(vq, &elem, 0);
        dropped++;
    }

    return dropped;
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/* virtqueue_drop_all:
|
|
|
|
* @vq: The #VirtQueue
|
|
|
|
* Drops all queued buffers and indicates them to the guest
|
|
|
|
* as if they are done. Useful when buffers can not be
|
|
|
|
* processed but must be returned to the guest.
|
|
|
|
*/
|
|
|
|
unsigned int virtqueue_drop_all(VirtQueue *vq)
|
|
|
|
{
|
|
|
|
struct VirtIODevice *vdev = vq->vdev;
|
|
|
|
|
virtio-pci: disable vring processing when bus-mastering is disabled
Currently the SLOF firmware for pseries guests will disable/re-enable
a PCI device multiple times via IO/MEM/MASTER bits of PCI_COMMAND
register after the initial probe/feature negotiation, as it tends to
work with a single device at a time at various stages like probing
and running block/network bootloaders without doing a full reset
in-between.
In QEMU, when PCI_COMMAND_MASTER is disabled we disable the
corresponding IOMMU memory region, so DMA accesses (including to vring
fields like idx/flags) will no longer undergo the necessary
translation. Normally we wouldn't expect this to happen since it would
be misbehavior on the driver side to continue driving DMA requests.
However, in the case of pseries, with iommu_platform=on, we trigger the
following sequence when tearing down the virtio-blk dataplane ioeventfd
in response to the guest unsetting PCI_COMMAND_MASTER:
#2 0x0000555555922651 in virtqueue_map_desc (vdev=vdev@entry=0x555556dbcfb0, p_num_sg=p_num_sg@entry=0x7fffe657e1a8, addr=addr@entry=0x7fffe657e240, iov=iov@entry=0x7fffe6580240, max_num_sg=max_num_sg@entry=1024, is_write=is_write@entry=false, pa=0, sz=0)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:757
#3 0x0000555555922a89 in virtqueue_pop (vq=vq@entry=0x555556dc8660, sz=sz@entry=184)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:950
#4 0x00005555558d3eca in virtio_blk_get_request (vq=0x555556dc8660, s=0x555556dbcfb0)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:255
#5 0x00005555558d3eca in virtio_blk_handle_vq (s=0x555556dbcfb0, vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:776
#6 0x000055555591dd66 in virtio_queue_notify_aio_vq (vq=vq@entry=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1550
#7 0x000055555591ecef in virtio_queue_notify_aio_vq (vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1546
#8 0x000055555591ecef in virtio_queue_host_notifier_aio_poll (opaque=0x555556dc86c8)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:2527
#9 0x0000555555d02164 in run_poll_handlers_once (ctx=ctx@entry=0x55555688bfc0, timeout=timeout@entry=0x7fffe65844a8)
at /home/mdroth/w/qemu.git/util/aio-posix.c:520
#10 0x0000555555d02d1b in try_poll_mode (timeout=0x7fffe65844a8, ctx=0x55555688bfc0)
at /home/mdroth/w/qemu.git/util/aio-posix.c:607
#11 0x0000555555d02d1b in aio_poll (ctx=ctx@entry=0x55555688bfc0, blocking=blocking@entry=true)
at /home/mdroth/w/qemu.git/util/aio-posix.c:639
#12 0x0000555555d0004d in aio_wait_bh_oneshot (ctx=0x55555688bfc0, cb=cb@entry=0x5555558d5130 <virtio_blk_data_plane_stop_bh>, opaque=opaque@entry=0x555556de86f0)
at /home/mdroth/w/qemu.git/util/aio-wait.c:71
#13 0x00005555558d59bf in virtio_blk_data_plane_stop (vdev=<optimized out>)
at /home/mdroth/w/qemu.git/hw/block/dataplane/virtio-blk.c:288
#14 0x0000555555b906a1 in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:245
#15 0x0000555555b90dbb in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:237
#16 0x0000555555b92a8e in virtio_pci_stop_ioeventfd (proxy=0x555556db4e40)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:292
#17 0x0000555555b92a8e in virtio_write_config (pci_dev=0x555556db4e40, address=<optimized out>, val=1048832, len=<optimized out>)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:613
I.e. the calling code is only scheduling a one-shot BH for
virtio_blk_data_plane_stop_bh, but somehow we end up trying to process
an additional virtqueue entry before we get there. This is likely due
to the following check in virtio_queue_host_notifier_aio_poll:
static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
EventNotifier *n = opaque;
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
bool progress;
if (!vq->vring.desc || virtio_queue_empty(vq)) {
return false;
}
progress = virtio_queue_notify_aio_vq(vq);
namely the call to virtio_queue_empty(). In this case, since no new
requests have actually been issued, shadow_avail_idx == last_avail_idx,
so we actually try to access the vring via vring_avail_idx() to get
the latest non-shadowed idx:
int virtio_queue_empty(VirtQueue *vq)
{
bool empty;
...
if (vq->shadow_avail_idx != vq->last_avail_idx) {
return 0;
}
rcu_read_lock();
empty = vring_avail_idx(vq) == vq->last_avail_idx;
rcu_read_unlock();
return empty;
but since the IOMMU region has been disabled we get a bogus value (0
usually), which causes virtio_queue_empty() to falsely report that
there are entries to be processed, which causes errors such as:
"virtio: zero sized buffers are not allowed"
or
"virtio-blk missing headers"
and puts the device in an error state.
This patch works around the issue by introducing virtio_set_disabled(),
which sets a 'disabled' flag to bypass checks like virtio_queue_empty()
when bus-mastering is disabled. Since we'd check this flag at all the
same sites as vdev->broken, we replace those checks with an inline
function which checks for either vdev->broken or vdev->disabled.
The 'disabled' flag is only migrated when set, which should be fairly
rare, but to maintain migration compatibility we disable it's use for
older machine types. Users requiring the use of the flag in conjunction
with older machine types can set it explicitly as a virtio-device
option.
NOTES:
- This leaves some other oddities in play, like the fact that
DRIVER_OK also gets unset in response to bus-mastering being
disabled, but not restored (however the device seems to continue
working)
- Similarly, we disable the host notifier via
virtio_bus_stop_ioeventfd(), which seems to move the handling out
of virtio-blk dataplane and back into the main IO thread, and it
ends up staying there till a reset (but otherwise continues working
normally)
Cc: David Gibson <david@gibson.dropbear.id.au>,
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Message-Id: <20191120005003.27035-1-mdroth@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-11-20 01:50:03 +01:00
|
|
|
if (virtio_device_disabled(vq->vdev)) {
|
2019-10-25 10:35:24 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
|
|
|
|
return virtqueue_packed_drop_all(vq);
|
|
|
|
} else {
|
|
|
|
return virtqueue_split_drop_all(vq);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-31 11:29:00 +01:00
|
|
|
/* Reading and writing a structure directly to QEMUFile is *awful*, but
|
|
|
|
* it is what QEMU has always done by mistake. We can change it sooner
|
|
|
|
* or later by bumping the version number of the affected vm states.
|
|
|
|
* In the meanwhile, since the in-memory layout of VirtQueueElement
|
|
|
|
* has changed, we need to marshal to and from the layout that was
|
|
|
|
* used before the change.
|
|
|
|
*/
|
|
|
|
typedef struct VirtQueueElementOld {
    unsigned int index;                      /* head descriptor id of the chain */
    unsigned int out_num;                    /* count of valid out_addr/out_sg entries */
    unsigned int in_num;                     /* count of valid in_addr/in_sg entries */
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];      /* guest addresses, device-writable segments */
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];     /* guest addresses, device-readable segments */
    /* Only iov_len is meaningful on the wire: iov_base is zeroed on save
     * (it would leak the QEMU address space layout) and rebuilt by
     * virtqueue_map on load. */
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;
|
|
|
|
|
2016-12-30 11:09:10 +01:00
|
|
|
/*
 * qemu_get_virtqueue_element:
 * @vdev: the device the element belongs to
 * @f: migration stream to read from
 * @sz: allocation size for the element (passed to
 *      virtqueue_alloc_element)
 *
 * Unmarshal a VirtQueueElement from the legacy VirtQueueElementOld
 * wire layout.  iov_base pointers are not part of the stream; they
 * are reconstructed by virtqueue_map.  Returns the newly allocated
 * element (caller owns it).
 */
void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int idx;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * This is just one thing (there are probably more) that must be
     * fixed before we can allow NDEBUG compilation.
     */
    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (idx = 0; idx < elem->in_num; idx++) {
        elem->in_addr[idx] = data.in_addr[idx];
        /* Base is overwritten by virtqueue_map below. */
        elem->in_sg[idx].iov_base = 0;
        elem->in_sg[idx].iov_len = data.in_sg[idx].iov_len;
    }

    for (idx = 0; idx < elem->out_num; idx++) {
        elem->out_addr[idx] = data.out_addr[idx];
        /* Base is overwritten by virtqueue_map below. */
        elem->out_sg[idx].iov_base = 0;
        elem->out_sg[idx].iov_len = data.out_sg[idx].iov_len;
    }

    /* Packed-ring streams carry the descriptor count as an extra field. */
    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        qemu_get_be32s(f, &elem->ndescs);
    }

    virtqueue_map(vdev, elem);
    return elem;
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
|
|
|
|
VirtQueueElement *elem)
|
2016-01-31 11:28:59 +01:00
|
|
|
{
|
2016-01-31 11:29:00 +01:00
|
|
|
VirtQueueElementOld data;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
memset(&data, 0, sizeof(data));
|
|
|
|
data.index = elem->index;
|
|
|
|
data.in_num = elem->in_num;
|
|
|
|
data.out_num = elem->out_num;
|
|
|
|
|
|
|
|
for (i = 0; i < elem->in_num; i++) {
|
|
|
|
data.in_addr[i] = elem->in_addr[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < elem->out_num; i++) {
|
|
|
|
data.out_addr[i] = elem->out_addr[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < elem->in_num; i++) {
|
|
|
|
/* Base is overwritten by virtqueue_map when loading. Do not
|
|
|
|
* save it, as it would leak the QEMU address space layout. */
|
|
|
|
data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < elem->out_num; i++) {
|
|
|
|
/* Do not save iov_base as above. */
|
|
|
|
data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
|
|
|
|
}
|
2019-10-25 10:35:24 +02:00
|
|
|
|
|
|
|
if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
|
|
|
|
qemu_put_be32s(f, &elem->ndescs);
|
|
|
|
}
|
|
|
|
|
2016-01-31 11:29:00 +01:00
|
|
|
qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
|
2016-01-31 11:28:59 +01:00
|
|
|
}
|
|
|
|
|
2008-12-04 20:38:57 +01:00
|
|
|
/* virtio device */
|
2009-06-21 18:50:13 +02:00
|
|
|
/*
 * Deliver an interrupt on @vector via the transport's notify hook.
 * Suppressed entirely while the device is broken or disabled (e.g.
 * PCI bus-mastering off), since the guest must not be notified then.
 */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (virtio_device_disabled(vdev)) {
        return;
    }

    /* Transport may not implement vectored notification. */
    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}
|
2008-12-04 20:38:57 +01:00
|
|
|
|
2009-05-18 15:51:59 +02:00
|
|
|
/* Re-evaluate the interrupt line without targeting a specific MSI-X
 * vector (VIRTIO_NO_VECTOR selects the legacy/config interrupt path). */
void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}
|
|
|
|
|
2015-06-04 12:34:15 +02:00
|
|
|
/*
 * Validate the feature set accepted by the guest when it sets
 * FEATURES_OK.  A device offered with VIRTIO_F_IOMMU_PLATFORM must not
 * be accepted without it, since DMA would then bypass the IOMMU.
 * Returns 0 on success, a negative errno on rejection.
 */
static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
        return -EFAULT;
    }

    /* Let the device class veto the negotiated features, if it cares. */
    return vdc->validate_features ? vdc->validate_features(vdev) : 0;
}
|
|
|
|
|
|
|
|
/*
 * Update the device status byte written by the guest.
 *
 * For VIRTIO 1.0 devices, a 0 -> 1 transition of FEATURES_OK triggers
 * feature validation, which may reject the negotiated set; in that
 * case the status is left unchanged and the (negative) error is
 * returned.  Returns 0 on success.
 */
int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
            val & VIRTIO_CONFIG_S_FEATURES_OK) {
            int ret = virtio_validate_features(vdev);

            if (ret) {
                return ret;
            }
        }
    }

    /* Track DRIVER_OK edges so start/stop state follows the guest. */
    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
    }

    /* Notify the device class before committing the new status. */
    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;

    return 0;
}
|
|
|
|
|
2014-06-24 19:38:54 +02:00
|
|
|
static enum virtio_device_endian virtio_default_endian(void)
|
|
|
|
{
|
|
|
|
if (target_words_bigendian()) {
|
|
|
|
return VIRTIO_DEVICE_ENDIAN_BIG;
|
|
|
|
} else {
|
|
|
|
return VIRTIO_DEVICE_ENDIAN_LITTLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum virtio_device_endian virtio_current_cpu_endian(void)
|
|
|
|
{
|
2021-05-17 12:51:24 +02:00
|
|
|
if (cpu_virtio_is_big_endian(current_cpu)) {
|
2014-06-24 19:38:54 +02:00
|
|
|
return VIRTIO_DEVICE_ENDIAN_BIG;
|
|
|
|
} else {
|
|
|
|
return VIRTIO_DEVICE_ENDIAN_LITTLE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-17 11:25:44 +02:00
|
|
|
/*
 * Reset virtqueue @i of @vdev to its post-device-reset state: ring
 * addresses unmapped, split- and packed-ring indices cleared, wrap
 * counters back to their initial true value, MSI-X vector unassigned,
 * size restored to the default, and the region caches dropped.
 */
static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
{
    vdev->vq[i].vring.desc = 0;
    vdev->vq[i].vring.avail = 0;
    vdev->vq[i].vring.used = 0;
    vdev->vq[i].last_avail_idx = 0;
    vdev->vq[i].shadow_avail_idx = 0;
    vdev->vq[i].used_idx = 0;
    /* Packed-ring wrap counters start at 1 per the spec. */
    vdev->vq[i].last_avail_wrap_counter = true;
    vdev->vq[i].shadow_avail_wrap_counter = true;
    vdev->vq[i].used_wrap_counter = true;
    virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
    vdev->vq[i].signalled_used = 0;
    vdev->vq[i].signalled_used_valid = false;
    vdev->vq[i].notification = true;
    vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
    vdev->vq[i].inuse = 0;
    virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
}
|
|
|
|
|
2022-10-17 11:25:45 +02:00
|
|
|
/*
 * Guest-requested reset of a single virtqueue (VIRTIO 1.2 queue_reset).
 * The device class hook runs first so it can quiesce in-flight work
 * before the common queue state is cleared.
 */
void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (vdc->queue_reset) {
        vdc->queue_reset(vdev, queue_index);
    }

    __virtio_queue_reset(vdev, queue_index);
}
|
|
|
|
|
2022-10-17 11:25:46 +02:00
|
|
|
/*
 * Guest write to queue_enable: forward to the transport/device hook.
 * queue_enable only exists for VIRTIO 1.0+ devices, but see the note
 * below about why that is not enforced here yet.
 */
void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * TODO: Seabios is currently out of spec and triggering this error.
     * So this needs to be fixed in Seabios, then this can
     * be re-enabled for new machine types only, and also after
     * being converted to LOG_GUEST_ERROR.
     *
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("queue_enable is only supported in devices of virtio "
                     "1.0 or later.");
    }
    */

    if (k->queue_enable) {
        k->queue_enable(vdev, queue_index);
    }
}
|
|
|
|
|
2009-05-18 15:51:59 +02:00
|
|
|
/*
 * Full device reset (qemu_register_reset callback, hence the void*
 * opaque).  Clears status first so transports/backends stop, lets the
 * device class reset its own state, then clears all common device
 * state and every virtqueue.
 */
void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->start_on_kick = false;
    vdev->started = false;
    vdev->broken = false;
    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    /* Cleared even when set via bus-mastering disable; a reset
     * re-enables vring processing. */
    vdev->disabled = false;
    /* ISR may be read concurrently from the guest-facing path. */
    qatomic_set(&vdev->isr, 0);
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        __virtio_queue_reset(vdev, i);
    }
}
|
|
|
|
|
2012-10-23 12:30:10 +02:00
|
|
|
/*
 * Legacy (pre-1.0) queue address write: the guest supplies only the
 * descriptor table base; avail and used ring locations are derived
 * from it by virtio_queue_update_rings().  Ignored for queues that
 * do not exist (size 0).
 */
void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    VirtQueue *vq = &vdev->vq[n];

    if (vq->vring.num == 0) {
        return;
    }
    vq->vring.desc = addr;
    virtio_queue_update_rings(vdev, n);
}
|
|
|
|
|
2012-10-23 12:30:10 +02:00
|
|
|
/* Legacy queue address read: the descriptor table base of queue @n. */
hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}
|
|
|
|
|
|
|
|
/*
 * Modern (VIRTIO 1.0) queue address write: all three ring addresses
 * are supplied independently.  Rebuilds the memory region caches.
 * Ignored for queues that do not exist (size 0).
 */
void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    if (!vdev->vq[n].vring.num) {
        return;
    }
    vdev->vq[n].vring.desc = desc;
    vdev->vq[n].vring.avail = avail;
    vdev->vq[n].vring.used = used;
    virtio_init_region_cache(vdev, n);
}
|
|
|
|
|
2013-07-16 14:25:07 +02:00
|
|
|
/*
 * Guest write of the queue size.  Don't allow the guest to flip a
 * queue between existent and nonexistent states, or to set an
 * invalid size.
 */
void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    bool exists = vdev->vq[n].vring.num != 0;
    bool in_range = num >= 0 && num <= VIRTQUEUE_MAX_SIZE;

    if (!in_range || (num != 0) != exists) {
        return;
    }
    vdev->vq[n].vring.num = num;
}
|
|
|
|
|
2015-04-23 08:21:46 +02:00
|
|
|
/* First queue bound to MSI-X @vector, or NULL if none; iterate with
 * virtio_vector_next_queue(). */
VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}
|
|
|
|
|
|
|
|
/* Next queue on the same per-vector list, or NULL at the end. */
VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}
|
|
|
|
|
2009-05-18 15:51:59 +02:00
|
|
|
/* Current size of queue @n; 0 means the queue does not exist. */
int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}
|
2008-12-04 20:38:57 +01:00
|
|
|
|
2017-01-12 22:26:22 +01:00
|
|
|
/* Maximum (default) size of queue @n, as set at virtio_add_queue()
 * time — the value advertised to the guest as queue_size_max. */
int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num_default;
}
|
|
|
|
|
2015-05-29 08:15:26 +02:00
|
|
|
/*
 * Number of queues in use, counting contiguously from 0 up to the
 * first nonexistent queue (queues are always allocated densely by
 * virtio_add_queue()).
 */
int virtio_get_num_queues(VirtIODevice *vdev)
{
    int n = 0;

    while (n < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, n)) {
        n++;
    }

    return n;
}
|
|
|
|
|
2013-07-16 14:25:08 +02:00
|
|
|
/*
 * Legacy (MMIO) write of the vring alignment for queue @n.  Rejected
 * for virtio-1 devices, and only legal on transports that declared
 * support for variable alignment (needed for migration of this state).
 */
void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* virtio-1 compliant devices cannot change the alignment */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }
    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    /* align == 0 is ignored rather than treated as an error. */
    if (align) {
        vdev->vq[n].vring.align = align;
        virtio_queue_update_rings(vdev, n);
    }
}
|
|
|
|
|
2016-04-06 12:16:22 +02:00
|
|
|
/*
 * Run the queue's kick handler; invoked from the host-notifier
 * (ioeventfd) path.  A kick on a broken device or an unconfigured
 * queue is silently ignored.
 */
static void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_output) {
        VirtIODevice *vdev = vq->vdev;

        if (unlikely(vdev->broken)) {
            return;
        }

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);

        /* Transitional drivers may kick before setting DRIVER_OK;
         * treat the first kick as device start in that case. */
        if (unlikely(vdev->start_on_kick)) {
            virtio_set_started(vdev, true);
        }
    }
}
|
|
|
|
|
2009-05-18 15:51:59 +02:00
|
|
|
/*
 * Guest kick of queue @n.  If an ioeventfd host notifier is active,
 * signal it so the handler runs in its own context (e.g. an IOThread);
 * otherwise call the handler synchronously here.
 */
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    if (unlikely(!vq->vring.desc || vdev->broken)) {
        return;
    }

    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    if (vq->host_notifier_enabled) {
        event_notifier_set(&vq->host_notifier);
    } else if (vq->handle_output) {
        vq->handle_output(vdev, vq);

        /* Transitional drivers may kick before DRIVER_OK; start now. */
        if (unlikely(vdev->start_on_kick)) {
            virtio_set_started(vdev, true);
        }
    }
}
|
|
|
|
|
2009-06-21 18:50:13 +02:00
|
|
|
/* MSI-X vector assigned to queue @n; VIRTIO_NO_VECTOR for indices
 * beyond the queue array. */
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    if (n >= VIRTIO_QUEUE_MAX) {
        return VIRTIO_NO_VECTOR;
    }

    return vdev->vq[n].vector;
}
|
|
|
|
|
|
|
|
/*
 * Bind queue @n to MSI-X @vector, maintaining the per-vector queue
 * lists used for fast vector -> queues lookup: the queue is unlinked
 * from its old vector's list before being inserted under the new one.
 */
void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq = &vdev->vq[n];

    if (n < VIRTIO_QUEUE_MAX) {
        /* vector_queues is only allocated on transports using MSI-X. */
        if (vdev->vector_queues &&
            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
            QLIST_REMOVE(vq, node);
        }
        vdev->vq[n].vector = vector;
        if (vdev->vector_queues &&
            vector != VIRTIO_NO_VECTOR) {
            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
        }
    }
}
|
|
|
|
|
2016-10-21 22:48:11 +02:00
|
|
|
/*
 * Allocate the first free virtqueue slot of @vdev with @queue_size
 * entries and @handle_output as its kick handler.  Aborts if all
 * VIRTIO_QUEUE_MAX slots are taken or the size exceeds
 * VIRTQUEUE_MAX_SIZE (both are programming errors at device realize
 * time).  Returns the new queue.
 */
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    int i;

    /* A size of 0 marks a free slot. */
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    /* num_default is the maximum advertised to the guest. */
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);

    return &vdev->vq[i];
}
|
|
|
|
|
2019-12-09 17:46:13 +01:00
|
|
|
/*
 * Tear down a virtqueue: mark the slot free (num == 0), release the
 * used_elems array and drop the region caches.  Counterpart of
 * virtio_add_queue().
 */
void virtio_delete_queue(VirtQueue *vq)
{
    vq->vring.num = 0;
    vq->vring.num_default = 0;
    vq->handle_output = NULL;
    g_free(vq->used_elems);
    /* NULL the pointer so a double delete is harmless. */
    vq->used_elems = NULL;
    virtio_virtqueue_reset_region_cache(vq);
}
|
|
|
|
|
2013-01-30 12:12:36 +01:00
|
|
|
/* Delete queue @n of @vdev; an out-of-range index is a programming
 * error and aborts. */
void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (!(n >= 0 && n < VIRTIO_QUEUE_MAX)) {
        abort();
    }

    virtio_delete_queue(&vdev->vq[n]);
}
|
|
|
|
|
2016-11-18 16:07:01 +01:00
|
|
|
/* Set bits of the ISR status byte (read-to-clear by the guest). */
static void virtio_set_isr(VirtIODevice *vdev, int value)
{
    uint8_t old = qatomic_read(&vdev->isr);

    /* Do not write ISR if it does not change, so that its cacheline remains
     * shared in the common case where the guest does not read it.
     */
    if ((old & value) != value) {
        /* Atomic OR: the guest may concurrently read-and-clear ISR. */
        qatomic_or(&vdev->isr, value);
    }
}
|
|
|
|
|
2021-05-23 11:40:40 +02:00
|
|
|
/* Called within rcu_read_lock(). */
|
2019-10-25 10:35:25 +02:00
|
|
|
/*
 * Decide whether the guest must be interrupted after adding used
 * buffers to a split virtqueue, honouring NOTIFY_ON_EMPTY,
 * VRING_AVAIL_F_NO_INTERRUPT and the EVENT_IDX suppression scheme.
 * Called within rcu_read_lock().
 */
static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (when feature acknowledge) */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && virtio_queue_empty(vq)) {
        return true;
    }

    /* Without EVENT_IDX, the guest suppresses with a ring flag. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    /* EVENT_IDX: notify if used_idx crossed the guest's event index
     * since the last signalled position (or if that position is not
     * yet known, e.g. right after reset/migration). */
    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
|
|
|
|
|
2019-10-25 10:35:25 +02:00
|
|
|
/*
 * Packed-ring analogue of vring_need_event().  @off_wrap encodes the
 * guest's event offset in bits 0..14 and its wrap counter in bit 15.
 * If the event's wrap counter differs from the current one (@wrap),
 * the event index refers to the previous pass over the ring, so it is
 * shifted down by the ring size before the range check.
 */
static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
                                    uint16_t off_wrap, uint16_t new,
                                    uint16_t old)
{
    int off = off_wrap & ~(1 << 15);

    if (wrap != off_wrap >> 15) {
        off -= vq->vring.num;
    }

    return vring_need_event(off, new, old);
}
|
|
|
|
|
2021-05-23 11:40:40 +02:00
|
|
|
/* Called within rcu_read_lock(). */
|
2019-10-25 10:35:25 +02:00
|
|
|
/*
 * Decide whether the device should notify the driver about used
 * buffers on a packed virtqueue, honouring the driver's event
 * suppression area (VIRTIO 1.1 packed ring layout).
 */
static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    VRingPackedDescEvent ev;
    VRingMemoryRegionCaches *caches;
    uint16_t prev_used, cur_used;
    bool was_valid;

    /* Queue memory may be unmapped/invalid; treat that as "don't notify". */
    caches = vring_get_region_caches(vq);
    if (!caches) {
        return false;
    }

    /* Read the driver's event suppression structure from guest memory. */
    vring_packed_event_read(vdev, &caches->avail, &ev);

    /*
     * Record the used index we are signalling about *before* looking at
     * the flags, so the bookkeeping is identical on every exit path.
     */
    prev_used = vq->signalled_used;
    cur_used = vq->signalled_used = vq->used_idx;
    was_valid = vq->signalled_used_valid;
    vq->signalled_used_valid = true;

    switch (ev.flags) {
    case VRING_PACKED_EVENT_FLAG_DISABLE:
        return false;
    case VRING_PACKED_EVENT_FLAG_ENABLE:
        return true;
    default:
        /* Descriptor-based suppression: check the event offset/wrap. */
        return !was_valid ||
               vring_packed_need_event(vq, vq->used_wrap_counter,
                                       ev.off_wrap, cur_used, prev_used);
    }
}
|
|
|
|
|
|
|
|
/* Called within rcu_read_lock(). */
|
|
|
|
static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
|
|
|
|
{
|
|
|
|
if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
|
|
|
|
return virtio_packed_should_notify(vdev, vq);
|
|
|
|
} else {
|
|
|
|
return virtio_split_should_notify(vdev, vq);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
virtio: set ISR on dataplane notifications
Dataplane has been omitting forever the step of setting ISR when
an interrupt is raised. This caused little breakage, because the
specification actually says that ISR may not be updated in MSI mode.
Some versions of the Windows drivers however didn't clear MSI mode
correctly, and proceeded using polling mode (using ISR, not the used
ring index!) for crashdump and hibernation. If it were just crashdump
and hibernation it would not be a big deal, but recent releases of
Windows do not really shut down, but rather log out and hibernate to
make the next startup faster. Hence, this manifested as a more serious
hang during shutdown with e.g. Windows 8.1 and virtio-win 1.8.0 RPMs.
Newer versions fixed this, while older versions do not use MSI at all.
The failure has always been there for virtio dataplane, but it became
visible after commits 9ffe337 ("virtio-blk: always use dataplane path
if ioeventfd is active", 2016-10-30) and ad07cd6 ("virtio-scsi: always
use dataplane path if ioeventfd is active", 2016-10-30) made virtio-blk
and virtio-scsi always use the dataplane code under KVM. The good news
therefore is that it was not a bug in the patches---they were doing
exactly what they were meant for, i.e. shake out remaining dataplane bugs.
The fix is not hard, so it's worth arranging for the broken drivers.
The virtio_should_notify+event_notifier_set pair that is common to
virtio-blk and virtio-scsi dataplane is replaced with a new public
function virtio_notify_irqfd that also sets ISR. The irqfd emulation
code now need not set ISR anymore, so virtio_irq is removed.
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-11-18 16:07:02 +01:00
|
|
|
/*
 * Signal a guest notification for @vq through its guest-notifier
 * eventfd (irqfd / dataplane path).
 *
 * Unlike the vector-based path, this also sets ISR bit 0 — see the
 * comment below for why that matters even in MSI mode.
 */
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
    WITH_RCU_READ_LOCK_GUARD() {
        /* Honour the driver's notification-suppression state. */
        if (!virtio_should_notify(vdev, vq)) {
            return;
        }
    }

    trace_virtio_notify_irqfd(vdev, vq);

    /*
     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
     * incorrectly polling this bit during crashdump and hibernation
     * in MSI mode, causing a hang if this bit is never updated.
     * Recent releases of Windows do not really shut down, but rather
     * log out and hibernate to make the next startup faster.  Hence,
     * this manifested as a more serious hang during shutdown with
     * e.g. Windows 8.1 and virtio-win 1.8.0 RPMs.
     *
     * Next driver release from 2016 fixed this problem, so working around it
     * is not a must, but it's easy to do so let's do it here.
     *
     * Note: it's safe to update ISR from any thread as it was switched
     * to an atomic operation.
     */
    virtio_set_isr(vq->vdev, 0x1);
    event_notifier_set(&vq->guest_notifier);
}
|
|
|
|
|
2017-02-17 03:52:16 +01:00
|
|
|
/* Raise ISR bit 0 and deliver the interrupt on @vq's configured vector. */
static void virtio_irq(VirtQueue *vq)
{
    virtio_set_isr(vq->vdev, 0x1);
    virtio_notify_vector(vq->vdev, vq->vector);
}
|
|
|
|
|
2011-06-12 15:21:57 +02:00
|
|
|
/*
 * Notify the guest that buffers were used on @vq, unless the driver
 * has suppressed notifications (checked under the RCU read lock).
 */
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    WITH_RCU_READ_LOCK_GUARD() {
        /* Honour the driver's notification-suppression state. */
        if (!virtio_should_notify(vdev, vq)) {
            return;
        }
    }

    trace_virtio_notify(vdev, vq);
    virtio_irq(vq);
}
|
|
|
|
|
|
|
|
/*
 * Raise a configuration-change interrupt for @vdev.
 *
 * Sets both ISR bits (0x3), bumps the config generation counter and
 * signals the device's config vector.  A no-op until the driver has
 * set DRIVER_OK.
 */
void virtio_notify_config(VirtIODevice *vdev)
{
    /* Braces added: QEMU coding style requires them even for one-liners. */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_set_isr(vdev, 0x3);
    vdev->generation++;
    virtio_notify_vector(vdev, vdev->config_vector);
}
|
|
|
|
|
2014-06-24 19:38:54 +02:00
|
|
|
static bool virtio_device_endian_needed(void *opaque)
|
|
|
|
{
|
|
|
|
VirtIODevice *vdev = opaque;
|
|
|
|
|
|
|
|
assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
|
2015-08-17 11:48:29 +02:00
|
|
|
if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
|
2015-06-04 12:34:11 +02:00
|
|
|
return vdev->device_endian != virtio_default_endian();
|
|
|
|
}
|
|
|
|
/* Devices conforming to VIRTIO 1.0 or later are always LE. */
|
|
|
|
return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
|
2014-06-24 19:38:54 +02:00
|
|
|
}
|
|
|
|
|
2015-06-01 10:45:40 +02:00
|
|
|
static bool virtio_64bit_features_needed(void *opaque)
|
|
|
|
{
|
|
|
|
VirtIODevice *vdev = opaque;
|
|
|
|
|
|
|
|
return (vdev->host_features >> 32) != 0;
|
|
|
|
}
|
|
|
|
|
2015-08-05 11:50:07 +02:00
|
|
|
/*
 * Migration subsection predicate: extended virtqueue state is only
 * sent for devices offering VIRTIO_F_VERSION_1.
 */
static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Migration subsection predicate: packed-ring virtqueue state is only
 * sent for devices offering VIRTIO_F_RING_PACKED.
 */
static bool virtio_packed_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
}
|
|
|
|
|
2015-09-11 15:16:41 +02:00
|
|
|
static bool virtio_ringsize_needed(void *opaque)
|
|
|
|
{
|
|
|
|
VirtIODevice *vdev = opaque;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
|
|
|
|
if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-11-06 09:02:44 +01:00
|
|
|
static bool virtio_extra_state_needed(void *opaque)
|
|
|
|
{
|
|
|
|
VirtIODevice *vdev = opaque;
|
|
|
|
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
|
|
|
|
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
|
|
|
|
|
|
|
|
return k->has_extra_state &&
|
|
|
|
k->has_extra_state(qbus->parent);
|
|
|
|
}
|
|
|
|
|
2016-09-21 17:52:20 +02:00
|
|
|
/* Migration subsection predicate: only send the "broken" flag if set. */
static bool virtio_broken_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->broken;
}
|
|
|
|
|
2019-03-20 12:26:40 +01:00
|
|
|
/* Migration subsection predicate: only send the "started" flag if set. */
static bool virtio_started_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->started;
}
|
|
|
|
|
virtio-pci: disable vring processing when bus-mastering is disabled
Currently the SLOF firmware for pseries guests will disable/re-enable
a PCI device multiple times via IO/MEM/MASTER bits of PCI_COMMAND
register after the initial probe/feature negotiation, as it tends to
work with a single device at a time at various stages like probing
and running block/network bootloaders without doing a full reset
in-between.
In QEMU, when PCI_COMMAND_MASTER is disabled we disable the
corresponding IOMMU memory region, so DMA accesses (including to vring
fields like idx/flags) will no longer undergo the necessary
translation. Normally we wouldn't expect this to happen since it would
be misbehavior on the driver side to continue driving DMA requests.
However, in the case of pseries, with iommu_platform=on, we trigger the
following sequence when tearing down the virtio-blk dataplane ioeventfd
in response to the guest unsetting PCI_COMMAND_MASTER:
#2 0x0000555555922651 in virtqueue_map_desc (vdev=vdev@entry=0x555556dbcfb0, p_num_sg=p_num_sg@entry=0x7fffe657e1a8, addr=addr@entry=0x7fffe657e240, iov=iov@entry=0x7fffe6580240, max_num_sg=max_num_sg@entry=1024, is_write=is_write@entry=false, pa=0, sz=0)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:757
#3 0x0000555555922a89 in virtqueue_pop (vq=vq@entry=0x555556dc8660, sz=sz@entry=184)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:950
#4 0x00005555558d3eca in virtio_blk_get_request (vq=0x555556dc8660, s=0x555556dbcfb0)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:255
#5 0x00005555558d3eca in virtio_blk_handle_vq (s=0x555556dbcfb0, vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:776
#6 0x000055555591dd66 in virtio_queue_notify_aio_vq (vq=vq@entry=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1550
#7 0x000055555591ecef in virtio_queue_notify_aio_vq (vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1546
#8 0x000055555591ecef in virtio_queue_host_notifier_aio_poll (opaque=0x555556dc86c8)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:2527
#9 0x0000555555d02164 in run_poll_handlers_once (ctx=ctx@entry=0x55555688bfc0, timeout=timeout@entry=0x7fffe65844a8)
at /home/mdroth/w/qemu.git/util/aio-posix.c:520
#10 0x0000555555d02d1b in try_poll_mode (timeout=0x7fffe65844a8, ctx=0x55555688bfc0)
at /home/mdroth/w/qemu.git/util/aio-posix.c:607
#11 0x0000555555d02d1b in aio_poll (ctx=ctx@entry=0x55555688bfc0, blocking=blocking@entry=true)
at /home/mdroth/w/qemu.git/util/aio-posix.c:639
#12 0x0000555555d0004d in aio_wait_bh_oneshot (ctx=0x55555688bfc0, cb=cb@entry=0x5555558d5130 <virtio_blk_data_plane_stop_bh>, opaque=opaque@entry=0x555556de86f0)
at /home/mdroth/w/qemu.git/util/aio-wait.c:71
#13 0x00005555558d59bf in virtio_blk_data_plane_stop (vdev=<optimized out>)
at /home/mdroth/w/qemu.git/hw/block/dataplane/virtio-blk.c:288
#14 0x0000555555b906a1 in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:245
#15 0x0000555555b90dbb in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:237
#16 0x0000555555b92a8e in virtio_pci_stop_ioeventfd (proxy=0x555556db4e40)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:292
#17 0x0000555555b92a8e in virtio_write_config (pci_dev=0x555556db4e40, address=<optimized out>, val=1048832, len=<optimized out>)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:613
I.e. the calling code is only scheduling a one-shot BH for
virtio_blk_data_plane_stop_bh, but somehow we end up trying to process
an additional virtqueue entry before we get there. This is likely due
to the following check in virtio_queue_host_notifier_aio_poll:
static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
EventNotifier *n = opaque;
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
bool progress;
if (!vq->vring.desc || virtio_queue_empty(vq)) {
return false;
}
progress = virtio_queue_notify_aio_vq(vq);
namely the call to virtio_queue_empty(). In this case, since no new
requests have actually been issued, shadow_avail_idx == last_avail_idx,
so we actually try to access the vring via vring_avail_idx() to get
the latest non-shadowed idx:
int virtio_queue_empty(VirtQueue *vq)
{
bool empty;
...
if (vq->shadow_avail_idx != vq->last_avail_idx) {
return 0;
}
rcu_read_lock();
empty = vring_avail_idx(vq) == vq->last_avail_idx;
rcu_read_unlock();
return empty;
but since the IOMMU region has been disabled we get a bogus value (0
usually), which causes virtio_queue_empty() to falsely report that
there are entries to be processed, which causes errors such as:
"virtio: zero sized buffers are not allowed"
or
"virtio-blk missing headers"
and puts the device in an error state.
This patch works around the issue by introducing virtio_set_disabled(),
which sets a 'disabled' flag to bypass checks like virtio_queue_empty()
when bus-mastering is disabled. Since we'd check this flag at all the
same sites as vdev->broken, we replace those checks with an inline
function which checks for either vdev->broken or vdev->disabled.
The 'disabled' flag is only migrated when set, which should be fairly
rare, but to maintain migration compatibility we disable it's use for
older machine types. Users requiring the use of the flag in conjunction
with older machine types can set it explicitly as a virtio-device
option.
NOTES:
- This leaves some other oddities in play, like the fact that
DRIVER_OK also gets unset in response to bus-mastering being
disabled, but not restored (however the device seems to continue
working)
- Similarly, we disable the host notifier via
virtio_bus_stop_ioeventfd(), which seems to move the handling out
of virtio-blk dataplane and back into the main IO thread, and it
ends up staying there till a reset (but otherwise continues working
normally)
Cc: David Gibson <david@gibson.dropbear.id.au>,
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Message-Id: <20191120005003.27035-1-mdroth@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-11-20 01:50:03 +01:00
|
|
|
/* Migration subsection predicate: only send the "disabled" flag if set
 * (rare — set when vring processing is bypassed, e.g. bus-mastering off). */
static bool virtio_disabled_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->disabled;
}
|
|
|
|
|
2016-01-06 13:23:39 +01:00
|
|
|
/* Per-queue extended state: guest addresses of the avail/used rings. */
static const VMStateDescription vmstate_virtqueue = {
    .name = "virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/* Per-queue packed-ring state: indices, wrap counters and in-flight count. */
static const VMStateDescription vmstate_packed_virtqueue = {
    .name = "packed_virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
        VMSTATE_UINT16(used_idx, struct VirtQueue),
        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
        VMSTATE_UINT32(inuse, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
2015-08-05 11:50:07 +02:00
|
|
|
/* Subsection: vmstate_virtqueue for every queue (gated on VERSION_1). */
static const VMStateDescription vmstate_virtio_virtqueues = {
    .name = "virtio/virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/* Subsection: vmstate_packed_virtqueue for every queue (gated on RING_PACKED). */
static const VMStateDescription vmstate_virtio_packed_virtqueues = {
    .name = "virtio/packed_virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_packed_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
2016-01-06 13:23:39 +01:00
|
|
|
/* Per-queue state: the default ring size (needed to restore resized rings). */
static const VMStateDescription vmstate_ringsize = {
    .name = "ringsize_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
|
|
|
/* Subsection: vmstate_ringsize for every queue (gated on any resize). */
static const VMStateDescription vmstate_virtio_ringsize = {
    .name = "virtio/ringsize",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_ringsize_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
2017-01-19 20:00:50 +01:00
|
|
|
static int get_extra_state(QEMUFile *f, void *pv, size_t size,
|
2018-11-14 14:29:30 +01:00
|
|
|
const VMStateField *field)
|
2015-11-06 09:02:44 +01:00
|
|
|
{
|
|
|
|
VirtIODevice *vdev = pv;
|
|
|
|
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
|
|
|
|
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
|
|
|
|
|
|
|
|
if (!k->load_extra_state) {
|
|
|
|
return -1;
|
|
|
|
} else {
|
|
|
|
return k->load_extra_state(qbus->parent, f);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-19 20:00:50 +01:00
|
|
|
/*
 * VMState .put handler: save transport-specific extra state via the
 * proxy bus.
 *
 * NOTE(review): save_extra_state is invoked unconditionally, unlike
 * get_extra_state() which checks the callback — presumably safe because
 * virtio_extra_state_needed() gates this subsection; confirm no path
 * reaches here without it.
 */
static int put_extra_state(QEMUFile *f, void *pv, size_t size,
                           const VMStateField *field, JSONWriter *vmdesc)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    k->save_extra_state(qbus->parent, f);
    return 0;
}
|
|
|
|
|
|
|
|
/* Custom field handlers for the opaque, transport-owned extra state. */
static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .get = get_extra_state,
    .put = put_extra_state,
};
|
|
|
|
|
|
|
|
/*
 * Subsection: transport-specific extra state, serialized through the
 * custom get/put handlers above (gated on has_extra_state()).
 */
static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (VMStateField[]) {
        {
            .name         = "extra_state",
            .version_id   = 0,
            .field_exists = NULL,
            .size         = 0,
            .info         = &vmstate_info_extra_state,
            .flags        = VMS_SINGLE,
            .offset       = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
2014-06-24 19:38:54 +02:00
|
|
|
/* Subsection: device endianness (gated on differing from the default). */
static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_device_endian_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
2015-06-01 10:45:40 +02:00
|
|
|
/* Subsection: full 64-bit guest features (gated on any high bits set). */
static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_64bit_features_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
2016-09-21 17:52:20 +02:00
|
|
|
/* Subsection: the "broken" flag (only sent when set). */
static const VMStateDescription vmstate_virtio_broken = {
    .name = "virtio/broken",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_broken_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(broken, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
2019-03-20 12:26:40 +01:00
|
|
|
/* Subsection: the "started" flag (only sent when set). */
static const VMStateDescription vmstate_virtio_started = {
    .name = "virtio/started",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_started_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(started, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
virtio-pci: disable vring processing when bus-mastering is disabled
Currently the SLOF firmware for pseries guests will disable/re-enable
a PCI device multiple times via IO/MEM/MASTER bits of PCI_COMMAND
register after the initial probe/feature negotiation, as it tends to
work with a single device at a time at various stages like probing
and running block/network bootloaders without doing a full reset
in-between.
In QEMU, when PCI_COMMAND_MASTER is disabled we disable the
corresponding IOMMU memory region, so DMA accesses (including to vring
fields like idx/flags) will no longer undergo the necessary
translation. Normally we wouldn't expect this to happen since it would
be misbehavior on the driver side to continue driving DMA requests.
However, in the case of pseries, with iommu_platform=on, we trigger the
following sequence when tearing down the virtio-blk dataplane ioeventfd
in response to the guest unsetting PCI_COMMAND_MASTER:
#2 0x0000555555922651 in virtqueue_map_desc (vdev=vdev@entry=0x555556dbcfb0, p_num_sg=p_num_sg@entry=0x7fffe657e1a8, addr=addr@entry=0x7fffe657e240, iov=iov@entry=0x7fffe6580240, max_num_sg=max_num_sg@entry=1024, is_write=is_write@entry=false, pa=0, sz=0)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:757
#3 0x0000555555922a89 in virtqueue_pop (vq=vq@entry=0x555556dc8660, sz=sz@entry=184)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:950
#4 0x00005555558d3eca in virtio_blk_get_request (vq=0x555556dc8660, s=0x555556dbcfb0)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:255
#5 0x00005555558d3eca in virtio_blk_handle_vq (s=0x555556dbcfb0, vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:776
#6 0x000055555591dd66 in virtio_queue_notify_aio_vq (vq=vq@entry=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1550
#7 0x000055555591ecef in virtio_queue_notify_aio_vq (vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1546
#8 0x000055555591ecef in virtio_queue_host_notifier_aio_poll (opaque=0x555556dc86c8)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:2527
#9 0x0000555555d02164 in run_poll_handlers_once (ctx=ctx@entry=0x55555688bfc0, timeout=timeout@entry=0x7fffe65844a8)
at /home/mdroth/w/qemu.git/util/aio-posix.c:520
#10 0x0000555555d02d1b in try_poll_mode (timeout=0x7fffe65844a8, ctx=0x55555688bfc0)
at /home/mdroth/w/qemu.git/util/aio-posix.c:607
#11 0x0000555555d02d1b in aio_poll (ctx=ctx@entry=0x55555688bfc0, blocking=blocking@entry=true)
at /home/mdroth/w/qemu.git/util/aio-posix.c:639
#12 0x0000555555d0004d in aio_wait_bh_oneshot (ctx=0x55555688bfc0, cb=cb@entry=0x5555558d5130 <virtio_blk_data_plane_stop_bh>, opaque=opaque@entry=0x555556de86f0)
at /home/mdroth/w/qemu.git/util/aio-wait.c:71
#13 0x00005555558d59bf in virtio_blk_data_plane_stop (vdev=<optimized out>)
at /home/mdroth/w/qemu.git/hw/block/dataplane/virtio-blk.c:288
#14 0x0000555555b906a1 in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:245
#15 0x0000555555b90dbb in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:237
#16 0x0000555555b92a8e in virtio_pci_stop_ioeventfd (proxy=0x555556db4e40)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:292
#17 0x0000555555b92a8e in virtio_write_config (pci_dev=0x555556db4e40, address=<optimized out>, val=1048832, len=<optimized out>)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:613
I.e. the calling code is only scheduling a one-shot BH for
virtio_blk_data_plane_stop_bh, but somehow we end up trying to process
an additional virtqueue entry before we get there. This is likely due
to the following check in virtio_queue_host_notifier_aio_poll:
static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
EventNotifier *n = opaque;
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
bool progress;
if (!vq->vring.desc || virtio_queue_empty(vq)) {
return false;
}
progress = virtio_queue_notify_aio_vq(vq);
namely the call to virtio_queue_empty(). In this case, since no new
requests have actually been issued, shadow_avail_idx == last_avail_idx,
so we actually try to access the vring via vring_avail_idx() to get
the latest non-shadowed idx:
int virtio_queue_empty(VirtQueue *vq)
{
bool empty;
...
if (vq->shadow_avail_idx != vq->last_avail_idx) {
return 0;
}
rcu_read_lock();
empty = vring_avail_idx(vq) == vq->last_avail_idx;
rcu_read_unlock();
return empty;
but since the IOMMU region has been disabled we get a bogus value (0
usually), which causes virtio_queue_empty() to falsely report that
there are entries to be processed, which causes errors such as:
"virtio: zero sized buffers are not allowed"
or
"virtio-blk missing headers"
and puts the device in an error state.
This patch works around the issue by introducing virtio_set_disabled(),
which sets a 'disabled' flag to bypass checks like virtio_queue_empty()
when bus-mastering is disabled. Since we'd check this flag at all the
same sites as vdev->broken, we replace those checks with an inline
function which checks for either vdev->broken or vdev->disabled.
The 'disabled' flag is only migrated when set, which should be fairly
rare, but to maintain migration compatibility we disable it's use for
older machine types. Users requiring the use of the flag in conjunction
with older machine types can set it explicitly as a virtio-device
option.
NOTES:
- This leaves some other oddities in play, like the fact that
DRIVER_OK also gets unset in response to bus-mastering being
disabled, but not restored (however the device seems to continue
working)
- Similarly, we disable the host notifier via
virtio_bus_stop_ioeventfd(), which seems to move the handling out
of virtio-blk dataplane and back into the main IO thread, and it
ends up staying there till a reset (but otherwise continues working
normally)
Cc: David Gibson <david@gibson.dropbear.id.au>,
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Message-Id: <20191120005003.27035-1-mdroth@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-11-20 01:50:03 +01:00
|
|
|
/* Subsection: the "disabled" flag (only sent when set, for compat). */
static const VMStateDescription vmstate_virtio_disabled = {
    .name = "virtio/disabled",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_disabled_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(disabled, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};
|
|
|
|
|
2014-06-24 19:22:30 +02:00
|
|
|
/*
 * Top-level vmstate for the common virtio device state.
 *
 * The mandatory field list is deliberately empty: all optional state
 * (device endianness, high 32 feature bits, extra virtqueue state,
 * packed-ring state, the broken/disabled flags, ...) travels in
 * subsections, each guarded by its own .needed callback, so migration
 * streams remain compatible with older QEMUs that lack them.
 */
static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_broken,
        &vmstate_virtio_extra_state,
        &vmstate_virtio_started,
        &vmstate_virtio_packed_virtqueues,
        &vmstate_virtio_disabled,
        NULL
    }
};
|
|
|
|
|
2017-09-25 13:29:17 +02:00
|
|
|
/*
 * Serialize the common virtio device state of @vdev to @f.
 *
 * The emission order defines the migration wire format and must match
 * virtio_load() exactly: transport config, core registers, device
 * config space, the virtqueue count, then per-queue state, followed by
 * device-specific state and finally the "virtio" vmstate subsections.
 *
 * Returns 0 on success, a negative error code on failure.
 */
int virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    /* Only the low 32 feature bits are stored here; the high bits go
     * through the vmstate_virtio_64bit_features subsection. */
    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
    int i;

    /* Transport (e.g. virtio-pci/-mmio/-ccw) specific config first. */
    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &guest_features_lo);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    /* Count the in-use virtqueues; queues are allocated contiguously,
     * so the first one with vring.num == 0 ends the list. */
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        /*
         * Save desc now, the rest of the ring addresses are saved in
         * subsections for VIRTIO-1 devices.
         */
        qemu_put_be64(f, vdev->vq[i].vring.desc);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    /* Legacy device-specific save hook, if the device provides one. */
    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    /* Newer devices describe their state with a vmsd instead. */
    if (vdc->vmsd) {
        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}
|
|
|
|
|
2016-10-06 14:55:39 +02:00
|
|
|
/* A wrapper for use as a VMState .put function */
|
2017-01-19 20:00:50 +01:00
|
|
|
static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
|
2020-12-11 18:11:48 +01:00
|
|
|
const VMStateField *field, JSONWriter *vmdesc)
|
2016-10-06 14:55:39 +02:00
|
|
|
{
|
2017-09-25 13:29:17 +02:00
|
|
|
return virtio_save(VIRTIO_DEVICE(opaque), f);
|
2016-10-06 14:55:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* A wrapper for use as a VMState .get function */
|
2017-01-19 20:00:50 +01:00
|
|
|
static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
|
2018-11-14 14:29:30 +01:00
|
|
|
const VMStateField *field)
|
2016-10-06 14:55:39 +02:00
|
|
|
{
|
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
|
|
|
|
DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
|
|
|
|
|
|
|
|
return virtio_load(vdev, f, dc->vmsd->version_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * VMStateInfo glue so transports can embed the common virtio state in
 * their own vmstate descriptions; get/put forward to
 * virtio_load()/virtio_save().
 */
const VMStateInfo virtio_vmstate_info = {
    .name = "virtio",
    .get = virtio_device_get,
    .put = virtio_device_put,
};
|
|
|
|
|
2015-06-04 12:34:14 +02:00
|
|
|
/*
 * Apply a guest feature set without the FEATURES_OK negotiation check.
 *
 * Bits not offered in vdev->host_features are silently masked out
 * before being handed to the device class and recorded in
 * vdev->guest_features.  Returns 0 if the guest requested only offered
 * bits, -1 if anything had to be masked.
 */
static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint64_t accepted = val & vdev->host_features;
    bool rejected = accepted != val;

    if (k->set_features) {
        k->set_features(vdev, accepted);
    }
    vdev->guest_features = accepted;

    return rejected ? -1 : 0;
}
|
|
|
|
|
2015-06-04 12:34:14 +02:00
|
|
|
/*
 * Guest-visible feature negotiation entry point.
 *
 * Rejects writes after the guest has acked FEATURES_OK, applies the
 * feature set, rebuilds the vring region caches when EVENT_IDX changes
 * their layout, and arms start_on_kick for legacy (pre-1.0) drivers.
 *
 * Returns 0 on success, -EINVAL if negotiation already finished, -1 if
 * unsupported bits were requested.
 */
int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    int ret;
    /*
     * The driver must not attempt to set features after feature negotiation
     * has finished.
     */
    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
        return -EINVAL;
    }

    /* Bit 30 is transitional junk some legacy drivers set; log it. */
    if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
                      __func__, vdev->name);
    }

    ret = virtio_set_features_nocheck(vdev, val);
    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
        int i;
        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
            if (vdev->vq[i].vring.num != 0) {
                virtio_init_region_cache(vdev, i);
            }
        }
    }
    if (!ret) {
        /* Legacy drivers may kick before DRIVER_OK; start on that kick. */
        if (!virtio_device_started(vdev, vdev->status) &&
            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
            vdev->start_on_kick = true;
        }
    }
    return ret;
}
|
|
|
|
|
virtio: introduce VirtIOConfigSizeParams & virtio_get_config_size
This is the first step towards moving all device config size calculation
logic into the virtio core code. In particular, this adds a struct that
contains all the necessary information for common virtio code to be able
to calculate the final config size for a device. This is expected to be
used with the new virtio_get_config_size helper, which calculates the
final length based on the provided host features.
This builds on top of already existing code like VirtIOFeature and
virtio_feature_get_config_size(), but adds additional fields, as well as
sanity checking so that device-specifc code doesn't have to duplicate it.
An example usage would be:
static const VirtIOFeature dev_features[] = {
{.flags = 1ULL << FEATURE_1_BIT,
.end = endof(struct virtio_dev_config, feature_1)},
{.flags = 1ULL << FEATURE_2_BIT,
.end = endof(struct virtio_dev_config, feature_2)},
{}
};
static const VirtIOConfigSizeParams dev_cfg_size_params = {
.min_size = DEV_BASE_CONFIG_SIZE,
.max_size = sizeof(struct virtio_dev_config),
.feature_sizes = dev_features
};
// code inside my_dev_device_realize()
size_t config_size = virtio_get_config_size(&dev_cfg_size_params,
host_features);
virtio_init(vdev, VIRTIO_ID_MYDEV, config_size);
Currently every device is expected to write its own boilerplate from the
example above in device_realize(), however, the next step of this
transition is moving VirtIOConfigSizeParams into VirtioDeviceClass,
so that it can be done automatically by the virtio initialization code.
All of the users of virtio_feature_get_config_size have been converted
to use virtio_get_config_size so it's no longer needed and is removed
with this commit.
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
Message-Id: <20220906073111.353245-2-d-tatianin@yandex-team.ru>
Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-09-06 09:31:07 +02:00
|
|
|
size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
|
|
|
|
uint64_t host_features)
|
2019-02-21 11:33:08 +01:00
|
|
|
{
|
virtio: introduce VirtIOConfigSizeParams & virtio_get_config_size
This is the first step towards moving all device config size calculation
logic into the virtio core code. In particular, this adds a struct that
contains all the necessary information for common virtio code to be able
to calculate the final config size for a device. This is expected to be
used with the new virtio_get_config_size helper, which calculates the
final length based on the provided host features.
This builds on top of already existing code like VirtIOFeature and
virtio_feature_get_config_size(), but adds additional fields, as well as
sanity checking so that device-specifc code doesn't have to duplicate it.
An example usage would be:
static const VirtIOFeature dev_features[] = {
{.flags = 1ULL << FEATURE_1_BIT,
.end = endof(struct virtio_dev_config, feature_1)},
{.flags = 1ULL << FEATURE_2_BIT,
.end = endof(struct virtio_dev_config, feature_2)},
{}
};
static const VirtIOConfigSizeParams dev_cfg_size_params = {
.min_size = DEV_BASE_CONFIG_SIZE,
.max_size = sizeof(struct virtio_dev_config),
.feature_sizes = dev_features
};
// code inside my_dev_device_realize()
size_t config_size = virtio_get_config_size(&dev_cfg_size_params,
host_features);
virtio_init(vdev, VIRTIO_ID_MYDEV, config_size);
Currently every device is expected to write its own boilerplate from the
example above in device_realize(), however, the next step of this
transition is moving VirtIOConfigSizeParams into VirtioDeviceClass,
so that it can be done automatically by the virtio initialization code.
All of the users of virtio_feature_get_config_size have been converted
to use virtio_get_config_size so it's no longer needed and is removed
with this commit.
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
Message-Id: <20220906073111.353245-2-d-tatianin@yandex-team.ru>
Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-09-06 09:31:07 +02:00
|
|
|
size_t config_size = params->min_size;
|
|
|
|
const VirtIOFeature *feature_sizes = params->feature_sizes;
|
|
|
|
size_t i;
|
2019-02-21 11:33:08 +01:00
|
|
|
|
|
|
|
for (i = 0; feature_sizes[i].flags != 0; i++) {
|
|
|
|
if (host_features & feature_sizes[i].flags) {
|
|
|
|
config_size = MAX(feature_sizes[i].end, config_size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
virtio: introduce VirtIOConfigSizeParams & virtio_get_config_size
This is the first step towards moving all device config size calculation
logic into the virtio core code. In particular, this adds a struct that
contains all the necessary information for common virtio code to be able
to calculate the final config size for a device. This is expected to be
used with the new virtio_get_config_size helper, which calculates the
final length based on the provided host features.
This builds on top of already existing code like VirtIOFeature and
virtio_feature_get_config_size(), but adds additional fields, as well as
sanity checking so that device-specifc code doesn't have to duplicate it.
An example usage would be:
static const VirtIOFeature dev_features[] = {
{.flags = 1ULL << FEATURE_1_BIT,
.end = endof(struct virtio_dev_config, feature_1)},
{.flags = 1ULL << FEATURE_2_BIT,
.end = endof(struct virtio_dev_config, feature_2)},
{}
};
static const VirtIOConfigSizeParams dev_cfg_size_params = {
.min_size = DEV_BASE_CONFIG_SIZE,
.max_size = sizeof(struct virtio_dev_config),
.feature_sizes = dev_features
};
// code inside my_dev_device_realize()
size_t config_size = virtio_get_config_size(&dev_cfg_size_params,
host_features);
virtio_init(vdev, VIRTIO_ID_MYDEV, config_size);
Currently every device is expected to write its own boilerplate from the
example above in device_realize(), however, the next step of this
transition is moving VirtIOConfigSizeParams into VirtioDeviceClass,
so that it can be done automatically by the virtio initialization code.
All of the users of virtio_feature_get_config_size have been converted
to use virtio_get_config_size so it's no longer needed and is removed
with this commit.
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
Message-Id: <20220906073111.353245-2-d-tatianin@yandex-team.ru>
Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-09-06 09:31:07 +02:00
|
|
|
assert(config_size <= params->max_size);
|
2019-02-21 11:33:08 +01:00
|
|
|
return config_size;
|
|
|
|
}
|
|
|
|
|
2014-06-24 19:15:31 +02:00
|
|
|
/*
 * Deserialize the common virtio device state of @vdev from @f.
 *
 * Must consume the stream in exactly the order virtio_save() produced
 * it.  After the raw fields and subsections are read, the loaded
 * features are re-validated against host_features and the per-queue
 * indices are sanity-checked against the (possibly hostile) guest
 * rings.
 *
 * Returns 0 on success, -1 or a negative errno on failure.
 */
int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    /* Reject out-of-range queue selectors from a bad stream. */
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    /*
     * Temporarily set guest_features low bits - needed by
     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
     *
     * Note: devices should always test host features in future - don't create
     * new dependencies like this.
     */
    vdev->guest_features = features;

    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        /* Runtime-only notification state is reset, not migrated. */
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        /* A queue with no ring must not claim to have made progress. */
        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    /* Legacy device-specific load hook, mirroring vdc->save. */
    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    if (vdc->vmsd) {
        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    /* No endianness subsection in the stream: fall back to the default. */
    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    /* Legacy drivers may kick before DRIVER_OK; start on that kick. */
    if (!virtio_device_started(vdev, vdev->status) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        vdev->start_on_kick = true;
    }

    /* The vring_*_idx() accessors below require the RCU read lock. */
    RCU_READ_LOCK_GUARD();
    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;

            /*
             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
             * only the region cache needs to be set up. Legacy devices need
             * to calculate used and avail ring addresses based on the desc
             * address.
             */
            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
                virtio_init_region_cache(vdev, i);
            } else {
                virtio_queue_update_rings(vdev, i);
            }

            /* Packed rings keep their own wrap counters; no avail-idx
             * consistency check applies, so move on to the next queue. */
            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
                vdev->vq[i].shadow_avail_wrap_counter =
                    vdev->vq[i].last_avail_wrap_counter;
                continue;
            }

            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                vdev->vq[i].used_idx = 0;
                vdev->vq[i].shadow_avail_idx = 0;
                vdev->vq[i].inuse = 0;
                continue;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);

            /*
             * Some devices migrate VirtQueueElements that have been popped
             * from the avail ring but not yet returned to the used ring.
             * Since max ring size < UINT16_MAX it's safe to use modulo
             * UINT16_MAX + 1 subtraction.
             */
            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
                                vdev->vq[i].used_idx);
            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
                             "used_idx 0x%x",
                             i, vdev->vq[i].vring.num,
                             vdev->vq[i].last_avail_idx,
                             vdev->vq[i].used_idx);
                return -1;
            }
        }
    }

    if (vdc->post_load) {
        ret = vdc->post_load(vdev);
        if (ret) {
            return ret;
        }
    }

    return 0;
}
|
|
|
|
|
2013-04-24 10:21:22 +02:00
|
|
|
/*
 * Tear down common virtio device state: unregister the VM state change
 * handler installed at device init time.
 */
void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
}
|
|
|
|
|
2021-01-11 16:20:20 +01:00
|
|
|
/*
 * VM run-state change handler for virtio devices.
 *
 * When the VM starts running and the device was started by the guest,
 * the device status is re-applied *before* notifying the transport;
 * when stopping, the transport is notified first and the status is
 * re-applied afterwards.  This ordering is deliberate and must be kept.
 */
static void virtio_vmstate_change(void *opaque, bool running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);
    bool should_run = running && virtio_device_started(vdev, vdev->status);

    vdev->vm_running = running;

    if (should_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (bus_class->vmstate_change) {
        bus_class->vmstate_change(bus->parent, should_run);
    }

    if (!should_run) {
        virtio_set_status(vdev, vdev->status);
    }
}
|
|
|
|
|
2014-09-30 08:10:38 +02:00
|
|
|
/*
 * Common instance_init helper for virtio transport proxies.
 *
 * Initializes @data (a DeviceState embedded in the proxy) as the
 * "virtio-backend" child of @proxy_obj with the given QOM type and
 * size, then aliases all of the backend's properties onto the proxy so
 * users can set them directly on the transport device.
 */
void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *backend = data;

    object_initialize_child_with_props(proxy_obj, "virtio-backend", backend,
                                       vdev_size, vdev_name, &error_abort,
                                       NULL);
    qdev_alias_all_properties(backend, proxy_obj);
}
|
|
|
|
|
2022-04-01 15:23:18 +02:00
|
|
|
/*
 * Initialize the common VirtIODevice state. Called by the individual device
 * realize functions after the transport bus relationship is established.
 *
 * @vdev: device to initialize
 * @device_id: virtio device ID (VIRTIO_ID_*); also determines vdev->name
 * @config_size: size of the device-specific config space (0 for none)
 */
void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;
    /* Ask the transport how many MSI-like vectors it provides, if any. */
    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->start_on_kick = false;
    vdev->started = false;
    vdev->vhost_started = false;
    vdev->device_id = device_id;
    vdev->status = 0;
    /* isr may be read/written from other threads, hence the atomic set. */
    qatomic_set(&vdev->isr, 0);
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    vdev->broken = false;
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
        vdev->vq[i].host_notifier_enabled = false;
    }

    vdev->name = virtio_id_to_name(device_id);
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    /* Track VM run-state transitions so the device can start/stop cleanly. */
    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
                                                     virtio_vmstate_change, vdev);
    vdev->device_endian = virtio_default_endian();
    vdev->use_guest_notifier_mask = true;
}
|
2008-12-04 20:38:57 +01:00
|
|
|
|
2020-07-07 12:54:45 +02:00
|
|
|
/*
|
|
|
|
* Only devices that have already been around prior to defining the virtio
|
|
|
|
* standard support legacy mode; this includes devices not specified in the
|
|
|
|
* standard. All newer devices conform to the virtio standard only.
|
|
|
|
*/
|
|
|
|
bool virtio_legacy_allowed(VirtIODevice *vdev)
|
|
|
|
{
|
|
|
|
switch (vdev->device_id) {
|
|
|
|
case VIRTIO_ID_NET:
|
|
|
|
case VIRTIO_ID_BLOCK:
|
|
|
|
case VIRTIO_ID_CONSOLE:
|
|
|
|
case VIRTIO_ID_RNG:
|
|
|
|
case VIRTIO_ID_BALLOON:
|
|
|
|
case VIRTIO_ID_RPMSG:
|
|
|
|
case VIRTIO_ID_SCSI:
|
|
|
|
case VIRTIO_ID_9P:
|
|
|
|
case VIRTIO_ID_RPROC_SERIAL:
|
|
|
|
case VIRTIO_ID_CAIF:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-21 14:25:03 +02:00
|
|
|
/* Return true if the user disabled the legacy-mode compatibility check. */
bool virtio_legacy_check_disabled(VirtIODevice *vdev)
{
    return vdev->disable_legacy_check;
}
|
|
|
|
|
2012-10-23 12:30:10 +02:00
|
|
|
/* Guest physical address of queue n's descriptor table (0 if unset). */
hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}
|
|
|
|
|
2020-07-27 17:33:19 +02:00
|
|
|
/*
 * Legacy notion of "queue enabled": a queue is considered enabled once the
 * guest has programmed a non-zero descriptor table address for it.
 */
bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
{
    return virtio_queue_get_desc_addr(vdev, n) != 0;
}
|
|
|
|
|
2019-03-25 04:40:36 +01:00
|
|
|
/*
 * Return whether queue n is enabled. Transports that track queue enablement
 * explicitly (e.g. modern virtio-pci) answer via their queue_enabled hook;
 * otherwise fall back to the legacy "descriptor address set" heuristic.
 */
bool virtio_queue_enabled(VirtIODevice *vdev, int n)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (!k->queue_enabled) {
        return virtio_queue_enabled_legacy(vdev, n);
    }
    return k->queue_enabled(qbus->parent, n);
}
|
|
|
|
|
2012-10-23 12:30:10 +02:00
|
|
|
/* Guest physical address of queue n's avail (driver) ring. */
hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}
|
|
|
|
|
2012-10-23 12:30:10 +02:00
|
|
|
/* Guest physical address of queue n's used (device) ring. */
hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}
|
|
|
|
|
2012-10-23 12:30:10 +02:00
|
|
|
/* Byte size of queue n's descriptor table (num entries of VRingDesc). */
hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}
|
|
|
|
|
2012-10-23 12:30:10 +02:00
|
|
|
/*
 * Byte size of queue n's avail structure. For packed virtqueues this is the
 * driver event suppression structure; for split virtqueues it is the avail
 * header plus one uint16_t per descriptor, plus the optional used_event
 * field when VIRTIO_RING_F_EVENT_IDX was negotiated.
 */
hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    int event_bytes = 0;

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return sizeof(struct VRingPackedDescEvent);
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        event_bytes = 2;
    }
    return offsetof(VRingAvail, ring) +
           sizeof(uint16_t) * vdev->vq[n].vring.num + event_bytes;
}
|
|
|
|
|
2012-10-23 12:30:10 +02:00
|
|
|
/*
 * Byte size of queue n's used structure. For packed virtqueues this is the
 * device event suppression structure; for split virtqueues it is the used
 * header plus one VRingUsedElem per descriptor, plus the optional
 * avail_event field when VIRTIO_RING_F_EVENT_IDX was negotiated.
 */
hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    int event_bytes = 0;

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return sizeof(struct VRingPackedDescEvent);
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        event_bytes = 2;
    }
    return offsetof(VRingUsed, ring) +
           sizeof(VRingUsedElem) * vdev->vq[n].vring.num + event_bytes;
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Pack the packed-virtqueue progress of queue n into one 32-bit value:
 *   bits  0-14: last_avail_idx, bit 15: avail wrap counter,
 *   bits 16-30: used_idx,       bit 31: used wrap counter.
 * This is the inverse of virtio_queue_packed_set_last_avail_idx() and is
 * used for migration / vhost state transfer.
 */
static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
                                                           int n)
{
    unsigned int avail, used;

    avail = vdev->vq[n].last_avail_idx;
    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;

    used = vdev->vq[n].used_idx;
    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;

    return avail | used << 16;
}
|
|
|
|
|
|
|
|
/* Split-ring variant: the progress state is just the 16-bit avail index. */
static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
                                                      int n)
{
    return vdev->vq[n].last_avail_idx;
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Return queue n's progress state in layout-dependent form: the packed
 * encoding (avail/used indices plus wrap bits) for packed rings, or the
 * plain 16-bit avail index for split rings.
 */
unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    bool packed = virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED);

    return packed ? virtio_queue_packed_get_last_avail_idx(vdev, n)
                  : virtio_queue_split_get_last_avail_idx(vdev, n);
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Unpack a 32-bit progress value (produced by
 * virtio_queue_packed_get_last_avail_idx()) back into queue n's state:
 *   bits  0-14: last_avail_idx, bit 15: avail wrap counter,
 *   bits 16-30: used_idx,       bit 31: used wrap counter.
 */
static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
                                                   int n, unsigned int idx)
{
    struct VirtQueue *vq = &vdev->vq[n];

    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
    vq->last_avail_wrap_counter =
        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
    idx >>= 16;
    /*
     * Fix: the used index is 15 bits wide, same as the avail index above;
     * the previous 0x7ffff mask let the wrap-counter bit (bit 15) leak
     * into used_idx, which is tracked separately in used_wrap_counter.
     */
    vq->used_idx = idx & 0x7fff;
    vq->used_wrap_counter = !!(idx & 0x8000);
}
|
|
|
|
|
|
|
|
/* Split-ring variant: restore the 16-bit avail index and its shadow copy. */
static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
                                                  int n, unsigned int idx)
{
    struct VirtQueue *vq = &vdev->vq[n];

    vq->last_avail_idx = idx;
    vq->shadow_avail_idx = idx;
}
|
|
|
|
|
|
|
|
/*
 * Restore queue n's progress state from a value previously obtained via
 * virtio_queue_get_last_avail_idx(), dispatching on the ring layout.
 */
void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
                                     unsigned int idx)
{
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
        return;
    }
    virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
}
|
|
|
|
|
|
|
|
/*
 * Packed rings keep no used-index in shared guest memory to resynchronize
 * from, so there is nothing to restore here; intentionally a no-op.
 */
static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
                                                       int n)
{
    /* We don't have a reference like avail idx in shared memory */
    return;
}
|
|
|
|
|
|
|
|
/*
 * Resynchronize queue n's avail index from the used index stored in guest
 * memory (e.g. after a vhost backend stops and the in-flight state must be
 * reconciled). The RCU guard protects the vring memory region cache access
 * inside vring_used_idx(). Skipped if the queue was never set up.
 */
static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
                                                      int n)
{
    RCU_READ_LOCK_GUARD();
    if (vdev->vq[n].vring.desc) {
        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
    }
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Resynchronize queue n's avail index from guest memory where possible,
 * dispatching on the ring layout (no-op for packed rings).
 */
void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
{
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_split_restore_last_avail_idx(vdev, n);
    } else {
        virtio_queue_packed_restore_last_avail_idx(vdev, n);
    }
}
|
|
|
|
|
|
|
|
/*
 * No-op for packed rings: used_idx is not stored in guest memory and was
 * already restored by virtio_queue_packed_set_last_avail_idx().
 */
static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
{
    /* used idx was updated through set_last_avail_idx() */
    return;
}
|
|
|
|
|
|
|
|
/*
 * Split-ring variant: reload the cached used_idx from the used ring in
 * guest memory, under RCU protection for the region cache. Skipped if the
 * queue was never set up.
 *
 * NOTE(review): despite "packed" in the name, this is the *split* ring
 * handler (see the dispatch in virtio_queue_update_used_idx()); the name
 * appears to be a historical misnomer.
 */
static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
{
    RCU_READ_LOCK_GUARD();
    if (vdev->vq[n].vring.desc) {
        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
    }
}
|
|
|
|
|
2019-10-25 10:35:24 +02:00
|
|
|
/*
 * Refresh queue n's cached used index, dispatching on the ring layout
 * (no-op for packed rings, reload from guest memory for split rings).
 *
 * Fix: the callees return void; `return <void expression>;` from a void
 * function is an ISO C constraint violation (C99 6.8.6.4p1, GNU extension
 * only), so call them as plain statements instead.
 */
void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_update_used_idx(vdev, n);
    } else {
        virtio_split_packed_update_used_idx(vdev, n);
    }
}
|
|
|
|
|
2013-08-12 11:08:09 +02:00
|
|
|
/*
 * Invalidate the cached "signalled used" value so the next completion
 * recomputes whether the guest needs an interrupt.
 */
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}
|
|
|
|
|
2010-03-17 12:08:02 +01:00
|
|
|
/* Return a pointer to queue n of the device. */
VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return &vdev->vq[n];
}
|
|
|
|
|
2013-01-30 12:12:37 +01:00
|
|
|
/* Return the index of @vq within its device's queue array. */
uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}
|
|
|
|
|
2012-07-05 17:16:30 +02:00
|
|
|
/*
 * Guest notifier (irqfd-less path) handler: when the eventfd fires,
 * inject the queue's interrupt into the guest.
 */
static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}
|
2022-12-22 08:04:47 +01:00
|
|
|
/*
 * Config-change notifier handler: when the eventfd fires, raise a
 * configuration-change interrupt to the guest.
 */
static void virtio_config_guest_notifier_read(EventNotifier *n)
{
    VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);

    if (event_notifier_test_and_clear(n)) {
        virtio_notify_config(vdev);
    }
}
|
2012-07-05 17:16:30 +02:00
|
|
|
/*
 * Wire or unwire the userspace fd handler for @vq's guest notifier.
 * When irqfd is in use the kernel injects interrupts directly, so no
 * userspace handler is installed. On deassign, drain the notifier once so
 * a pending event that raced with removal is not lost.
 */
void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    EventNotifier *notifier = &vq->guest_notifier;

    event_notifier_set_handler(notifier,
                               (assign && !with_irqfd)
                               ? virtio_queue_guest_notifier_read
                               : NULL);
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(notifier);
    }
}
|
|
|
|
|
2022-12-22 08:04:47 +01:00
|
|
|
/*
 * Wire or unwire the userspace fd handler for the device's config-change
 * notifier; mirrors virtio_queue_set_guest_notifier_fd_handler(). On
 * deassign, drain the notifier once so a racing event is not lost.
 */
void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
                                                 bool assign, bool with_irqfd)
{
    EventNotifier *notifier = &vdev->config_notifier;

    event_notifier_set_handler(notifier,
                               (assign && !with_irqfd)
                               ? virtio_config_guest_notifier_read
                               : NULL);
    if (!assign) {
        /* Test and clear notifier before closing it,*/
        /* in case poll callback didn't have time to run. */
        virtio_config_guest_notifier_read(notifier);
    }
}
|
|
|
|
|
2010-03-17 12:08:02 +01:00
|
|
|
/* Accessor for @vq's guest (interrupt) notifier. */
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}
|
2012-07-05 17:16:29 +02:00
|
|
|
|
2016-12-01 20:26:50 +01:00
|
|
|
/*
 * AioContext poll-begin hook: while the event loop busy-polls the queue,
 * suppress guest notifications (kicks) since we will notice new buffers
 * by polling anyway.
 */
static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    virtio_queue_set_notification(vq, 0);
}
|
|
|
|
|
2016-12-01 20:26:43 +01:00
|
|
|
/*
 * AioContext poll callback: report whether the queue has work pending.
 * The vring.desc check guards against polling a queue that was never set
 * up (or has been torn down).
 */
static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
    EventNotifier *n = opaque;
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    return vq->vring.desc && !virtio_queue_empty(vq);
}
|
|
|
|
|
|
|
|
/*
 * AioContext poll-ready callback: the poll handler reported pending work,
 * so run the virtqueue's handler. Kept separate from the poll check so
 * adaptive polling does not account handler time as polling time.
 */
static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    virtio_queue_notify_vq(vq);
}
|
|
|
|
|
2016-12-01 20:26:50 +01:00
|
|
|
/*
 * AioContext poll-end hook: re-enable guest notifications that were
 * suppressed in the poll-begin hook.
 */
static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    /* Caller polls once more after this to catch requests that race with us */
    virtio_queue_set_notification(vq, 1);
}
|
|
|
|
|
2021-12-07 14:23:36 +01:00
|
|
|
/*
 * Attach @vq's host notifier to @ctx with full polling support: the event
 * loop busy-polls the virtqueue and suppresses guest kicks while polling.
 * Use this for tx-style queues whose handler drains the ring.
 */
void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
    aio_set_event_notifier(ctx, &vq->host_notifier, true,
                           virtio_queue_host_notifier_read,
                           virtio_queue_host_notifier_aio_poll,
                           virtio_queue_host_notifier_aio_poll_ready);
    aio_set_event_notifier_poll(ctx, &vq->host_notifier,
                                virtio_queue_host_notifier_aio_poll_begin,
                                virtio_queue_host_notifier_aio_poll_end);
}
|
|
|
|
|
virtio-scsi: don't waste CPU polling the event virtqueue
The virtio-scsi event virtqueue is not emptied by its handler function.
This is typical for rx virtqueues where the device uses buffers when
some event occurs (e.g. a packet is received, an error condition
happens, etc).
Polling non-empty virtqueues wastes CPU cycles. We are not waiting for
new buffers to become available, we are waiting for an event to occur,
so it's a misuse of CPU resources to poll for buffers.
Introduce the new virtio_queue_aio_attach_host_notifier_no_poll() API,
which is identical to virtio_queue_aio_attach_host_notifier() except
that it does not poll the virtqueue.
Before this patch the following command-line consumed 100% CPU in the
IOThread polling and calling virtio_scsi_handle_event():
$ qemu-system-x86_64 -M accel=kvm -m 1G -cpu host \
--object iothread,id=iothread0 \
--device virtio-scsi-pci,iothread=iothread0 \
--blockdev file,filename=test.img,aio=native,cache.direct=on,node-name=drive0 \
--device scsi-hd,drive=drive0
After this patch CPU is no longer wasted.
Reported-by: Nir Soffer <nsoffer@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Nir Soffer <nsoffer@redhat.com>
Message-id: 20220427143541.119567-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2022-04-27 16:35:37 +02:00
|
|
|
/*
 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
 * this for rx virtqueues and similar cases where the virtqueue handler
 * function does not pop all elements. When the virtqueue is left non-empty
 * polling consumes CPU cycles and should not be used.
 */
void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
{
    /* NULL poll callbacks disable the AioContext polling path entirely. */
    aio_set_event_notifier(ctx, &vq->host_notifier, true,
                           virtio_queue_host_notifier_read,
                           NULL, NULL);
}
|
|
|
|
|
2021-12-07 14:23:36 +01:00
|
|
|
/*
 * Detach @vq's host notifier from @ctx, then process any event that raced
 * with the detach so no kick is lost.
 */
void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
    aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
    /* Test and clear notifier after disabling event,
     * in case poll callback didn't have time to run. */
    virtio_queue_host_notifier_read(&vq->host_notifier);
}
|
|
|
|
|
2016-10-21 22:48:15 +02:00
|
|
|
/*
 * Host notifier handler: the guest kicked the queue (via ioeventfd), so
 * run the virtqueue's handler.
 */
void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}
|
|
|
|
|
2010-03-17 12:08:02 +01:00
|
|
|
/* Accessor for @vq's host (kick) notifier. */
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}
|
2013-01-15 00:08:02 +01:00
|
|
|
|
2022-12-22 08:04:47 +01:00
|
|
|
/* Accessor for the device's config-change guest notifier. */
EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
{
    return &vdev->config_notifier;
}
|
|
|
|
|
2019-11-05 15:09:46 +01:00
|
|
|
/* Record whether @vq's host notifier (ioeventfd) is currently in use. */
void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
{
    vq->host_notifier_enabled = enabled;
}
|
|
|
|
|
2018-04-12 17:12:30 +02:00
|
|
|
int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
|
|
|
|
MemoryRegion *mr, bool assign)
|
|
|
|
{
|
|
|
|
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
|
|
|
|
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
|
|
|
|
|
|
|
|
if (k->set_host_notifier_mr) {
|
|
|
|
return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
|
|
|
|
}
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2013-04-30 16:08:48 +02:00
|
|
|
/*
 * Set (replacing any previous value) the name used for the device's child
 * bus. The device owns the copied string; g_free(NULL) is a safe no-op on
 * first use.
 */
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}
|
|
|
|
|
2022-02-20 17:39:25 +01:00
|
|
|
/*
 * Mark the device broken after a fatal guest-triggerable error. Logs the
 * printf-style message; for VIRTIO 1.0+ devices it additionally sets
 * DEVICE_NEEDS_RESET and raises a config interrupt so the guest can
 * recover by resetting the device. A broken device stops processing
 * virtqueues until reset.
 */
void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    error_vreport(fmt, ap);
    va_end(ap);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
        virtio_notify_config(vdev);
    }

    /* Set last: notification above happens on a not-yet-broken device. */
    vdev->broken = true;
}
|
|
|
|
|
2017-01-27 16:40:17 +01:00
|
|
|
/*
 * Memory listener commit hook: the guest memory map changed, so rebuild
 * the vring region caches of every configured queue. Queues are allocated
 * contiguously, so the first one with num == 0 ends the configured set.
 */
static void virtio_memory_listener_commit(MemoryListener *listener)
{
    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
        virtio_init_region_cache(vdev, i);
    }
}
|
|
|
|
|
2013-07-30 00:50:27 +02:00
|
|
|
/*
 * DeviceClass::realize for all virtio devices: run the subclass realize,
 * plug the device into its transport bus, then register the memory
 * listener and add the device to the global virtio_list. On plug failure
 * the subclass realize is rolled back via vdc->unrealize (which must not
 * fail).
 */
static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    /* Devices should either use vmsd or the load/save methods */
    assert(!vdc->vmsd || !vdc->load);

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    virtio_bus_device_plugged(vdev, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        /* Roll back the subclass realize; unrealize cannot fail. */
        vdc->unrealize(dev);
        return;
    }

    /* Track guest memory map changes to keep vring caches valid. */
    vdev->listener.commit = virtio_memory_listener_commit;
    vdev->listener.name = "virtio";
    memory_listener_register(&vdev->listener, vdev->dma_as);
    QTAILQ_INSERT_TAIL(&virtio_list, vdev, next);
}
|
|
|
|
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize methods no longer ignore such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 17:29:24 +02:00
|
|
|
/*
 * Unrealize a virtio device, tearing down state in the reverse order
 * of virtio_device_realize().  Unrealize must not fail, hence no
 * Error ** parameter.
 */
static void virtio_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    /*
     * Unregister the memory listener here rather than in
     * instance_finalize: the device's address space is destroyed by
     * device_set_realized() before finalize runs, so finalize would be
     * too late (symmetric with memory_listener_register in
     * virtio_device_realize).
     */
    memory_listener_unregister(&vdev->listener);
    /* Tell the transport/bus the device is going away. */
    virtio_bus_device_unplugged(vdev);

    /* Give the subclass a chance to release its own resources. */
    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev);
    }

    /* Remove from the global list consulted by the QMP virtio commands. */
    QTAILQ_REMOVE(&virtio_list, vdev, next);
    g_free(vdev->bus_name);
    vdev->bus_name = NULL; /* avoid a dangling pointer if re-realized */
}
|
|
|
|
|
2017-01-27 16:40:17 +01:00
|
|
|
/*
 * Release the virtqueue array and drop each configured queue's vring
 * region caches.  Queues are allocated contiguously, so the first one
 * with vring.num == 0 marks the end of the configured queues.
 */
static void virtio_device_free_virtqueues(VirtIODevice *vdev)
{
    int n;

    if (vdev->vq == NULL) {
        return;
    }

    for (n = 0; n < VIRTIO_QUEUE_MAX && vdev->vq[n].vring.num != 0; n++) {
        virtio_virtqueue_reset_region_cache(&vdev->vq[n]);
    }

    g_free(vdev->vq);
}
|
|
|
|
|
|
|
|
/*
 * QOM instance_finalize hook: free memory still owned by the device
 * object.  Note the memory listener is already unregistered in
 * virtio_device_unrealize(), not here.
 */
static void virtio_device_instance_finalize(Object *obj)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(obj);

    virtio_device_free_virtqueues(vdev);

    g_free(vdev->config);
    g_free(vdev->vector_queues);
}
|
|
|
|
|
2015-05-26 16:34:47 +02:00
|
|
|
/* qdev properties common to every virtio device. */
static Property virtio_properties[] = {
    /* Transport-independent feature bits negotiated with the guest. */
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
    /*
     * Allow bypassing vring processing while PCI bus-mastering is
     * disabled; defaults off for older machine types for migration
     * compatibility (the flag is only migrated when set).
     */
    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
                     disable_legacy_check, false),
    DEFINE_PROP_END_OF_LIST(),
};
|
|
|
|
|
2016-10-21 22:48:07 +02:00
|
|
|
/*
 * Default VirtioDeviceClass::start_ioeventfd implementation: assign a
 * host notifier (ioeventfd) to every configured virtqueue, then kick
 * each queue so requests already sitting in the vring are processed.
 *
 * Returns 0 on success, or the first negative error from
 * virtio_bus_set_host_notifier() after rolling back every notifier
 * assigned so far.
 */
static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int i, n, r, err;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();
    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            /* queue not configured by the guest */
            continue;
        }
        r = virtio_bus_set_host_notifier(qbus, n, true);
        if (r < 0) {
            err = r;
            goto assign_error;
        }
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    }

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        /* Kick right away to begin processing requests already in vring */
        VirtQueue *vq = &vdev->vq[n];
        if (!vq->vring.num) {
            continue;
        }
        event_notifier_set(&vq->host_notifier);
    }
    memory_region_transaction_commit();
    return 0;

assign_error:
    i = n; /* save n for a second iteration after transaction is committed. */
    while (--n >= 0) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }

        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    while (--i >= 0) {
        if (!virtio_queue_get_num(vdev, i)) {
            continue;
        }
        virtio_bus_cleanup_host_notifier(qbus, i);
    }
    return err;
}
|
|
|
|
|
|
|
|
/* Start ioeventfd handling by delegating to the virtio bus the device is on. */
int virtio_device_start_ioeventfd(VirtIODevice *vdev)
{
    VirtioBusState *vbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));

    return virtio_bus_start_ioeventfd(vbus);
}
|
|
|
|
|
|
|
|
/*
 * Default VirtioDeviceClass::stop_ioeventfd implementation: detach the
 * host notifier from every configured virtqueue.  Detaching and
 * cleaning up are split into two loops because the memory transaction
 * must commit while the ioeventfds are still open.
 */
static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int n, r;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();
    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];

        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        virtio_bus_cleanup_host_notifier(qbus, n);
    }
}
|
|
|
|
|
2016-11-18 16:07:00 +01:00
|
|
|
/* Grab the ioeventfd for @vdev by delegating to its virtio bus. */
int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
{
    VirtioBusState *vbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));

    return virtio_bus_grab_ioeventfd(vbus);
}
|
|
|
|
|
|
|
|
/* Release a previously grabbed ioeventfd via the device's virtio bus. */
void virtio_device_release_ioeventfd(VirtIODevice *vdev)
{
    VirtioBusState *vbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));

    virtio_bus_release_ioeventfd(vbus);
}
|
|
|
|
|
2013-01-15 00:08:02 +01:00
|
|
|
/* Class init for the abstract TYPE_VIRTIO_DEVICE base type. */
static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    /* Generic qdev plumbing shared by all virtio devices. */
    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
    device_class_set_props(dc, virtio_properties);

    /* Default ioeventfd hooks; subclasses may override them. */
    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;

    /* Set the default legacy feature bits here. */
    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;

    QTAILQ_INIT(&virtio_list);
}
|
|
|
|
|
2016-10-21 22:48:08 +02:00
|
|
|
/* Report whether ioeventfd is enabled on the device's virtio bus. */
bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
{
    VirtioBusState *vbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));

    return virtio_bus_ioeventfd_enabled(vbus);
}
|
|
|
|
|
2022-08-11 14:24:42 +02:00
|
|
|
/*
 * QMP x-query-virtio-queue-status: snapshot the bookkeeping state of
 * one virtqueue of the VirtIODevice at QOM @path.
 *
 * Returns a newly allocated VirtQueueStatus (freed by the QAPI
 * machinery), or NULL with @errp set on bad path or queue number.
 */
VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
                                                 uint16_t queue,
                                                 Error **errp)
{
    VirtIODevice *vdev;
    VirtQueueStatus *status;

    vdev = qmp_find_virtio_device(path);
    if (vdev == NULL) {
        error_setg(errp, "Path %s is not a VirtIODevice", path);
        return NULL;
    }

    /* A queue with vring.num == 0 has not been configured by the guest. */
    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
        error_setg(errp, "Invalid virtqueue number %d", queue);
        return NULL;
    }

    status = g_new0(VirtQueueStatus, 1);
    status->name = g_strdup(vdev->name);
    status->queue_index = vdev->vq[queue].queue_index;
    status->inuse = vdev->vq[queue].inuse;
    status->vring_num = vdev->vq[queue].vring.num;
    status->vring_num_default = vdev->vq[queue].vring.num_default;
    status->vring_align = vdev->vq[queue].vring.align;
    status->vring_desc = vdev->vq[queue].vring.desc;
    status->vring_avail = vdev->vq[queue].vring.avail;
    status->vring_used = vdev->vq[queue].vring.used;
    status->used_idx = vdev->vq[queue].used_idx;
    status->signalled_used = vdev->vq[queue].signalled_used;
    status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;

    if (vdev->vhost_started) {
        /*
         * When vhost drives the queue, QEMU's own indices are stale;
         * fetch last_avail_idx from the vhost backend instead.
         */
        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
        struct vhost_dev *hdev = vdc->get_vhost(vdev);

        /* check if vq index exists for vhost as well */
        if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
            status->has_last_avail_idx = true;

            int vhost_vq_index =
                hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
            struct vhost_vring_state state = {
                .index = vhost_vq_index,
            };

            status->last_avail_idx =
                hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
        }
    } else {
        /* shadow_avail_idx is only meaningful when QEMU runs the queue. */
        status->has_shadow_avail_idx = true;
        status->has_last_avail_idx = true;
        status->last_avail_idx = vdev->vq[queue].last_avail_idx;
        status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
    }

    return status;
}
|
|
|
|
|
2022-08-11 14:24:43 +02:00
|
|
|
/*
 * Translate vring descriptor flag bits into a list of human-readable
 * names for the QMP x-query-virtio-queue-element command.
 *
 * @flags: flag word read from a (split or packed) vring descriptor
 *
 * Returns a newly allocated strList owned by the caller (freed by the
 * QAPI machinery), or NULL when no known flag bit is set.  The list is
 * built by prepending, so entries appear in reverse table order.
 */
static strList *qmp_decode_vring_desc_flags(uint16_t flags)
{
    strList *list = NULL;
    strList *node;
    int i;

    /* Flag bit -> name table; the zero-flag entry terminates the scan. */
    struct {
        uint16_t flag;
        const char *value;
    } map[] = {
        { VRING_DESC_F_NEXT, "next" },
        { VRING_DESC_F_WRITE, "write" },
        { VRING_DESC_F_INDIRECT, "indirect" },
        { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
        { 1 << VRING_PACKED_DESC_F_USED, "used" },
        { 0, "" }
    };

    for (i = 0; map[i].flag; i++) {
        if ((map[i].flag & flags) == 0) {
            continue;
        }
        /*
         * Use type-safe g_new0 instead of g_malloc0(sizeof(strList)),
         * matching the allocation style used elsewhere in this file.
         */
        node = g_new0(strList, 1);
        node->value = g_strdup(map[i].value);
        node->next = list;
        list = node;
    }

    return list;
}
|
|
|
|
|
|
|
|
/*
 * QMP x-query-virtio-queue-element: read one element (descriptor chain)
 * from a split virtqueue without popping it, for debugging.
 *
 * @path: QOM path of the VirtIODevice
 * @queue: virtqueue number
 * @has_index/@index: optional avail-ring slot to read; defaults to
 *                    the slot at last_avail_idx
 *
 * Returns a newly allocated VirtioQueueElement (freed by the QAPI
 * machinery), or NULL with @errp set.  Packed rings are not supported.
 */
VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
                                                     uint16_t queue,
                                                     bool has_index,
                                                     uint16_t index,
                                                     Error **errp)
{
    VirtIODevice *vdev;
    VirtQueue *vq;
    VirtioQueueElement *element = NULL;

    vdev = qmp_find_virtio_device(path);
    if (vdev == NULL) {
        error_setg(errp, "Path %s is not a VirtIO device", path);
        return NULL;
    }

    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
        error_setg(errp, "Invalid virtqueue number %d", queue);
        return NULL;
    }
    vq = &vdev->vq[queue];

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        error_setg(errp, "Packed ring not supported");
        return NULL;
    } else {
        unsigned int head, i, max;
        VRingMemoryRegionCaches *caches;
        MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
        MemoryRegionCache *desc_cache;
        VRingDesc desc;
        VirtioRingDescList *list = NULL;
        VirtioRingDescList *node;
        int rc; int ndescs;

        /* Vring accesses below must happen under the RCU read lock. */
        RCU_READ_LOCK_GUARD();

        max = vq->vring.num;

        /* Index into the avail ring wraps modulo the ring size. */
        if (!has_index) {
            head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
        } else {
            head = vring_avail_ring(vq, index % vq->vring.num);
        }
        i = head;

        caches = vring_get_region_caches(vq);
        if (!caches) {
            error_setg(errp, "Region caches not initialized");
            return NULL;
        }
        if (caches->desc.len < max * sizeof(VRingDesc)) {
            error_setg(errp, "Cannot map descriptor ring");
            return NULL;
        }

        desc_cache = &caches->desc;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
        if (desc.flags & VRING_DESC_F_INDIRECT) {
            /* Follow the indirect descriptor into its own table. */
            int64_t len;
            len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                error_setg(errp, "Cannot map indirect buffer");
                goto done;
            }

            max = desc.len / sizeof(VRingDesc);
            i = 0;
            vring_split_desc_read(vdev, &desc, desc_cache, i);
        }

        element = g_new0(VirtioQueueElement, 1);
        element->avail = g_new0(VirtioRingAvail, 1);
        element->used = g_new0(VirtioRingUsed, 1);
        element->name = g_strdup(vdev->name);
        element->index = head;
        element->avail->flags = vring_avail_flags(vq);
        element->avail->idx = vring_avail_idx(vq);
        element->avail->ring = head;
        element->used->flags = vring_used_flags(vq);
        element->used->idx = vring_used_idx(vq);
        ndescs = 0;

        /* Walk the descriptor chain, prepending each entry to the list. */
        do {
            /* A buggy driver may produce an infinite loop */
            if (ndescs >= max) {
                break;
            }
            node = g_new0(VirtioRingDescList, 1);
            node->value = g_new0(VirtioRingDesc, 1);
            node->value->addr = desc.addr;
            node->value->len = desc.len;
            node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
            node->next = list;
            list = node;

            ndescs++;
            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
                                                max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);
        element->descs = list;
done:
        address_space_cache_destroy(&indirect_desc_cache);
    }

    return element;
}
|
|
|
|
|
2013-01-15 00:08:02 +01:00
|
|
|
/* Abstract QOM base type every concrete virtio device derives from. */
static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .instance_finalize = virtio_device_instance_finalize,
    .abstract = true, /* never instantiated directly */
    .class_size = sizeof(VirtioDeviceClass),
};
|
|
|
|
|
|
|
|
/* Register the virtio base type with QOM at startup. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)
|