From a40dcec9fcbf63bcdc013bb20970651b237203ef Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Fri, 25 Oct 2019 10:35:20 +0200 Subject: [PATCH 01/25] virtio: basic structure for packed ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define packed ring structure according to Qemu nomenclature, field data(wrap counter, etc) are also included. Signed-off-by: Wei Xu Signed-off-by: Jason Wang Signed-off-by: Eugenio Pérez Reviewed-by: Jens Freimann Message-Id: <20191025083527.30803-2-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 527df03bfd..fdac203cdf 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -43,6 +43,13 @@ typedef struct VRingDesc uint16_t next; } VRingDesc; +typedef struct VRingPackedDesc { + uint64_t addr; + uint32_t len; + uint16_t id; + uint16_t flags; +} VRingPackedDesc; + typedef struct VRingAvail { uint16_t flags; @@ -81,17 +88,25 @@ typedef struct VRing VRingMemoryRegionCaches *caches; } VRing; +typedef struct VRingPackedDescEvent { + uint16_t off_wrap; + uint16_t flags; +} VRingPackedDescEvent ; + struct VirtQueue { VRing vring; /* Next head to pop */ uint16_t last_avail_idx; + bool last_avail_wrap_counter; /* Last avail_idx read from VQ. */ uint16_t shadow_avail_idx; + bool shadow_avail_wrap_counter; uint16_t used_idx; + bool used_wrap_counter; /* Last used index value we have signalled on */ uint16_t signalled_used; From f90cda636d4d18f691d8e9ff386e222570ff6552 Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Fri, 25 Oct 2019 10:35:21 +0200 Subject: [PATCH 02/25] virtio: device/driver area size calculation refactor for split ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is slight size difference between split/packed rings. This is the refactor of split ring as well as a helper to expanding device and driver area size calculation for packed ring. Signed-off-by: Wei Xu Signed-off-by: Jason Wang Signed-off-by: Eugenio Pérez Reviewed-by: Jens Freimann Message-Id: <20191025083527.30803-3-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index fdac203cdf..74cc10fad9 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -159,10 +159,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) VRingMemoryRegionCaches *old = vq->vring.caches; VRingMemoryRegionCaches *new = NULL; hwaddr addr, size; - int event_size; int64_t len; - event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; addr = vq->vring.desc; if (!addr) { @@ -177,7 +175,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_desc; } - size = virtio_queue_get_used_size(vdev, n) + event_size; + size = virtio_queue_get_used_size(vdev, n); len = address_space_cache_init(&new->used, vdev->dma_as, vq->vring.used, size, true); if (len < size) { @@ -185,7 +183,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_used; } - size = virtio_queue_get_avail_size(vdev, n) + event_size; + size = virtio_queue_get_avail_size(vdev, n); len = address_space_cache_init(&new->avail, vdev->dma_as, vq->vring.avail, size, false); if (len < size) { @@ -2414,14 +2412,20 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) { + int s; + + s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return offsetof(VRingAvail, ring) + - sizeof(uint16_t) * vdev->vq[n].vring.num; + sizeof(uint16_t) * vdev->vq[n].vring.num + s; } hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) { + int s; + + s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return offsetof(VRingUsed, ring) + - sizeof(VRingUsedElem) * vdev->vq[n].vring.num; + sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; } uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) From 4a0117cf6e04d568a1963540bc0fa133b4d09b33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 25 Oct 2019 10:35:22 +0200 Subject: [PATCH 03/25] virtio: Free blk virqueues at unrealize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function virtio_del_queue was not called at unrealize() callback. This was detected due to add an allocated element on the vq introduce in future commits (used_elems) and running address sanitizer memory leak detector. Signed-off-by: Eugenio Pérez Message-Id: <20191025083527.30803-4-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/virtio-blk.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index ed2ddebd2b..ba846fe9dc 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1206,9 +1206,14 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOBlock *s = VIRTIO_BLK(dev); + VirtIOBlkConf *conf = &s->conf; + unsigned i; virtio_blk_data_plane_destroy(s->dataplane); s->dataplane = NULL; + for (i = 0; i < conf->num_queues; i++) { + virtio_del_queue(vdev, i); + } qemu_del_vm_change_state_handler(s->change); blockdev_mark_auto_del(s->blk); virtio_cleanup(vdev); From 522bbb191cec2ac799f4bd662b0fcf6f00dfc2ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 25 Oct 2019 10:35:23 +0200 Subject: [PATCH 04/25] virtio: Free rnd virqueue at unrealize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function virtio_del_queue was not called at unrealize() callback. This was detected due to add an allocated element on the vq introduce in future commits (used_elems) and running address sanitizer memory leak detector. Signed-off-by: Eugenio Pérez Message-Id: <20191025083527.30803-5-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-rng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index e93bed020f..b498a20332 100644 --- a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -238,6 +238,7 @@ static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp) qemu_del_vm_change_state_handler(vrng->vmstate); timer_del(vrng->rate_limit_timer); timer_free(vrng->rate_limit_timer); + virtio_del_queue(vdev, 0); virtio_cleanup(vdev); } From 86044b24e865fb9596ed77a4d0f3af8b90a088a1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 25 Oct 2019 10:35:24 +0200 Subject: [PATCH 05/25] virtio: basic packed virtqueue support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements basic support for the packed virtqueue. Compare the split virtqueue which has three rings, packed virtqueue only have one which is supposed to have better cache utilization and more hardware friendly. Please refer virtio specification for more information. Signed-off-by: Wei Xu Signed-off-by: Jason Wang Signed-off-by: Eugenio Pérez Message-Id: <20191025083527.30803-6-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/virtio-blk.c | 2 +- hw/char/virtio-serial-bus.c | 2 +- hw/scsi/virtio-scsi.c | 3 +- hw/virtio/virtio.c | 893 +++++++++++++++++++++++++++++++++--- include/hw/virtio/virtio.h | 10 +- 5 files changed, 832 insertions(+), 78 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index ba846fe9dc..7dbdeaaab9 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1052,7 +1052,7 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) qemu_put_be32(f, virtio_get_queue_index(req->vq)); } - qemu_put_virtqueue_element(f, &req->elem); + qemu_put_virtqueue_element(vdev, f, &req->elem); req = req->next; } qemu_put_sbyte(f, 0); diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index 4e0ed829ae..33259042a9 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -708,7 +708,7 @@ static void virtio_serial_save_device(VirtIODevice *vdev, QEMUFile *f) if (elem_popped) { qemu_put_be32s(f, &port->iov_idx); qemu_put_be64s(f, &port->iov_offset); - qemu_put_virtqueue_element(f, port->elem); + qemu_put_virtqueue_element(vdev, f, port->elem); } } } diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index ee52aa7d17..e8b2b64d09 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -190,11 +190,12 @@ static void virtio_scsi_save_request(QEMUFile *f, SCSIRequest *sreq) { VirtIOSCSIReq *req = sreq->hba_private; VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(req->dev); + VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); uint32_t n = virtio_get_queue_index(req->vq) - 2; assert(n < vs->conf.num_queues); qemu_put_be32s(f, &n); - qemu_put_virtqueue_element(f, &req->elem); + qemu_put_virtqueue_element(vdev, f, &req->elem); } static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 74cc10fad9..a694b4ab50 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -96,6 +96,7 @@ typedef struct VRingPackedDescEvent { struct VirtQueue { VRing vring; + VirtQueueElement *used_elems; /* Next head to pop */ uint16_t last_avail_idx; @@ -160,6 +161,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) VRingMemoryRegionCaches *new = NULL; hwaddr addr, size; int64_t len; + bool packed; addr = vq->vring.desc; @@ -168,8 +170,10 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) } new = g_new0(VRingMemoryRegionCaches, 1); size = virtio_queue_get_desc_size(vdev, n); + packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? + true : false; len = address_space_cache_init(&new->desc, vdev->dma_as, - addr, size, false); + addr, size, packed); if (len < size) { virtio_error(vdev, "Cannot map desc"); goto err_desc; @@ -225,8 +229,8 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n) } /* Called within rcu_read_lock(). */ -static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, - MemoryRegionCache *cache, int i) +static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc, + MemoryRegionCache *cache, int i) { address_space_read_cached(cache, i * sizeof(VRingDesc), desc, sizeof(VRingDesc)); @@ -236,6 +240,7 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, virtio_tswap16s(vdev, &desc->next); } +/* Called within rcu_read_lock(). */ static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) { VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches); @@ -370,6 +375,95 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read_flags(VirtIODevice *vdev, + uint16_t *flags, + MemoryRegionCache *cache, + int i) +{ + address_space_read_cached(cache, + i * sizeof(VRingPackedDesc) + + offsetof(VRingPackedDesc, flags), + flags, sizeof(*flags)); + virtio_tswap16s(vdev, flags); +} + +static void vring_packed_desc_read(VirtIODevice *vdev, + VRingPackedDesc *desc, + MemoryRegionCache *cache, + int i, bool strict_order) +{ + hwaddr off = i * sizeof(VRingPackedDesc); + + vring_packed_desc_read_flags(vdev, &desc->flags, cache, i); + + if (strict_order) { + /* Make sure flags is read before the rest fields. */ + smp_rmb(); + } + + address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr), + &desc->addr, sizeof(desc->addr)); + address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id), + &desc->id, sizeof(desc->id)); + address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len), + &desc->len, sizeof(desc->len)); + virtio_tswap64s(vdev, &desc->addr); + virtio_tswap16s(vdev, &desc->id); + virtio_tswap32s(vdev, &desc->len); +} + +static void vring_packed_desc_write_data(VirtIODevice *vdev, + VRingPackedDesc *desc, + MemoryRegionCache *cache, + int i) +{ + hwaddr off_id = i * sizeof(VRingPackedDesc) + + offsetof(VRingPackedDesc, id); + hwaddr off_len = i * sizeof(VRingPackedDesc) + + offsetof(VRingPackedDesc, len); + + virtio_tswap32s(vdev, &desc->len); + virtio_tswap16s(vdev, &desc->id); + address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id)); + address_space_cache_invalidate(cache, off_id, sizeof(desc->id)); + address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len)); + address_space_cache_invalidate(cache, off_len, sizeof(desc->len)); +} + +static void vring_packed_desc_write_flags(VirtIODevice *vdev, + VRingPackedDesc *desc, + MemoryRegionCache *cache, + int i) +{ + hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); + + virtio_tswap16s(vdev, &desc->flags); + address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags)); + address_space_cache_invalidate(cache, off, sizeof(desc->flags)); +} + +static void vring_packed_desc_write(VirtIODevice *vdev, + VRingPackedDesc *desc, + MemoryRegionCache *cache, + int i, bool strict_order) +{ + vring_packed_desc_write_data(vdev, desc, cache, i); + if (strict_order) { + /* Make sure data is wrote before flags. */ + smp_wmb(); + } + vring_packed_desc_write_flags(vdev, desc, cache, i); +} + +static inline bool is_desc_avail(uint16_t flags, bool wrap_counter) +{ + bool avail, used; + + avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); + used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); + return (avail != used) && (avail == wrap_counter); +} + /* Fetch avail_idx from VQ memory only when we really need to know if * guest has added some buffers. * Called within rcu_read_lock(). */ @@ -390,7 +484,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq) return vring_avail_idx(vq) == vq->last_avail_idx; } -int virtio_queue_empty(VirtQueue *vq) +static int virtio_queue_split_empty(VirtQueue *vq) { bool empty; @@ -412,6 +506,41 @@ int virtio_queue_empty(VirtQueue *vq) return empty; } +static int virtio_queue_packed_empty_rcu(VirtQueue *vq) +{ + struct VRingPackedDesc desc; + VRingMemoryRegionCaches *cache; + + if (unlikely(!vq->vring.desc)) { + return 1; + } + + cache = vring_get_region_caches(vq); + vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc, + vq->last_avail_idx); + + return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter); +} + +static int virtio_queue_packed_empty(VirtQueue *vq) +{ + bool empty; + + rcu_read_lock(); + empty = virtio_queue_packed_empty_rcu(vq); + rcu_read_unlock(); + return empty; +} + +int virtio_queue_empty(VirtQueue *vq) +{ + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + return virtio_queue_packed_empty(vq); + } else { + return virtio_queue_split_empty(vq); + } +} + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { @@ -449,10 +578,25 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { - vq->inuse--; + vq->inuse -= elem->ndescs; virtqueue_unmap_sg(vq, elem, len); } +static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num) +{ + vq->last_avail_idx -= num; +} + +static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num) +{ + if (vq->last_avail_idx < num) { + vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num; + vq->last_avail_wrap_counter ^= 1; + } else { + vq->last_avail_idx -= num; + } +} + /* virtqueue_unpop: * @vq: The #VirtQueue * @elem: The #VirtQueueElement @@ -464,7 +608,13 @@ void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem, void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { - vq->last_avail_idx--; + + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + virtqueue_packed_rewind(vq, 1); + } else { + virtqueue_split_rewind(vq, 1); + } + virtqueue_detach_element(vq, elem, len); } @@ -485,25 +635,21 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num) if (num > vq->inuse) { return false; } - vq->last_avail_idx -= num; + vq->inuse -= num; + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + virtqueue_packed_rewind(vq, num); + } else { + virtqueue_split_rewind(vq, num); + } return true; } -/* Called within rcu_read_lock(). */ -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { VRingUsedElem uelem; - trace_virtqueue_fill(vq, elem, len, idx); - - virtqueue_unmap_sg(vq, elem, len); - - if (unlikely(vq->vdev->broken)) { - return; - } - if (unlikely(!vq->vring.used)) { return; } @@ -515,16 +661,72 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, vring_used_write(vq, &uelem, idx); } -/* Called within rcu_read_lock(). */ -void virtqueue_flush(VirtQueue *vq, unsigned int count) +static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx) { - uint16_t old, new; + vq->used_elems[idx].index = elem->index; + vq->used_elems[idx].len = len; + vq->used_elems[idx].ndescs = elem->ndescs; +} - if (unlikely(vq->vdev->broken)) { - vq->inuse -= count; +static void virtqueue_packed_fill_desc(VirtQueue *vq, + const VirtQueueElement *elem, + unsigned int idx, + bool strict_order) +{ + uint16_t head; + VRingMemoryRegionCaches *caches; + VRingPackedDesc desc = { + .id = elem->index, + .len = elem->len, + }; + bool wrap_counter = vq->used_wrap_counter; + + if (unlikely(!vq->vring.desc)) { return; } + head = vq->used_idx + idx; + if (head >= vq->vring.num) { + head -= vq->vring.num; + wrap_counter ^= 1; + } + if (wrap_counter) { + desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL); + desc.flags |= (1 << VRING_PACKED_DESC_F_USED); + } else { + desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL); + desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED); + } + + caches = vring_get_region_caches(vq); + vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order); +} + +/* Called within rcu_read_lock(). */ +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx) +{ + trace_virtqueue_fill(vq, elem, len, idx); + + virtqueue_unmap_sg(vq, elem, len); + + if (unlikely(vq->vdev->broken)) { + return; + } + + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + virtqueue_packed_fill(vq, elem, len, idx); + } else { + virtqueue_split_fill(vq, elem, len, idx); + } +} + +/* Called within rcu_read_lock(). */ +static void virtqueue_split_flush(VirtQueue *vq, unsigned int count) +{ + uint16_t old, new; + if (unlikely(!vq->vring.used)) { return; } @@ -540,6 +742,43 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) vq->signalled_used_valid = false; } +static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count) +{ + unsigned int i, ndescs = 0; + + if (unlikely(!vq->vring.desc)) { + return; + } + + for (i = 1; i < count; i++) { + virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false); + ndescs += vq->used_elems[i].ndescs; + } + virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true); + ndescs += vq->used_elems[0].ndescs; + + vq->inuse -= ndescs; + vq->used_idx += ndescs; + if (vq->used_idx >= vq->vring.num) { + vq->used_idx -= vq->vring.num; + vq->used_wrap_counter ^= 1; + } +} + +void virtqueue_flush(VirtQueue *vq, unsigned int count) +{ + if (unlikely(vq->vdev->broken)) { + vq->inuse -= count; + return; + } + + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + virtqueue_packed_flush(vq, count); + } else { + virtqueue_split_flush(vq, count); + } +} + void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { @@ -592,9 +831,9 @@ enum { VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */ }; -static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, - MemoryRegionCache *desc_cache, unsigned int max, - unsigned int *next) +static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, + MemoryRegionCache *desc_cache, + unsigned int max, unsigned int *next) { /* If this descriptor says it doesn't chain, we're done. */ if (!(desc->flags & VRING_DESC_F_NEXT)) { @@ -611,13 +850,13 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, return VIRTQUEUE_READ_DESC_ERROR; } - vring_desc_read(vdev, desc, desc_cache, *next); + vring_split_desc_read(vdev, desc, desc_cache, *next); return VIRTQUEUE_READ_DESC_MORE; } -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) +static void virtqueue_split_get_avail_bytes(VirtQueue *vq, + unsigned int *in_bytes, unsigned int *out_bytes, + unsigned max_in_bytes, unsigned max_out_bytes) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; @@ -627,27 +866,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, int64_t len = 0; int rc; - if (unlikely(!vq->vring.desc)) { - if (in_bytes) { - *in_bytes = 0; - } - if (out_bytes) { - *out_bytes = 0; - } - return; - } - rcu_read_lock(); idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; max = vq->vring.num; caches = vring_get_region_caches(vq); - if (caches->desc.len < max * sizeof(VRingDesc)) { - virtio_error(vdev, "Cannot map descriptor ring"); - goto err; - } - while ((rc = virtqueue_num_heads(vq, idx)) > 0) { MemoryRegionCache *desc_cache = &caches->desc; unsigned int num_bufs; @@ -660,7 +884,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, goto err; } - vring_desc_read(vdev, &desc, desc_cache, i); + vring_split_desc_read(vdev, &desc, desc_cache, i); if (desc.flags & VRING_DESC_F_INDIRECT) { if (!desc.len || (desc.len % sizeof(VRingDesc))) { @@ -686,7 +910,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, max = desc.len / sizeof(VRingDesc); num_bufs = i = 0; - vring_desc_read(vdev, &desc, desc_cache, i); + vring_split_desc_read(vdev, &desc, desc_cache, i); } do { @@ -705,7 +929,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, goto done; } - rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i); + rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i); } while (rc == VIRTQUEUE_READ_DESC_MORE); if (rc == VIRTQUEUE_READ_DESC_ERROR) { @@ -740,6 +964,186 @@ err: goto done; } +static int virtqueue_packed_read_next_desc(VirtQueue *vq, + VRingPackedDesc *desc, + MemoryRegionCache + *desc_cache, + unsigned int max, + unsigned int *next, + bool indirect) +{ + /* If this descriptor says it doesn't chain, we're done. */ + if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) { + return VIRTQUEUE_READ_DESC_DONE; + } + + ++*next; + if (*next == max) { + if (indirect) { + return VIRTQUEUE_READ_DESC_DONE; + } else { + (*next) -= vq->vring.num; + } + } + + vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false); + return VIRTQUEUE_READ_DESC_MORE; +} + +static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, + unsigned int *in_bytes, + unsigned int *out_bytes, + unsigned max_in_bytes, + unsigned max_out_bytes) +{ + VirtIODevice *vdev = vq->vdev; + unsigned int max, idx; + unsigned int total_bufs, in_total, out_total; + MemoryRegionCache *desc_cache; + VRingMemoryRegionCaches *caches; + MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + int64_t len = 0; + VRingPackedDesc desc; + bool wrap_counter; + + rcu_read_lock(); + idx = vq->last_avail_idx; + wrap_counter = vq->last_avail_wrap_counter; + total_bufs = in_total = out_total = 0; + + max = vq->vring.num; + caches = vring_get_region_caches(vq); + + for (;;) { + unsigned int num_bufs = total_bufs; + unsigned int i = idx; + int rc; + + desc_cache = &caches->desc; + vring_packed_desc_read(vdev, &desc, desc_cache, idx, true); + if (!is_desc_avail(desc.flags, wrap_counter)) { + break; + } + + if (desc.flags & VRING_DESC_F_INDIRECT) { + if (desc.len % sizeof(VRingPackedDesc)) { + virtio_error(vdev, "Invalid size for indirect buffer table"); + goto err; + } + + /* If we've got too many, that implies a descriptor loop. */ + if (num_bufs >= max) { + virtio_error(vdev, "Looped descriptor"); + goto err; + } + + /* loop over the indirect descriptor table */ + len = address_space_cache_init(&indirect_desc_cache, + vdev->dma_as, + desc.addr, desc.len, false); + desc_cache = &indirect_desc_cache; + if (len < desc.len) { + virtio_error(vdev, "Cannot map indirect buffer"); + goto err; + } + + max = desc.len / sizeof(VRingPackedDesc); + num_bufs = i = 0; + vring_packed_desc_read(vdev, &desc, desc_cache, i, false); + } + + do { + /* If we've got too many, that implies a descriptor loop. */ + if (++num_bufs > max) { + virtio_error(vdev, "Looped descriptor"); + goto err; + } + + if (desc.flags & VRING_DESC_F_WRITE) { + in_total += desc.len; + } else { + out_total += desc.len; + } + if (in_total >= max_in_bytes && out_total >= max_out_bytes) { + goto done; + } + + rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, + &i, desc_cache == + &indirect_desc_cache); + } while (rc == VIRTQUEUE_READ_DESC_MORE); + + if (desc_cache == &indirect_desc_cache) { + address_space_cache_destroy(&indirect_desc_cache); + total_bufs++; + idx++; + } else { + idx += num_bufs - total_bufs; + total_bufs = num_bufs; + } + + if (idx >= vq->vring.num) { + idx -= vq->vring.num; + wrap_counter ^= 1; + } + } + + /* Record the index and wrap counter for a kick we want */ + vq->shadow_avail_idx = idx; + vq->shadow_avail_wrap_counter = wrap_counter; +done: + address_space_cache_destroy(&indirect_desc_cache); + if (in_bytes) { + *in_bytes = in_total; + } + if (out_bytes) { + *out_bytes = out_total; + } + rcu_read_unlock(); + return; + +err: + in_total = out_total = 0; + goto done; +} + +void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + unsigned int *out_bytes, + unsigned max_in_bytes, unsigned max_out_bytes) +{ + uint16_t desc_size; + VRingMemoryRegionCaches *caches; + + if (unlikely(!vq->vring.desc)) { + goto err; + } + + caches = vring_get_region_caches(vq); + desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? + sizeof(VRingPackedDesc) : sizeof(VRingDesc); + if (caches->desc.len < vq->vring.num * desc_size) { + virtio_error(vq->vdev, "Cannot map descriptor ring"); + goto err; + } + + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes, + max_in_bytes, max_out_bytes); + } else { + virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes, + max_in_bytes, max_out_bytes); + } + + return; +err: + if (in_bytes) { + *in_bytes = 0; + } + if (out_bytes) { + *out_bytes = 0; + } +} + int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, unsigned int out_bytes) { @@ -864,7 +1268,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu return elem; } -void *virtqueue_pop(VirtQueue *vq, size_t sz) +static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) { unsigned int i, head, max; VRingMemoryRegionCaches *caches; @@ -879,9 +1283,6 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) VRingDesc desc; int rc; - if (unlikely(vdev->broken)) { - return NULL; - } rcu_read_lock(); if (virtio_queue_empty_rcu(vq)) { goto done; @@ -917,7 +1318,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) } desc_cache = &caches->desc; - vring_desc_read(vdev, &desc, desc_cache, i); + vring_split_desc_read(vdev, &desc, desc_cache, i); if (desc.flags & VRING_DESC_F_INDIRECT) { if (!desc.len || (desc.len % sizeof(VRingDesc))) { virtio_error(vdev, "Invalid size for indirect buffer table"); @@ -935,7 +1336,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) max = desc.len / sizeof(VRingDesc); i = 0; - vring_desc_read(vdev, &desc, desc_cache, i); + vring_split_desc_read(vdev, &desc, desc_cache, i); } /* Collect all the descriptors */ @@ -966,7 +1367,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) goto err_undo_map; } - rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i); + rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i); } while (rc == VIRTQUEUE_READ_DESC_MORE); if (rc == VIRTQUEUE_READ_DESC_ERROR) { @@ -976,6 +1377,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) /* Now copy what we have collected and mapped */ elem = virtqueue_alloc_element(sz, out_num, in_num); elem->index = head; + elem->ndescs = 1; for (i = 0; i < out_num; i++) { elem->out_addr[i] = addr[i]; elem->out_sg[i] = iov[i]; @@ -999,23 +1401,204 @@ err_undo_map: goto done; } -/* virtqueue_drop_all: - * @vq: The #VirtQueue - * Drops all queued buffers and indicates them to the guest - * as if they are done. Useful when buffers can not be - * processed but must be returned to the guest. - */ -unsigned int virtqueue_drop_all(VirtQueue *vq) +static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) +{ + unsigned int i, max; + VRingMemoryRegionCaches *caches; + MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + MemoryRegionCache *desc_cache; + int64_t len; + VirtIODevice *vdev = vq->vdev; + VirtQueueElement *elem = NULL; + unsigned out_num, in_num, elem_entries; + hwaddr addr[VIRTQUEUE_MAX_SIZE]; + struct iovec iov[VIRTQUEUE_MAX_SIZE]; + VRingPackedDesc desc; + uint16_t id; + int rc; + + rcu_read_lock(); + if (virtio_queue_packed_empty_rcu(vq)) { + goto done; + } + + /* When we start there are none of either input nor output. */ + out_num = in_num = elem_entries = 0; + + max = vq->vring.num; + + if (vq->inuse >= vq->vring.num) { + virtio_error(vdev, "Virtqueue size exceeded"); + goto done; + } + + i = vq->last_avail_idx; + + caches = vring_get_region_caches(vq); + if (caches->desc.len < max * sizeof(VRingDesc)) { + virtio_error(vdev, "Cannot map descriptor ring"); + goto done; + } + + desc_cache = &caches->desc; + vring_packed_desc_read(vdev, &desc, desc_cache, i, true); + id = desc.id; + if (desc.flags & VRING_DESC_F_INDIRECT) { + if (desc.len % sizeof(VRingPackedDesc)) { + virtio_error(vdev, "Invalid size for indirect buffer table"); + goto done; + } + + /* loop over the indirect descriptor table */ + len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, + desc.addr, desc.len, false); + desc_cache = &indirect_desc_cache; + if (len < desc.len) { + virtio_error(vdev, "Cannot map indirect buffer"); + goto done; + } + + max = desc.len / sizeof(VRingPackedDesc); + i = 0; + vring_packed_desc_read(vdev, &desc, desc_cache, i, false); + } + + /* Collect all the descriptors */ + do { + bool map_ok; + + if (desc.flags & VRING_DESC_F_WRITE) { + map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num, + iov + out_num, + VIRTQUEUE_MAX_SIZE - out_num, true, + desc.addr, desc.len); + } else { + if (in_num) { + virtio_error(vdev, "Incorrect order for descriptors"); + goto err_undo_map; + } + map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov, + VIRTQUEUE_MAX_SIZE, false, + desc.addr, desc.len); + } + if (!map_ok) { + goto err_undo_map; + } + + /* If we've got too many, that implies a descriptor loop. */ + if (++elem_entries > max) { + virtio_error(vdev, "Looped descriptor"); + goto err_undo_map; + } + + rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i, + desc_cache == + &indirect_desc_cache); + } while (rc == VIRTQUEUE_READ_DESC_MORE); + + /* Now copy what we have collected and mapped */ + elem = virtqueue_alloc_element(sz, out_num, in_num); + for (i = 0; i < out_num; i++) { + elem->out_addr[i] = addr[i]; + elem->out_sg[i] = iov[i]; + } + for (i = 0; i < in_num; i++) { + elem->in_addr[i] = addr[out_num + i]; + elem->in_sg[i] = iov[out_num + i]; + } + + elem->index = id; + elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries; + vq->last_avail_idx += elem->ndescs; + vq->inuse += elem->ndescs; + + if (vq->last_avail_idx >= vq->vring.num) { + vq->last_avail_idx -= vq->vring.num; + vq->last_avail_wrap_counter ^= 1; + } + + vq->shadow_avail_idx = vq->last_avail_idx; + vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter; + + trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); +done: + address_space_cache_destroy(&indirect_desc_cache); + rcu_read_unlock(); + + return elem; + +err_undo_map: + virtqueue_undo_map_desc(out_num, in_num, iov); + goto done; +} + +void *virtqueue_pop(VirtQueue *vq, size_t sz) +{ + if (unlikely(vq->vdev->broken)) { + return NULL; + } + + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + return virtqueue_packed_pop(vq, sz); + } else { + return virtqueue_split_pop(vq, sz); + } +} + +static unsigned int virtqueue_packed_drop_all(VirtQueue *vq) +{ + VRingMemoryRegionCaches *caches; + MemoryRegionCache *desc_cache; + unsigned int dropped = 0; + VirtQueueElement elem = {}; + VirtIODevice *vdev = vq->vdev; + VRingPackedDesc desc; + + caches = vring_get_region_caches(vq); + desc_cache = &caches->desc; + + virtio_queue_set_notification(vq, 0); + + while (vq->inuse < vq->vring.num) { + unsigned int idx = vq->last_avail_idx; + /* + * works similar to virtqueue_pop but does not map buffers + * and does not allocate any memory. + */ + vring_packed_desc_read(vdev, &desc, desc_cache, + vq->last_avail_idx , true); + if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) { + break; + } + elem.index = desc.id; + elem.ndescs = 1; + while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache, + vq->vring.num, &idx, false)) { + ++elem.ndescs; + } + /* + * immediately push the element, nothing to unmap + * as both in_num and out_num are set to 0. + */ + virtqueue_push(vq, &elem, 0); + dropped++; + vq->last_avail_idx += elem.ndescs; + if (vq->last_avail_idx >= vq->vring.num) { + vq->last_avail_idx -= vq->vring.num; + vq->last_avail_wrap_counter ^= 1; + } + } + + return dropped; +} + +static unsigned int virtqueue_split_drop_all(VirtQueue *vq) { unsigned int dropped = 0; VirtQueueElement elem = {}; VirtIODevice *vdev = vq->vdev; bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); - if (unlikely(vdev->broken)) { - return 0; - } - while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) { /* works similar to virtqueue_pop but does not map buffers * and does not allocate any memory */ @@ -1037,6 +1620,27 @@ unsigned int virtqueue_drop_all(VirtQueue *vq) return dropped; } +/* virtqueue_drop_all: + * @vq: The #VirtQueue + * Drops all queued buffers and indicates them to the guest + * as if they are done. Useful when buffers can not be + * processed but must be returned to the guest. + */ +unsigned int virtqueue_drop_all(VirtQueue *vq) +{ + struct VirtIODevice *vdev = vq->vdev; + + if (unlikely(vdev->broken)) { + return 0; + } + + if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + return virtqueue_packed_drop_all(vq); + } else { + return virtqueue_split_drop_all(vq); + } +} + /* Reading and writing a structure directly to QEMUFile is *awful*, but * it is what QEMU has always done by mistake. We can change it sooner * or later by bumping the version number of the affected vm states. @@ -1093,11 +1697,16 @@ void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz) elem->out_sg[i].iov_len = data.out_sg[i].iov_len; } + if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + qemu_get_be32s(f, &elem->ndescs); + } + virtqueue_map(vdev, elem); return elem; } -void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem) +void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, + VirtQueueElement *elem) { VirtQueueElementOld data; int i; @@ -1125,6 +1734,11 @@ void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem) /* Do not save iov_base as above. */ data.out_sg[i].iov_len = elem->out_sg[i].iov_len; } + + if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + qemu_put_be32s(f, &elem->ndescs); + } + qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); } @@ -1249,6 +1863,9 @@ void virtio_reset(void *opaque) vdev->vq[i].last_avail_idx = 0; vdev->vq[i].shadow_avail_idx = 0; vdev->vq[i].used_idx = 0; + vdev->vq[i].last_avail_wrap_counter = true; + vdev->vq[i].shadow_avail_wrap_counter = true; + vdev->vq[i].used_wrap_counter = true; virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); vdev->vq[i].signalled_used = 0; vdev->vq[i].signalled_used_valid = false; @@ -1639,6 +2256,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; vdev->vq[i].handle_output = handle_output; vdev->vq[i].handle_aio_output = NULL; + vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) * + queue_size); return &vdev->vq[i]; } @@ -1653,6 +2272,7 @@ void virtio_del_queue(VirtIODevice *vdev, int n) vdev->vq[n].vring.num_default = 0; vdev->vq[n].handle_output = NULL; vdev->vq[n].handle_aio_output = NULL; + g_free(vdev->vq[n].used_elems); } static void virtio_set_isr(VirtIODevice *vdev, int value) @@ -1780,6 +2400,13 @@ static bool virtio_virtqueue_needed(void *opaque) return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1); } +static bool virtio_packed_virtqueue_needed(void *opaque) +{ + VirtIODevice *vdev = opaque; + + return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED); +} + static bool virtio_ringsize_needed(void *opaque) { VirtIODevice *vdev = opaque; @@ -1828,6 +2455,20 @@ static const VMStateDescription vmstate_virtqueue = { } }; +static const VMStateDescription vmstate_packed_virtqueue = { + .name = "packed_virtqueue_state", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT16(last_avail_idx, struct VirtQueue), + VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue), + VMSTATE_UINT16(used_idx, struct VirtQueue), + VMSTATE_BOOL(used_wrap_counter, struct VirtQueue), + VMSTATE_UINT32(inuse, struct VirtQueue), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio_virtqueues = { .name = "virtio/virtqueues", .version_id = 1, @@ -1840,6 +2481,18 @@ static const VMStateDescription vmstate_virtio_virtqueues = { } }; +static const VMStateDescription vmstate_virtio_packed_virtqueues = { + .name = "virtio/packed_virtqueues", + .version_id = 1, + .minimum_version_id = 1, + .needed = &virtio_packed_virtqueue_needed, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, + VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_ringsize = { .name = "ringsize_state", .version_id = 1, @@ -1972,6 +2625,7 @@ static const VMStateDescription vmstate_virtio = { &vmstate_virtio_broken, &vmstate_virtio_extra_state, &vmstate_virtio_started, + &vmstate_virtio_packed_virtqueues, NULL } }; @@ -2271,6 +2925,13 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) virtio_queue_update_rings(vdev, i); } + if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx; + vdev->vq[i].shadow_avail_wrap_counter = + vdev->vq[i].last_avail_wrap_counter; + continue; + } + nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; /* Check it isn't doing strange things with descriptor numbers. */ if (nheads > vdev->vq[i].vring.num) { @@ -2414,6 +3075,10 @@ hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) { int s; + if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + return sizeof(struct VRingPackedDescEvent); + } + s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return offsetof(VRingAvail, ring) + sizeof(uint16_t) * vdev->vq[n].vring.num + s; @@ -2423,23 +3088,83 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) { int s; + if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + return sizeof(struct VRingPackedDescEvent); + } + s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return offsetof(VRingUsed, ring) + sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; } -uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) +static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev, + int n) +{ + unsigned int avail, used; + + avail = vdev->vq[n].last_avail_idx; + avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15; + + used = vdev->vq[n].used_idx; + used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15; + + return avail | used << 16; +} + +static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev, + int n) { return vdev->vq[n].last_avail_idx; } -void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) +unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) { - vdev->vq[n].last_avail_idx = idx; - vdev->vq[n].shadow_avail_idx = idx; + if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + return virtio_queue_packed_get_last_avail_idx(vdev, n); + } else { + return virtio_queue_split_get_last_avail_idx(vdev, n); + } } -void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) +static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev, + int n, unsigned int idx) +{ + struct VirtQueue *vq = &vdev->vq[n]; + + vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff; + vq->last_avail_wrap_counter = + vq->shadow_avail_wrap_counter = !!(idx & 0x8000); + idx >>= 16; + vq->used_idx = idx & 0x7ffff; + vq->used_wrap_counter = !!(idx & 0x8000); +} + +static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev, + int n, unsigned int idx) +{ + vdev->vq[n].last_avail_idx = idx; + vdev->vq[n].shadow_avail_idx = idx; +} + +void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, + unsigned int idx) +{ + if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + virtio_queue_packed_set_last_avail_idx(vdev, n, idx); + } else { + virtio_queue_split_set_last_avail_idx(vdev, n, idx); + } +} + +static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev, + int n) +{ + /* We don't have a reference like avail idx in shared memory */ + return; +} + +static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev, + int n) { rcu_read_lock(); if (vdev->vq[n].vring.desc) { @@ -2449,7 +3174,22 @@ void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) rcu_read_unlock(); } -void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) +void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) +{ + if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + virtio_queue_packed_restore_last_avail_idx(vdev, n); + } else { + virtio_queue_split_restore_last_avail_idx(vdev, n); + } +} + +static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n) +{ + /* used idx was updated through set_last_avail_idx() */ + return; +} + +static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n) { rcu_read_lock(); if (vdev->vq[n].vring.desc) { @@ -2458,6 +3198,15 @@ void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) rcu_read_unlock(); } +void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) +{ + if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + return virtio_queue_packed_update_used_idx(vdev, n); + } else { + return virtio_split_packed_update_used_idx(vdev, n); + } +} + void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n) { vdev->vq[n].signalled_used_valid = false; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 48e8d04ff6..d123d5b181 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -59,6 +59,8 @@ typedef struct VirtQueue VirtQueue; typedef struct VirtQueueElement { unsigned int index; + unsigned int len; + unsigned int ndescs; unsigned int out_num; unsigned int in_num; hwaddr *in_addr; @@ -196,7 +198,8 @@ void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem); void *virtqueue_pop(VirtQueue *vq, size_t sz); unsigned int virtqueue_drop_all(VirtQueue *vq); void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz); -void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem); +void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, + VirtQueueElement *elem); int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, unsigned int out_bytes); void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, @@ -291,8 +294,9 @@ hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n); -uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n); -void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx); +unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n); +void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, + unsigned int idx); void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n); void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n); void virtio_queue_update_used_idx(VirtIODevice *vdev, int n); From 683f7665679c1e9ef64183c3636b8e13ce012cdc Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 25 Oct 2019 10:35:25 +0200 Subject: [PATCH 06/25] virtio: event suppression support for packed ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements event suppression through device/driver area. Please refer virtio specification for more information. Signed-off-by: Wei Xu Signed-off-by: Jason Wang Signed-off-by: Eugenio Pérez Message-Id: <20191025083527.30803-7-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 142 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 133 insertions(+), 9 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index a694b4ab50..828c27de1f 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -240,6 +240,44 @@ static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc, virtio_tswap16s(vdev, &desc->next); } +static void vring_packed_event_read(VirtIODevice *vdev, + MemoryRegionCache *cache, + VRingPackedDescEvent *e) +{ + hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap); + hwaddr off_flags = offsetof(VRingPackedDescEvent, flags); + + address_space_read_cached(cache, off_flags, &e->flags, + sizeof(e->flags)); + /* Make sure flags is seen before off_wrap */ + smp_rmb(); + address_space_read_cached(cache, off_off, &e->off_wrap, + sizeof(e->off_wrap)); + virtio_tswap16s(vdev, &e->off_wrap); + virtio_tswap16s(vdev, &e->flags); +} + +static void vring_packed_off_wrap_write(VirtIODevice *vdev, + MemoryRegionCache *cache, + uint16_t off_wrap) +{ + hwaddr off = offsetof(VRingPackedDescEvent, off_wrap); + + virtio_tswap16s(vdev, &off_wrap); + address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap)); + address_space_cache_invalidate(cache, off, sizeof(off_wrap)); +} + +static void vring_packed_flags_write(VirtIODevice *vdev, + MemoryRegionCache *cache, uint16_t flags) +{ + hwaddr off = offsetof(VRingPackedDescEvent, flags); + + virtio_tswap16s(vdev, &flags); + address_space_write_cached(cache, off, &flags, sizeof(flags)); + address_space_cache_invalidate(cache, off, sizeof(flags)); +} + /* Called within rcu_read_lock(). */ static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) { @@ -347,14 +385,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) address_space_cache_invalidate(&caches->used, pa, sizeof(val)); } -void virtio_queue_set_notification(VirtQueue *vq, int enable) +static void virtio_queue_split_set_notification(VirtQueue *vq, int enable) { - vq->notification = enable; - - if (!vq->vring.desc) { - return; - } - rcu_read_lock(); if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { vring_set_avail_event(vq, vring_avail_idx(vq)); @@ -370,6 +402,51 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable) rcu_read_unlock(); } +static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable) +{ + uint16_t off_wrap; + VRingPackedDescEvent e; + VRingMemoryRegionCaches *caches; + + rcu_read_lock(); + caches = vring_get_region_caches(vq); + vring_packed_event_read(vq->vdev, &caches->used, &e); + + if (!enable) { + e.flags = VRING_PACKED_EVENT_FLAG_DISABLE; + } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { + off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15; + vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap); + /* Make sure off_wrap is wrote before flags */ + smp_wmb(); + e.flags = VRING_PACKED_EVENT_FLAG_DESC; + } else { + e.flags = VRING_PACKED_EVENT_FLAG_ENABLE; + } + + vring_packed_flags_write(vq->vdev, &caches->used, e.flags); + if (enable) { + /* Expose avail event/used flags before caller checks the avail idx. */ + smp_mb(); + } + rcu_read_unlock(); +} + +void virtio_queue_set_notification(VirtQueue *vq, int enable) +{ + vq->notification = enable; + + if (!vq->vring.desc) { + return; + } + + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + virtio_queue_packed_set_notification(vq, enable); + } else { + virtio_queue_split_set_notification(vq, enable); + } +} + int virtio_queue_ready(VirtQueue *vq) { return vq->vring.avail != 0; @@ -2287,8 +2364,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value) } } -/* Called within rcu_read_lock(). */ -static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) { uint16_t old, new; bool v; @@ -2311,6 +2387,54 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) return !v || vring_need_event(vring_get_used_event(vq), new, old); } +static bool vring_packed_need_event(VirtQueue *vq, bool wrap, + uint16_t off_wrap, uint16_t new, + uint16_t old) +{ + int off = off_wrap & ~(1 << 15); + + if (wrap != off_wrap >> 15) { + off -= vq->vring.num; + } + + return vring_need_event(off, new, old); +} + +static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ + VRingPackedDescEvent e; + uint16_t old, new; + bool v; + VRingMemoryRegionCaches *caches; + + caches = vring_get_region_caches(vq); + vring_packed_event_read(vdev, &caches->avail, &e); + + old = vq->signalled_used; + new = vq->signalled_used = vq->used_idx; + v = vq->signalled_used_valid; + vq->signalled_used_valid = true; + + if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) { + return false; + } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) { + return true; + } + + return !v || vring_packed_need_event(vq, vq->used_wrap_counter, + e.off_wrap, new, old); +} + +/* Called within rcu_read_lock(). */ +static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ + if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + return virtio_packed_should_notify(vdev, vq); + } else { + return virtio_split_should_notify(vdev, vq); + } +} + void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) { bool should_notify; From dfea7930456656a9e8be21300635dfdda2ffac3f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 25 Oct 2019 10:35:26 +0200 Subject: [PATCH 07/25] vhost_net: enable packed ring support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jason Wang Signed-off-by: Eugenio Pérez Reviewed-by: Jens Freimann Message-Id: <20191025083527.30803-8-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index e975700f95..6b82803fa7 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -49,6 +49,7 @@ static const int kernel_feature_bits[] = { VIRTIO_F_VERSION_1, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, + VIRTIO_F_RING_PACKED, VHOST_INVALID_FEATURE_BIT }; @@ -74,6 +75,7 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, + VIRTIO_F_RING_PACKED, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, From 74b3e46630446568aecb0be1c77c4875d7a52f6d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 25 Oct 2019 10:35:27 +0200 Subject: [PATCH 08/25] virtio: add property to enable packed virtqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jason Wang Signed-off-by: Eugenio Pérez Reviewed-by: Jens Freimann Message-Id: <20191025083527.30803-9-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/virtio/virtio.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index d123d5b181..40ddeafadb 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -285,7 +285,9 @@ typedef struct VirtIORNGConf VirtIORNGConf; DEFINE_PROP_BIT64("any_layout", _state, _field, \ VIRTIO_F_ANY_LAYOUT, true), \ DEFINE_PROP_BIT64("iommu_platform", _state, _field, \ - VIRTIO_F_IOMMU_PLATFORM, false) + VIRTIO_F_IOMMU_PLATFORM, false), \ + DEFINE_PROP_BIT64("packed", _state, _field, \ + VIRTIO_F_RING_PACKED, false) hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); bool virtio_queue_enabled(VirtIODevice *vdev, int n); From c42bf5f21043c86d50773220998b54ded091f779 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:10 +0100 Subject: [PATCH 09/25] tests/virtio-blk-test: read config space after feature negotiation The VIRTIO Configuration Space cannot be accessed before device feature bits have been read because a driver doesn't know the endianness until it has checked VIRTIO_F_VERSION_1. Fix this problem in preparation for VIRTIO 1.0 support. Signed-off-by: Stefan Hajnoczi Reviewed-by: Thomas Huth Message-Id: <20191023100425.12168-2-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/virtio-blk-test.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index ed13167392..f6674fb233 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -125,10 +125,6 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, char *data; QTestState *qts = global_qtest; - capacity = qvirtio_config_readq(dev, 0); - - g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); - features = qvirtio_get_features(dev); features = features & ~(QVIRTIO_F_BAD_FEATURE | (1u << VIRTIO_RING_F_INDIRECT_DESC) | @@ -136,6 +132,9 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, (1u << VIRTIO_BLK_F_SCSI)); qvirtio_set_features(dev, features); + capacity = qvirtio_config_readq(dev, 0); + g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); + qvirtio_set_driver_ok(dev); /* Write and read with 3 descriptor layout */ @@ -359,9 +358,6 @@ static void indirect(void *obj, void *u_data, QGuestAllocator *t_alloc) char *data; QTestState *qts = global_qtest; - capacity = qvirtio_config_readq(dev, 0); - g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); - features = qvirtio_get_features(dev); g_assert_cmphex(features & (1u << VIRTIO_RING_F_INDIRECT_DESC), !=, 0); features = features & ~(QVIRTIO_F_BAD_FEATURE | @@ -369,6 +365,9 @@ static void indirect(void *obj, void *u_data, QGuestAllocator *t_alloc) (1u << VIRTIO_BLK_F_SCSI)); qvirtio_set_features(dev, features); + capacity = qvirtio_config_readq(dev, 0); + g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); + vq = qvirtqueue_setup(dev, t_alloc, 0); qvirtio_set_driver_ok(dev); @@ -434,8 +433,16 @@ static void config(void *obj, void *data, QGuestAllocator *t_alloc) QVirtioBlk *blk_if = obj; QVirtioDevice *dev = blk_if->vdev; int n_size = TEST_IMAGE_SIZE / 2; + uint64_t features; uint64_t capacity; + features = qvirtio_get_features(dev); + features = features & ~(QVIRTIO_F_BAD_FEATURE | + (1u << VIRTIO_RING_F_INDIRECT_DESC) | + (1u << VIRTIO_RING_F_EVENT_IDX) | + (1u << VIRTIO_BLK_F_SCSI)); + qvirtio_set_features(dev, features); + capacity = qvirtio_config_readq(dev, 0); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); @@ -475,9 +482,6 @@ static void msix(void *obj, void *u_data, QGuestAllocator *t_alloc) qpci_msix_enable(pdev->pdev); qvirtio_pci_set_msix_configuration_vector(pdev, t_alloc, 0); - capacity = qvirtio_config_readq(dev, 0); - g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); - features = qvirtio_get_features(dev); features = features & ~(QVIRTIO_F_BAD_FEATURE | (1u << VIRTIO_RING_F_INDIRECT_DESC) | @@ -485,6 +489,9 @@ static void msix(void *obj, void *u_data, QGuestAllocator *t_alloc) (1u << VIRTIO_BLK_F_SCSI)); qvirtio_set_features(dev, features); + capacity = qvirtio_config_readq(dev, 0); + g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); + vq = qvirtqueue_setup(dev, t_alloc, 0); qvirtqueue_pci_msix_setup(pdev, (QVirtQueuePCI *)vq, t_alloc, 1); @@ -584,9 +591,6 @@ static void idx(void *obj, void *u_data, QGuestAllocator *t_alloc) qpci_msix_enable(pdev->pdev); qvirtio_pci_set_msix_configuration_vector(pdev, t_alloc, 0); - capacity = qvirtio_config_readq(dev, 0); - g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); - features = qvirtio_get_features(dev); features = features & ~(QVIRTIO_F_BAD_FEATURE | (1u << VIRTIO_RING_F_INDIRECT_DESC) | @@ -594,6 +598,9 @@ static void idx(void *obj, void *u_data, QGuestAllocator *t_alloc) (1u << VIRTIO_BLK_F_SCSI)); qvirtio_set_features(dev, features); + capacity = qvirtio_config_readq(dev, 0); + g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); + vq = qvirtqueue_setup(dev, t_alloc, 0); qvirtqueue_pci_msix_setup(pdev, (QVirtQueuePCI *)vq, t_alloc, 1); From 60ce3a40fc18423d6c3ba330e60ca03fc74f9d21 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:11 +0100 Subject: [PATCH 10/25] libqos: read QVIRTIO_MMIO_VERSION register There was no real virtio-mmio ABI change between Legacy and VIRTIO 1.0 except that the Version field was incremented from 1 to 2. However, QEMU does not allow Legacy drivers to perform VIRTIO 1.0 operations like accessing 64-bit feature bits. Since we will introduce 64-bit feature bit support we need a way to differentiate between virtio-mmio Version 1 and 2 to avoid upsetting QEMU when we operate in Legacy mode. Stash away the Version field so later patches can change behavior depending on the version. Signed-off-by: Stefan Hajnoczi Reviewed-by: Thomas Huth Message-Id: <20191023100425.12168-3-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/libqos/virtio-mmio.c | 3 +++ tests/libqos/virtio-mmio.h | 1 + 2 files changed, 4 insertions(+) diff --git a/tests/libqos/virtio-mmio.c b/tests/libqos/virtio-mmio.c index d0047876a8..7154b03c1d 100644 --- a/tests/libqos/virtio-mmio.c +++ b/tests/libqos/virtio-mmio.c @@ -223,6 +223,9 @@ void qvirtio_mmio_init_device(QVirtioMMIODevice *dev, QTestState *qts, magic = qtest_readl(qts, addr + QVIRTIO_MMIO_MAGIC_VALUE); g_assert(magic == ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)); + dev->version = qtest_readl(qts, addr + QVIRTIO_MMIO_VERSION); + g_assert(dev->version == 1 || dev->version == 2); + dev->qts = qts; dev->addr = addr; dev->page_size = page_size; diff --git a/tests/libqos/virtio-mmio.h b/tests/libqos/virtio-mmio.h index 17a17141c3..0e45778b07 100644 --- a/tests/libqos/virtio-mmio.h +++ b/tests/libqos/virtio-mmio.h @@ -40,6 +40,7 @@ typedef struct QVirtioMMIODevice { uint64_t addr; uint32_t page_size; uint32_t features; /* As it cannot be read later, save it */ + uint32_t version; } QVirtioMMIODevice; extern const QVirtioBus qvirtio_mmio; From a9340358f4d5d0e8e0c7a3a23cb4e5b68334974c Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:12 +0100 Subject: [PATCH 11/25] libqos: extend feature bits to 64-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In VIRTIO 1.0 feature bits changed from 32-bit to 64-bit. (In fact, the transports allow even more feature bits but nothing uses more than 64 bits today.) Add 64-bit feature bit support to virtio-mmio and virtio-pci. This will be necessary for VIRTIO 1.0 support. Signed-off-by: Stefan Hajnoczi Reviewed-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20191023100425.12168-4-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/libqos/virtio-mmio.c | 28 ++++++++++++++++++++++------ tests/libqos/virtio-net.c | 6 +++--- tests/libqos/virtio-pci.c | 12 ++++++------ tests/libqos/virtio.c | 4 ++-- tests/libqos/virtio.h | 12 ++++++------ tests/virtio-blk-test.c | 8 ++++---- 6 files changed, 43 insertions(+), 27 deletions(-) diff --git a/tests/libqos/virtio-mmio.c b/tests/libqos/virtio-mmio.c index 7154b03c1d..78066e6e05 100644 --- a/tests/libqos/virtio-mmio.c +++ b/tests/libqos/virtio-mmio.c @@ -40,22 +40,38 @@ static uint64_t qvirtio_mmio_config_readq(QVirtioDevice *d, uint64_t off) return qtest_readq(dev->qts, dev->addr + QVIRTIO_MMIO_DEVICE_SPECIFIC + off); } -static uint32_t qvirtio_mmio_get_features(QVirtioDevice *d) +static uint64_t qvirtio_mmio_get_features(QVirtioDevice *d) { QVirtioMMIODevice *dev = container_of(d, QVirtioMMIODevice, vdev); + uint64_t lo; + uint64_t hi = 0; + qtest_writel(dev->qts, dev->addr + QVIRTIO_MMIO_HOST_FEATURES_SEL, 0); - return qtest_readl(dev->qts, dev->addr + QVIRTIO_MMIO_HOST_FEATURES); + lo = qtest_readl(dev->qts, dev->addr + QVIRTIO_MMIO_HOST_FEATURES); + + if (dev->version >= 2) { + qtest_writel(dev->qts, dev->addr + QVIRTIO_MMIO_HOST_FEATURES_SEL, 1); + hi = qtest_readl(dev->qts, dev->addr + QVIRTIO_MMIO_HOST_FEATURES); + } + + return (hi << 32) | lo; } -static void qvirtio_mmio_set_features(QVirtioDevice *d, uint32_t features) +static void qvirtio_mmio_set_features(QVirtioDevice *d, uint64_t features) { QVirtioMMIODevice *dev = container_of(d, QVirtioMMIODevice, vdev); dev->features = features; qtest_writel(dev->qts, dev->addr + QVIRTIO_MMIO_GUEST_FEATURES_SEL, 0); qtest_writel(dev->qts, dev->addr + QVIRTIO_MMIO_GUEST_FEATURES, features); + + if (dev->version >= 2) { + qtest_writel(dev->qts, dev->addr + QVIRTIO_MMIO_GUEST_FEATURES_SEL, 1); + qtest_writel(dev->qts, dev->addr + QVIRTIO_MMIO_GUEST_FEATURES, + features >> 32); + } } -static uint32_t qvirtio_mmio_get_guest_features(QVirtioDevice *d) +static uint64_t qvirtio_mmio_get_guest_features(QVirtioDevice *d) { QVirtioMMIODevice *dev = container_of(d, QVirtioMMIODevice, vdev); return dev->features; @@ -149,8 +165,8 @@ static QVirtQueue *qvirtio_mmio_virtqueue_setup(QVirtioDevice *d, vq->free_head = 0; vq->num_free = vq->size; vq->align = dev->page_size; - vq->indirect = (dev->features & (1u << VIRTIO_RING_F_INDIRECT_DESC)) != 0; - vq->event = (dev->features & (1u << VIRTIO_RING_F_EVENT_IDX)) != 0; + vq->indirect = dev->features & (1ull << VIRTIO_RING_F_INDIRECT_DESC); + vq->event = dev->features & (1ull << VIRTIO_RING_F_EVENT_IDX); qtest_writel(dev->qts, dev->addr + QVIRTIO_MMIO_QUEUE_NUM, vq->size); diff --git a/tests/libqos/virtio-net.c b/tests/libqos/virtio-net.c index 6567beb553..710d440c3d 100644 --- a/tests/libqos/virtio-net.c +++ b/tests/libqos/virtio-net.c @@ -44,11 +44,11 @@ static void virtio_net_setup(QVirtioNet *interface) features = qvirtio_get_features(vdev); features &= ~(QVIRTIO_F_BAD_FEATURE | - (1u << VIRTIO_RING_F_INDIRECT_DESC) | - (1u << VIRTIO_RING_F_EVENT_IDX)); + (1ull << VIRTIO_RING_F_INDIRECT_DESC) | + (1ull << VIRTIO_RING_F_EVENT_IDX)); qvirtio_set_features(vdev, features); - if (features & (1u << VIRTIO_NET_F_MQ)) { + if (features & (1ull << VIRTIO_NET_F_MQ)) { interface->n_queues = qvirtio_config_readw(vdev, 8) * 2; } else { interface->n_queues = 2; diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index 50499e75ef..1b6b760fc6 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -96,19 +96,19 @@ static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, uint64_t off) return val; } -static uint32_t qvirtio_pci_get_features(QVirtioDevice *d) +static uint64_t qvirtio_pci_get_features(QVirtioDevice *d) { QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); return qpci_io_readl(dev->pdev, dev->bar, VIRTIO_PCI_HOST_FEATURES); } -static void qvirtio_pci_set_features(QVirtioDevice *d, uint32_t features) +static void qvirtio_pci_set_features(QVirtioDevice *d, uint64_t features) { QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); qpci_io_writel(dev->pdev, dev->bar, VIRTIO_PCI_GUEST_FEATURES, features); } -static uint32_t qvirtio_pci_get_guest_features(QVirtioDevice *d) +static uint64_t qvirtio_pci_get_guest_features(QVirtioDevice *d) { QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); return qpci_io_readl(dev->pdev, dev->bar, VIRTIO_PCI_GUEST_FEATURES); @@ -208,7 +208,7 @@ static void qvirtio_pci_set_queue_address(QVirtioDevice *d, uint32_t pfn) static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d, QGuestAllocator *alloc, uint16_t index) { - uint32_t feat; + uint64_t feat; uint64_t addr; QVirtQueuePCI *vqpci; QVirtioPCIDevice *qvpcidev = container_of(d, QVirtioPCIDevice, vdev); @@ -222,8 +222,8 @@ static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d, vqpci->vq.free_head = 0; vqpci->vq.num_free = vqpci->vq.size; vqpci->vq.align = VIRTIO_PCI_VRING_ALIGN; - vqpci->vq.indirect = (feat & (1u << VIRTIO_RING_F_INDIRECT_DESC)) != 0; - vqpci->vq.event = (feat & (1u << VIRTIO_RING_F_EVENT_IDX)) != 0; + vqpci->vq.indirect = feat & (1ull << VIRTIO_RING_F_INDIRECT_DESC); + vqpci->vq.event = feat & (1ull << VIRTIO_RING_F_EVENT_IDX); vqpci->msix_entry = -1; vqpci->msix_addr = 0; diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c index 0ae9956fc8..4f7e6bb8a1 100644 --- a/tests/libqos/virtio.c +++ b/tests/libqos/virtio.c @@ -33,12 +33,12 @@ uint64_t qvirtio_config_readq(QVirtioDevice *d, uint64_t addr) return d->bus->config_readq(d, addr); } -uint32_t qvirtio_get_features(QVirtioDevice *d) +uint64_t qvirtio_get_features(QVirtioDevice *d) { return d->bus->get_features(d); } -void qvirtio_set_features(QVirtioDevice *d, uint32_t features) +void qvirtio_set_features(QVirtioDevice *d, uint64_t features) { d->features = features; d->bus->set_features(d, features); diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h index 2cb2448f46..a5c99fb3c9 100644 --- a/tests/libqos/virtio.h +++ b/tests/libqos/virtio.h @@ -13,7 +13,7 @@ #include "libqos/malloc.h" #include "standard-headers/linux/virtio_ring.h" -#define QVIRTIO_F_BAD_FEATURE 0x40000000 +#define QVIRTIO_F_BAD_FEATURE 0x40000000ull typedef struct QVirtioBus QVirtioBus; @@ -52,13 +52,13 @@ struct QVirtioBus { uint64_t (*config_readq)(QVirtioDevice *d, uint64_t addr); /* Get features of the device */ - uint32_t (*get_features)(QVirtioDevice *d); + uint64_t (*get_features)(QVirtioDevice *d); /* Set features of the device */ - void (*set_features)(QVirtioDevice *d, uint32_t features); + void (*set_features)(QVirtioDevice *d, uint64_t features); /* Get features of the guest */ - uint32_t (*get_guest_features)(QVirtioDevice *d); + uint64_t (*get_guest_features)(QVirtioDevice *d); /* Get status of the device */ uint8_t (*get_status)(QVirtioDevice *d); @@ -103,8 +103,8 @@ uint8_t qvirtio_config_readb(QVirtioDevice *d, uint64_t addr); uint16_t qvirtio_config_readw(QVirtioDevice *d, uint64_t addr); uint32_t qvirtio_config_readl(QVirtioDevice *d, uint64_t addr); uint64_t qvirtio_config_readq(QVirtioDevice *d, uint64_t addr); -uint32_t qvirtio_get_features(QVirtioDevice *d); -void qvirtio_set_features(QVirtioDevice *d, uint32_t features); +uint64_t qvirtio_get_features(QVirtioDevice *d); +void qvirtio_set_features(QVirtioDevice *d, uint64_t features); bool qvirtio_is_big_endian(QVirtioDevice *d); void qvirtio_reset(QVirtioDevice *d); diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index f6674fb233..31680cc159 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -119,7 +119,7 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, QVirtioBlkReq req; uint64_t req_addr; uint64_t capacity; - uint32_t features; + uint64_t features; uint32_t free_head; uint8_t status; char *data; @@ -352,7 +352,7 @@ static void indirect(void *obj, void *u_data, QGuestAllocator *t_alloc) QVRingIndirectDesc *indirect; uint64_t req_addr; uint64_t capacity; - uint32_t features; + uint64_t features; uint32_t free_head; uint8_t status; char *data; @@ -467,7 +467,7 @@ static void msix(void *obj, void *u_data, QGuestAllocator *t_alloc) int n_size = TEST_IMAGE_SIZE / 2; uint64_t req_addr; uint64_t capacity; - uint32_t features; + uint64_t features; uint32_t free_head; uint8_t status; char *data; @@ -574,7 +574,7 @@ static void idx(void *obj, void *u_data, QGuestAllocator *t_alloc) QVirtioBlkReq req; uint64_t req_addr; uint64_t capacity; - uint32_t features; + uint64_t features; uint32_t free_head; uint32_t write_head; uint32_t desc_idx; From e73255be96bf435fe6bf9a5c196dcd7c76adb0da Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:13 +0100 Subject: [PATCH 12/25] virtio-scsi-test: add missing feature negotiation VIRTIO Device Initialization requires feature negotiation. Currently virtio-scsi-test.c is non-compliant. libqos tests acknowledge all feature bits advertised by the device, except VIRTIO_F_BAD_FEATURE (which devices use to detect broken drivers!) and VIRTIO_RING_F_EVENT_IDX (which is not implemented in libqos and accepting it would break notifications). Signed-off-by: Stefan Hajnoczi Message-Id: <20191023100425.12168-5-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Thomas Huth" --- tests/virtio-scsi-test.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c index 7c8f9b27f8..0415e75876 100644 --- a/tests/virtio-scsi-test.c +++ b/tests/virtio-scsi-test.c @@ -123,10 +123,16 @@ static QVirtioSCSIQueues *qvirtio_scsi_init(QVirtioDevice *dev) QVirtioSCSIQueues *vs; const uint8_t test_unit_ready_cdb[VIRTIO_SCSI_CDB_SIZE] = {}; struct virtio_scsi_cmd_resp resp; + uint64_t features; int i; vs = g_new0(QVirtioSCSIQueues, 1); vs->dev = dev; + + features = qvirtio_get_features(dev); + features &= ~(QVIRTIO_F_BAD_FEATURE | (1ull << VIRTIO_RING_F_EVENT_IDX)); + qvirtio_set_features(dev, features); + vs->num_queues = qvirtio_config_readl(dev, 0); g_assert_cmpint(vs->num_queues, <, MAX_NUM_QUEUES); @@ -135,6 +141,8 @@ static QVirtioSCSIQueues *qvirtio_scsi_init(QVirtioDevice *dev) vs->vq[i] = qvirtqueue_setup(dev, alloc, i); } + qvirtio_set_driver_ok(dev); + /* Clear the POWER ON OCCURRED unit attention */ g_assert_cmpint(virtio_scsi_do_command(vs, test_unit_ready_cdb, NULL, 0, NULL, 0, &resp), From 36d27d2120979db239230bb440c03d41dbe75165 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:14 +0100 Subject: [PATCH 13/25] tests/virtio-blk-test: set up virtqueue after feature negotiation VIRTIO Device Initialization requires that feature negotiation has completed before virtqueues are set up. This makes sense because the driver must know whether it is operating in Legacy or VIRTIO 1.0 mode before it can access vring fields with the correct endianness. Signed-off-by: Stefan Hajnoczi Reviewed-by: Thomas Huth Message-Id: <20191023100425.12168-6-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/virtio-blk-test.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 31680cc159..fe0dc4a896 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -113,8 +113,8 @@ static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioDevice *d, return addr; } -static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, - QVirtQueue *vq) +/* Returns the request virtqueue so the caller can perform further tests */ +static QVirtQueue *test_basic(QVirtioDevice *dev, QGuestAllocator *alloc) { QVirtioBlkReq req; uint64_t req_addr; @@ -124,6 +124,7 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, uint8_t status; char *data; QTestState *qts = global_qtest; + QVirtQueue *vq; features = qvirtio_get_features(dev); features = features & ~(QVIRTIO_F_BAD_FEATURE | @@ -135,6 +136,8 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, capacity = qvirtio_config_readq(dev, 0); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); + vq = qvirtqueue_setup(dev, alloc, 0); + qvirtio_set_driver_ok(dev); /* Write and read with 3 descriptor layout */ @@ -331,14 +334,16 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, guest_free(alloc, req_addr); } + + return vq; } static void basic(void *obj, void *data, QGuestAllocator *t_alloc) { QVirtioBlk *blk_if = obj; QVirtQueue *vq; - vq = qvirtqueue_setup(blk_if->vdev, t_alloc, 0); - test_basic(blk_if->vdev, t_alloc, vq); + + vq = test_basic(blk_if->vdev, t_alloc); qvirtqueue_cleanup(blk_if->vdev->bus, vq, t_alloc); } @@ -746,9 +751,7 @@ static void resize(void *obj, void *data, QGuestAllocator *t_alloc) QVirtQueue *vq; QTestState *qts = global_qtest; - vq = qvirtqueue_setup(dev, t_alloc, 0); - - test_basic(dev, t_alloc, vq); + vq = test_basic(dev, t_alloc); qmp_discard_response("{ 'execute': 'block_resize', " " 'arguments': { 'device': 'drive0', " From c5bd6d02e6eb6ae868abbcbeb5c3a3be8c5379a5 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:15 +0100 Subject: [PATCH 14/25] libqos: add missing virtio-9p feature negotiation VIRTIO Device Initialization requires feature negotiation. The libqos virtio-9p driver lacks feature negotiation and is therefore non-compliant. libqos tests acknowledge all feature bits advertised by the device, except VIRTIO_F_BAD_FEATURE (which devices use to detect broken drivers!) and VIRTIO_RING_F_EVENT_IDX (which is not implemented in libqos and accepting it would break notifications). Signed-off-by: Stefan Hajnoczi Message-Id: <20191023100425.12168-7-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Thomas Huth" --- tests/libqos/virtio-9p.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/libqos/virtio-9p.c b/tests/libqos/virtio-9p.c index 8c9efce3e1..77dbfb62ad 100644 --- a/tests/libqos/virtio-9p.c +++ b/tests/libqos/virtio-9p.c @@ -32,6 +32,12 @@ static void virtio_9p_cleanup(QVirtio9P *interface) static void virtio_9p_setup(QVirtio9P *interface) { + uint64_t features; + + features = qvirtio_get_features(interface->vdev); + features &= ~(QVIRTIO_F_BAD_FEATURE | (1ull << VIRTIO_RING_F_EVENT_IDX)); + qvirtio_set_features(interface->vdev, features); + interface->vq = qvirtqueue_setup(interface->vdev, alloc, 0); qvirtio_set_driver_ok(interface->vdev); } From 56140fbb8f2bb717eb2670aa1056aeaefe8b0246 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:16 +0100 Subject: [PATCH 15/25] libqos: enforce Device Initialization order According to VIRTIO 1.1 "3.1.1 Driver Requirements: Device Initialization", configuration space and virtqueues cannot be accessed before features have been negotiated. Enforce this requirement. Signed-off-by: Stefan Hajnoczi Message-Id: <20191023100425.12168-8-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Thomas Huth --- tests/libqos/virtio.c | 7 +++++++ tests/libqos/virtio.h | 1 + 2 files changed, 8 insertions(+) diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c index 4f7e6bb8a1..6049ff3b3e 100644 --- a/tests/libqos/virtio.c +++ b/tests/libqos/virtio.c @@ -15,21 +15,25 @@ uint8_t qvirtio_config_readb(QVirtioDevice *d, uint64_t addr) { + g_assert_true(d->features_negotiated); return d->bus->config_readb(d, addr); } uint16_t qvirtio_config_readw(QVirtioDevice *d, uint64_t addr) { + g_assert_true(d->features_negotiated); return d->bus->config_readw(d, addr); } uint32_t qvirtio_config_readl(QVirtioDevice *d, uint64_t addr) { + g_assert_true(d->features_negotiated); return d->bus->config_readl(d, addr); } uint64_t qvirtio_config_readq(QVirtioDevice *d, uint64_t addr) { + g_assert_true(d->features_negotiated); return d->bus->config_readq(d, addr); } @@ -42,11 +46,13 @@ void qvirtio_set_features(QVirtioDevice *d, uint64_t features) { d->features = features; d->bus->set_features(d, features); + d->features_negotiated = true; } QVirtQueue *qvirtqueue_setup(QVirtioDevice *d, QGuestAllocator *alloc, uint16_t index) { + g_assert_true(d->features_negotiated); return d->bus->virtqueue_setup(d, alloc, index); } @@ -60,6 +66,7 @@ void qvirtio_reset(QVirtioDevice *d) { d->bus->set_status(d, 0); g_assert_cmphex(d->bus->get_status(d), ==, 0); + d->features_negotiated = false; } void qvirtio_set_acknowledge(QVirtioDevice *d) diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h index a5c99fb3c9..0e8f823c7b 100644 --- a/tests/libqos/virtio.h +++ b/tests/libqos/virtio.h @@ -23,6 +23,7 @@ typedef struct QVirtioDevice { uint16_t device_type; uint64_t features; bool big_endian; + bool features_negotiated; } QVirtioDevice; typedef struct QVirtQueue { From c0f79698ed562814158d9652a29e77c97cc074f6 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:17 +0100 Subject: [PATCH 16/25] libqos: implement VIRTIO 1.0 FEATURES_OK step Device initialization has an extra step in VIRTIO 1.0. The FEATURES_OK status bit is set to indicate that feature negotiation has completed. The driver then reads the status register again to check that the device agrees with the final features. Implement this step as part of qvirtio_set_features() instead of introducing a separate function. This way all existing code works without modifications. The check in qvirtio_set_driver_ok() needs to be updated because FEATURES_OK will be set for VIRTIO 1.0 devices. Signed-off-by: Stefan Hajnoczi Message-Id: <20191023100425.12168-9-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Thomas Huth" --- tests/libqos/virtio.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c index 6049ff3b3e..fa597c2481 100644 --- a/tests/libqos/virtio.c +++ b/tests/libqos/virtio.c @@ -46,6 +46,20 @@ void qvirtio_set_features(QVirtioDevice *d, uint64_t features) { d->features = features; d->bus->set_features(d, features); + + /* + * This could be a separate function for drivers that want to access + * configuration space before setting FEATURES_OK, but no existing users + * need that and it's less code for callers if this is done implicitly. + */ + if (features & (1ull << VIRTIO_F_VERSION_1)) { + uint8_t status = d->bus->get_status(d) | + VIRTIO_CONFIG_S_FEATURES_OK; + + d->bus->set_status(d, status); + g_assert_cmphex(d->bus->get_status(d), ==, status); + } + d->features_negotiated = true; } @@ -86,7 +100,9 @@ void qvirtio_set_driver_ok(QVirtioDevice *d) { d->bus->set_status(d, d->bus->get_status(d) | VIRTIO_CONFIG_S_DRIVER_OK); g_assert_cmphex(d->bus->get_status(d), ==, VIRTIO_CONFIG_S_DRIVER_OK | - VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE); + VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE | + (d->features & (1ull << VIRTIO_F_VERSION_1) ? + VIRTIO_CONFIG_S_FEATURES_OK : 0)); } void qvirtio_wait_queue_isr(QTestState *qts, QVirtioDevice *d, From bccd82b4073b7242876480d7f4d93bd281feb548 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:18 +0100 Subject: [PATCH 17/25] libqos: access VIRTIO 1.0 vring in little-endian VIRTIO 1.0 uses little-endian for the vring. Legacy VIRTIO uses guest endianness. Adjust the code to handle both. Note that qvirtio_readq() is not defined because it has no users. All the other accessors are really needed. Signed-off-by: Stefan Hajnoczi Reviewed-by: Thomas Huth Message-Id: <20191023100425.12168-10-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/libqos/virtio-mmio.c | 1 + tests/libqos/virtio-pci.c | 1 + tests/libqos/virtio.c | 131 +++++++++++++++++++++++++++---------- tests/libqos/virtio.h | 4 +- tests/virtio-blk-test.c | 8 +-- 5 files changed, 106 insertions(+), 39 deletions(-) diff --git a/tests/libqos/virtio-mmio.c b/tests/libqos/virtio-mmio.c index 78066e6e05..4db1f1b8bc 100644 --- a/tests/libqos/virtio-mmio.c +++ b/tests/libqos/virtio-mmio.c @@ -157,6 +157,7 @@ static QVirtQueue *qvirtio_mmio_virtqueue_setup(QVirtioDevice *d, uint64_t addr; vq = g_malloc0(sizeof(*vq)); + vq->vdev = d; qvirtio_mmio_queue_select(d, index); qtest_writel(dev->qts, dev->addr + QVIRTIO_MMIO_QUEUE_ALIGN, dev->page_size); diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index 1b6b760fc6..7ecf5d0a52 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -217,6 +217,7 @@ static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d, feat = qvirtio_pci_get_guest_features(d); qvirtio_pci_queue_select(d, index); + vqpci->vq.vdev = d; vqpci->vq.index = index; vqpci->vq.size = qvirtio_pci_get_queue_size(d); vqpci->vq.free_head = 0; diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c index fa597c2481..9aa360620c 100644 --- a/tests/libqos/virtio.c +++ b/tests/libqos/virtio.c @@ -8,11 +8,68 @@ */ #include "qemu/osdep.h" +#include "qemu/bswap.h" #include "libqtest.h" #include "libqos/virtio.h" #include "standard-headers/linux/virtio_config.h" #include "standard-headers/linux/virtio_ring.h" +/* + * qtest_readX/writeX() functions transfer host endian from/to guest endian. + * This works great for Legacy VIRTIO devices where we need guest endian + * accesses. For VIRTIO 1.0 the vring is little-endian so the automatic guest + * endianness conversion is not wanted. + * + * The following qvirtio_readX/writeX() functions handle Legacy and VIRTIO 1.0 + * accesses seamlessly. + */ +static uint16_t qvirtio_readw(QVirtioDevice *d, QTestState *qts, uint64_t addr) +{ + uint16_t val = qtest_readw(qts, addr); + + if (d->features & (1ull << VIRTIO_F_VERSION_1) && qtest_big_endian(qts)) { + val = bswap16(val); + } + return val; +} + +static uint32_t qvirtio_readl(QVirtioDevice *d, QTestState *qts, uint64_t addr) +{ + uint32_t val = qtest_readl(qts, addr); + + if (d->features & (1ull << VIRTIO_F_VERSION_1) && qtest_big_endian(qts)) { + val = bswap32(val); + } + return val; +} + +static void qvirtio_writew(QVirtioDevice *d, QTestState *qts, + uint64_t addr, uint16_t val) +{ + if (d->features & (1ull << VIRTIO_F_VERSION_1) && qtest_big_endian(qts)) { + val = bswap16(val); + } + qtest_writew(qts, addr, val); +} + +static void qvirtio_writel(QVirtioDevice *d, QTestState *qts, + uint64_t addr, uint32_t val) +{ + if (d->features & (1ull << VIRTIO_F_VERSION_1) && qtest_big_endian(qts)) { + val = bswap32(val); + } + qtest_writel(qts, addr, val); +} + +static void qvirtio_writeq(QVirtioDevice *d, QTestState *qts, + uint64_t addr, uint64_t val) +{ + if (d->features & (1ull << VIRTIO_F_VERSION_1) && qtest_big_endian(qts)) { + val = bswap64(val); + } + qtest_writeq(qts, addr, val); +} + uint8_t qvirtio_config_readb(QVirtioDevice *d, uint64_t addr) { g_assert_true(d->features_negotiated); @@ -189,23 +246,23 @@ void qvring_init(QTestState *qts, const QGuestAllocator *alloc, QVirtQueue *vq, for (i = 0; i < vq->size - 1; i++) { /* vq->desc[i].addr */ - qtest_writeq(qts, vq->desc + (16 * i), 0); + qvirtio_writeq(vq->vdev, qts, vq->desc + (16 * i), 0); /* vq->desc[i].next */ - qtest_writew(qts, vq->desc + (16 * i) + 14, i + 1); + qvirtio_writew(vq->vdev, qts, vq->desc + (16 * i) + 14, i + 1); } /* vq->avail->flags */ - qtest_writew(qts, vq->avail, 0); + qvirtio_writew(vq->vdev, qts, vq->avail, 0); /* vq->avail->idx */ - qtest_writew(qts, vq->avail + 2, 0); + qvirtio_writew(vq->vdev, qts, vq->avail + 2, 0); /* vq->avail->used_event */ - qtest_writew(qts, vq->avail + 4 + (2 * vq->size), 0); + qvirtio_writew(vq->vdev, qts, vq->avail + 4 + (2 * vq->size), 0); /* vq->used->flags */ - qtest_writew(qts, vq->used, 0); + qvirtio_writew(vq->vdev, qts, vq->used, 0); /* vq->used->avail_event */ - qtest_writew(qts, vq->used + 2 + sizeof(struct vring_used_elem) * vq->size, - 0); + qvirtio_writew(vq->vdev, qts, vq->used + 2 + + sizeof(struct vring_used_elem) * vq->size, 0); } QVRingIndirectDesc *qvring_indirect_desc_setup(QTestState *qs, QVirtioDevice *d, @@ -221,35 +278,39 @@ QVRingIndirectDesc *qvring_indirect_desc_setup(QTestState *qs, QVirtioDevice *d, for (i = 0; i < elem - 1; ++i) { /* indirect->desc[i].addr */ - qtest_writeq(qs, indirect->desc + (16 * i), 0); + qvirtio_writeq(d, qs, indirect->desc + (16 * i), 0); /* indirect->desc[i].flags */ - qtest_writew(qs, indirect->desc + (16 * i) + 12, VRING_DESC_F_NEXT); + qvirtio_writew(d, qs, indirect->desc + (16 * i) + 12, + VRING_DESC_F_NEXT); /* indirect->desc[i].next */ - qtest_writew(qs, indirect->desc + (16 * i) + 14, i + 1); + qvirtio_writew(d, qs, indirect->desc + (16 * i) + 14, i + 1); } return indirect; } -void qvring_indirect_desc_add(QTestState *qts, QVRingIndirectDesc *indirect, +void qvring_indirect_desc_add(QVirtioDevice *d, QTestState *qts, + QVRingIndirectDesc *indirect, uint64_t data, uint32_t len, bool write) { uint16_t flags; g_assert_cmpint(indirect->index, <, indirect->elem); - flags = qtest_readw(qts, indirect->desc + (16 * indirect->index) + 12); + flags = qvirtio_readw(d, qts, indirect->desc + + (16 * indirect->index) + 12); if (write) { flags |= VRING_DESC_F_WRITE; } /* indirect->desc[indirect->index].addr */ - qtest_writeq(qts, indirect->desc + (16 * indirect->index), data); + qvirtio_writeq(d, qts, indirect->desc + (16 * indirect->index), data); /* indirect->desc[indirect->index].len */ - qtest_writel(qts, indirect->desc + (16 * indirect->index) + 8, len); + qvirtio_writel(d, qts, indirect->desc + (16 * indirect->index) + 8, len); /* indirect->desc[indirect->index].flags */ - qtest_writew(qts, indirect->desc + (16 * indirect->index) + 12, flags); + qvirtio_writew(d, qts, indirect->desc + (16 * indirect->index) + 12, + flags); indirect->index++; } @@ -269,11 +330,11 @@ uint32_t qvirtqueue_add(QTestState *qts, QVirtQueue *vq, uint64_t data, } /* vq->desc[vq->free_head].addr */ - qtest_writeq(qts, vq->desc + (16 * vq->free_head), data); + qvirtio_writeq(vq->vdev, qts, vq->desc + (16 * vq->free_head), data); /* vq->desc[vq->free_head].len */ - qtest_writel(qts, vq->desc + (16 * vq->free_head) + 8, len); + qvirtio_writel(vq->vdev, qts, vq->desc + (16 * vq->free_head) + 8, len); /* vq->desc[vq->free_head].flags */ - qtest_writew(qts, vq->desc + (16 * vq->free_head) + 12, flags); + qvirtio_writew(vq->vdev, qts, vq->desc + (16 * vq->free_head) + 12, flags); return vq->free_head++; /* Return and increase, in this order */ } @@ -288,13 +349,14 @@ uint32_t qvirtqueue_add_indirect(QTestState *qts, QVirtQueue *vq, vq->num_free--; /* vq->desc[vq->free_head].addr */ - qtest_writeq(qts, vq->desc + (16 * vq->free_head), indirect->desc); + qvirtio_writeq(vq->vdev, qts, vq->desc + (16 * vq->free_head), + indirect->desc); /* vq->desc[vq->free_head].len */ - qtest_writel(qts, vq->desc + (16 * vq->free_head) + 8, - sizeof(struct vring_desc) * indirect->elem); + qvirtio_writel(vq->vdev, qts, vq->desc + (16 * vq->free_head) + 8, + sizeof(struct vring_desc) * indirect->elem); /* vq->desc[vq->free_head].flags */ - qtest_writew(qts, vq->desc + (16 * vq->free_head) + 12, - VRING_DESC_F_INDIRECT); + qvirtio_writew(vq->vdev, qts, vq->desc + (16 * vq->free_head) + 12, + VRING_DESC_F_INDIRECT); return vq->free_head++; /* Return and increase, in this order */ } @@ -303,21 +365,21 @@ void qvirtqueue_kick(QTestState *qts, QVirtioDevice *d, QVirtQueue *vq, uint32_t free_head) { /* vq->avail->idx */ - uint16_t idx = qtest_readw(qts, vq->avail + 2); + uint16_t idx = qvirtio_readw(d, qts, vq->avail + 2); /* vq->used->flags */ uint16_t flags; /* vq->used->avail_event */ uint16_t avail_event; /* vq->avail->ring[idx % vq->size] */ - qtest_writew(qts, vq->avail + 4 + (2 * (idx % vq->size)), free_head); + qvirtio_writew(d, qts, vq->avail + 4 + (2 * (idx % vq->size)), free_head); /* vq->avail->idx */ - qtest_writew(qts, vq->avail + 2, idx + 1); + qvirtio_writew(d, qts, vq->avail + 2, idx + 1); /* Must read after idx is updated */ - flags = qtest_readw(qts, vq->avail); - avail_event = qtest_readw(qts, vq->used + 4 + - sizeof(struct vring_used_elem) * vq->size); + flags = qvirtio_readw(d, qts, vq->avail); + avail_event = qvirtio_readw(d, qts, vq->used + 4 + + sizeof(struct vring_used_elem) * vq->size); /* < 1 because we add elements to avail queue one by one */ if ((flags & VRING_USED_F_NO_NOTIFY) == 0 && @@ -342,7 +404,8 @@ bool qvirtqueue_get_buf(QTestState *qts, QVirtQueue *vq, uint32_t *desc_idx, uint16_t idx; uint64_t elem_addr, addr; - idx = qtest_readw(qts, vq->used + offsetof(struct vring_used, idx)); + idx = qvirtio_readw(vq->vdev, qts, + vq->used + offsetof(struct vring_used, idx)); if (idx == vq->last_used_idx) { return false; } @@ -354,12 +417,12 @@ bool qvirtqueue_get_buf(QTestState *qts, QVirtQueue *vq, uint32_t *desc_idx, if (desc_idx) { addr = elem_addr + offsetof(struct vring_used_elem, id); - *desc_idx = qtest_readl(qts, addr); + *desc_idx = qvirtio_readl(vq->vdev, qts, addr); } if (len) { addr = elem_addr + offsetof(struct vring_used_elem, len); - *len = qtest_readw(qts, addr); + *len = qvirtio_readw(vq->vdev, qts, addr); } vq->last_used_idx++; @@ -371,7 +434,7 @@ void qvirtqueue_set_used_event(QTestState *qts, QVirtQueue *vq, uint16_t idx) g_assert(vq->event); /* vq->avail->used_event */ - qtest_writew(qts, vq->avail + 4 + (2 * vq->size), idx); + qvirtio_writew(vq->vdev, qts, vq->avail + 4 + (2 * vq->size), idx); } void qvirtio_start_device(QVirtioDevice *vdev) diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h index 0e8f823c7b..ebbff5193b 100644 --- a/tests/libqos/virtio.h +++ b/tests/libqos/virtio.h @@ -27,6 +27,7 @@ typedef struct QVirtioDevice { } QVirtioDevice; typedef struct QVirtQueue { + QVirtioDevice *vdev; uint64_t desc; /* This points to an array of struct vring_desc */ uint64_t avail; /* This points to a struct vring_avail */ uint64_t used; /* This points to a struct vring_used */ @@ -135,7 +136,8 @@ void qvring_init(QTestState *qts, const QGuestAllocator *alloc, QVirtQueue *vq, QVRingIndirectDesc *qvring_indirect_desc_setup(QTestState *qs, QVirtioDevice *d, QGuestAllocator *alloc, uint16_t elem); -void qvring_indirect_desc_add(QTestState *qts, QVRingIndirectDesc *indirect, +void qvring_indirect_desc_add(QVirtioDevice *d, QTestState *qts, + QVRingIndirectDesc *indirect, uint64_t data, uint32_t len, bool write); uint32_t qvirtqueue_add(QTestState *qts, QVirtQueue *vq, uint64_t data, uint32_t len, bool write, bool next); diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index fe0dc4a896..2a23698211 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -388,8 +388,8 @@ static void indirect(void *obj, void *u_data, QGuestAllocator *t_alloc) g_free(req.data); indirect = qvring_indirect_desc_setup(qts, dev, t_alloc, 2); - qvring_indirect_desc_add(qts, indirect, req_addr, 528, false); - qvring_indirect_desc_add(qts, indirect, req_addr + 528, 1, true); + qvring_indirect_desc_add(dev, qts, indirect, req_addr, 528, false); + qvring_indirect_desc_add(dev, qts, indirect, req_addr + 528, 1, true); free_head = qvirtqueue_add_indirect(qts, vq, indirect); qvirtqueue_kick(qts, dev, vq, free_head); @@ -413,8 +413,8 @@ static void indirect(void *obj, void *u_data, QGuestAllocator *t_alloc) g_free(req.data); indirect = qvring_indirect_desc_setup(qts, dev, t_alloc, 2); - qvring_indirect_desc_add(qts, indirect, req_addr, 16, false); - qvring_indirect_desc_add(qts, indirect, req_addr + 16, 513, true); + qvring_indirect_desc_add(dev, qts, indirect, req_addr, 16, false); + qvring_indirect_desc_add(dev, qts, indirect, req_addr + 16, 513, true); free_head = qvirtqueue_add_indirect(qts, vq, indirect); qvirtqueue_kick(qts, dev, vq, free_head); From a32a028aa702d7bcee0cd19a588d04d15a4437e1 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:19 +0100 Subject: [PATCH 18/25] libqos: add iteration support to qpci_find_capability() VIRTIO 1.0 PCI devices have multiple PCI_CAP_ID_VNDR capabilities so we need a way to iterate over them. Extend qpci_find_capability() to take the last address. Signed-off-by: Stefan Hajnoczi Reviewed-by: Thomas Huth -- v3: * Document qpci_find_capability() Message-Id: <20191023100425.12168-11-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/libqos/pci.c | 30 ++++++++++++++++++++++++------ tests/libqos/pci.h | 2 +- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tests/libqos/pci.c b/tests/libqos/pci.c index 662ee7a517..2309a724e4 100644 --- a/tests/libqos/pci.c +++ b/tests/libqos/pci.c @@ -115,10 +115,28 @@ void qpci_device_enable(QPCIDevice *dev) g_assert_cmphex(cmd & PCI_COMMAND_MASTER, ==, PCI_COMMAND_MASTER); } -uint8_t qpci_find_capability(QPCIDevice *dev, uint8_t id) +/** + * qpci_find_capability: + * @dev: the PCI device + * @id: the PCI Capability ID (PCI_CAP_ID_*) + * @start_addr: 0 to begin iteration or the last return value to continue + * iteration + * + * Iterate over the PCI Capabilities List. + * + * Returns: PCI Configuration Space offset of the capabililty structure or + * 0 if no further matching capability is found + */ +uint8_t qpci_find_capability(QPCIDevice *dev, uint8_t id, uint8_t start_addr) { uint8_t cap; - uint8_t addr = qpci_config_readb(dev, PCI_CAPABILITY_LIST); + uint8_t addr; + + if (start_addr) { + addr = qpci_config_readb(dev, start_addr + PCI_CAP_LIST_NEXT); + } else { + addr = qpci_config_readb(dev, PCI_CAPABILITY_LIST); + } do { cap = qpci_config_readb(dev, addr); @@ -138,7 +156,7 @@ void qpci_msix_enable(QPCIDevice *dev) uint8_t bir_table; uint8_t bir_pba; - addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX); + addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX, 0); g_assert_cmphex(addr, !=, 0); val = qpci_config_readw(dev, addr + PCI_MSIX_FLAGS); @@ -167,7 +185,7 @@ void qpci_msix_disable(QPCIDevice *dev) uint16_t val; g_assert(dev->msix_enabled); - addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX); + addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX, 0); g_assert_cmphex(addr, !=, 0); val = qpci_config_readw(dev, addr + PCI_MSIX_FLAGS); qpci_config_writew(dev, addr + PCI_MSIX_FLAGS, @@ -203,7 +221,7 @@ bool qpci_msix_masked(QPCIDevice *dev, uint16_t entry) uint64_t vector_off = dev->msix_table_off + entry * PCI_MSIX_ENTRY_SIZE; g_assert(dev->msix_enabled); - addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX); + addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX, 0); g_assert_cmphex(addr, !=, 0); val = qpci_config_readw(dev, addr + PCI_MSIX_FLAGS); @@ -221,7 +239,7 @@ uint16_t qpci_msix_table_size(QPCIDevice *dev) uint8_t addr; uint16_t control; - addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX); + addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX, 0); g_assert_cmphex(addr, !=, 0); control = qpci_config_readw(dev, addr + PCI_MSIX_FLAGS); diff --git a/tests/libqos/pci.h b/tests/libqos/pci.h index a5389a5845..590c175190 100644 --- a/tests/libqos/pci.h +++ b/tests/libqos/pci.h @@ -86,7 +86,7 @@ bool qpci_has_buggy_msi(QPCIDevice *dev); bool qpci_check_buggy_msi(QPCIDevice *dev); void qpci_device_enable(QPCIDevice *dev); -uint8_t qpci_find_capability(QPCIDevice *dev, uint8_t id); +uint8_t qpci_find_capability(QPCIDevice *dev, uint8_t id, uint8_t start_addr); void qpci_msix_enable(QPCIDevice *dev); void qpci_msix_disable(QPCIDevice *dev); bool qpci_msix_pending(QPCIDevice *dev, uint16_t entry); From 1e59a866100caf205782334bf7f7a40f28fd23be Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:20 +0100 Subject: [PATCH 19/25] libqos: pass full QVirtQueue to set_queue_address() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of just passing the vring page frame number, pass the full QVirtQueue. This will allow the VIRTIO 1.0 transport to program the fine-grained vring address registers in the future. Signed-off-by: Stefan Hajnoczi Reviewed-by: Sergio Lopez Reviewed-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20191023100425.12168-12-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/libqos/virtio-mmio.c | 6 ++++-- tests/libqos/virtio-pci.c | 6 ++++-- tests/libqos/virtio.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/libqos/virtio-mmio.c b/tests/libqos/virtio-mmio.c index 4db1f1b8bc..e0a2bd7bc6 100644 --- a/tests/libqos/virtio-mmio.c +++ b/tests/libqos/virtio-mmio.c @@ -143,9 +143,11 @@ static uint16_t qvirtio_mmio_get_queue_size(QVirtioDevice *d) return (uint16_t)qtest_readl(dev->qts, dev->addr + QVIRTIO_MMIO_QUEUE_NUM_MAX); } -static void qvirtio_mmio_set_queue_address(QVirtioDevice *d, uint32_t pfn) +static void qvirtio_mmio_set_queue_address(QVirtioDevice *d, QVirtQueue *vq) { QVirtioMMIODevice *dev = container_of(d, QVirtioMMIODevice, vdev); + uint64_t pfn = vq->desc / dev->page_size; + qtest_writel(dev->qts, dev->addr + QVIRTIO_MMIO_QUEUE_PFN, pfn); } @@ -179,7 +181,7 @@ static QVirtQueue *qvirtio_mmio_virtqueue_setup(QVirtioDevice *d, addr = guest_alloc(alloc, qvring_size(vq->size, dev->page_size)); qvring_init(dev->qts, alloc, vq, addr); - qvirtio_mmio_set_queue_address(d, vq->desc / dev->page_size); + qvirtio_mmio_set_queue_address(d, vq); return vq; } diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index 7ecf5d0a52..e4fa318dcc 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -199,9 +199,11 @@ static uint16_t qvirtio_pci_get_queue_size(QVirtioDevice *d) return qpci_io_readw(dev->pdev, dev->bar, VIRTIO_PCI_QUEUE_NUM); } -static void qvirtio_pci_set_queue_address(QVirtioDevice *d, uint32_t pfn) +static void qvirtio_pci_set_queue_address(QVirtioDevice *d, QVirtQueue *vq) { QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + uint64_t pfn = vq->desc / VIRTIO_PCI_VRING_ALIGN; + qpci_io_writel(dev->pdev, dev->bar, VIRTIO_PCI_QUEUE_PFN, pfn); } @@ -239,7 +241,7 @@ static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d, addr = guest_alloc(alloc, qvring_size(vqpci->vq.size, VIRTIO_PCI_VRING_ALIGN)); qvring_init(qvpcidev->pdev->bus->qts, alloc, &vqpci->vq, addr); - qvirtio_pci_set_queue_address(d, vqpci->vq.desc / VIRTIO_PCI_VRING_ALIGN); + qvirtio_pci_set_queue_address(d, &vqpci->vq); return &vqpci->vq; } diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h index ebbff5193b..529ef7555a 100644 --- a/tests/libqos/virtio.h +++ b/tests/libqos/virtio.h @@ -81,7 +81,7 @@ struct QVirtioBus { uint16_t (*get_queue_size)(QVirtioDevice *d); /* Set the address of the selected queue */ - void (*set_queue_address)(QVirtioDevice *d, uint32_t pfn); + void (*set_queue_address)(QVirtioDevice *d, QVirtQueue *vq); /* Setup the virtqueue specified by index */ QVirtQueue *(*virtqueue_setup)(QVirtioDevice *d, QGuestAllocator *alloc, From 957d8d1d06fa15e1db50166d17a4fe9094f7fec3 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:21 +0100 Subject: [PATCH 20/25] libqos: add MSI-X callbacks to QVirtioPCIDevice The MSI-X vectors are programmed differently in the VIRTIO 1.0 and Legacy interfaces. Introduce callbacks so different implementations can be used depending on the interface version. Signed-off-by: Stefan Hajnoczi Reviewed-by: Sergio Lopez Reviewed-by: Thomas Huth Message-Id: <20191023100425.12168-13-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/libqos/virtio-pci.c | 37 ++++++++++++++++++++++++++++--------- tests/libqos/virtio-pci.h | 12 ++++++++++++ 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index e4fa318dcc..0725777a8d 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -281,6 +281,31 @@ const QVirtioBus qvirtio_pci = { .virtqueue_kick = qvirtio_pci_virtqueue_kick, }; +static void qvirtio_pci_set_config_vector(QVirtioPCIDevice *d, uint16_t entry) +{ + uint16_t vector; + + qpci_io_writew(d->pdev, d->bar, VIRTIO_MSI_CONFIG_VECTOR, entry); + vector = qpci_io_readw(d->pdev, d->bar, VIRTIO_MSI_CONFIG_VECTOR); + g_assert_cmphex(vector, !=, VIRTIO_MSI_NO_VECTOR); +} + +static void qvirtio_pci_set_queue_vector(QVirtioPCIDevice *d, uint16_t vq_idx, + uint16_t entry) +{ + uint16_t vector; + + qvirtio_pci_queue_select(&d->vdev, vq_idx); + qpci_io_writew(d->pdev, d->bar, VIRTIO_MSI_QUEUE_VECTOR, entry); + vector = qpci_io_readw(d->pdev, d->bar, VIRTIO_MSI_QUEUE_VECTOR); + g_assert_cmphex(vector, !=, VIRTIO_MSI_NO_VECTOR); +} + +static const QVirtioPCIMSIXOps qvirtio_pci_msix_ops_legacy = { + .set_config_vector = qvirtio_pci_set_config_vector, + .set_queue_vector = qvirtio_pci_set_queue_vector, +}; + void qvirtio_pci_device_enable(QVirtioPCIDevice *d) { qpci_device_enable(d->pdev); @@ -295,7 +320,6 @@ void qvirtio_pci_device_disable(QVirtioPCIDevice *d) void qvirtqueue_pci_msix_setup(QVirtioPCIDevice *d, QVirtQueuePCI *vqpci, QGuestAllocator *alloc, uint16_t entry) { - uint16_t vector; uint32_t control; uint64_t off; @@ -321,16 +345,12 @@ void qvirtqueue_pci_msix_setup(QVirtioPCIDevice *d, QVirtQueuePCI *vqpci, off + PCI_MSIX_ENTRY_VECTOR_CTRL, control & ~PCI_MSIX_ENTRY_CTRL_MASKBIT); - qvirtio_pci_queue_select(&d->vdev, vqpci->vq.index); - qpci_io_writew(d->pdev, d->bar, VIRTIO_MSI_QUEUE_VECTOR, entry); - vector = qpci_io_readw(d->pdev, d->bar, VIRTIO_MSI_QUEUE_VECTOR); - g_assert_cmphex(vector, !=, VIRTIO_MSI_NO_VECTOR); + d->msix_ops->set_queue_vector(d, vqpci->vq.index, entry); } void qvirtio_pci_set_msix_configuration_vector(QVirtioPCIDevice *d, QGuestAllocator *alloc, uint16_t entry) { - uint16_t vector; uint32_t control; uint64_t off; @@ -358,9 +378,7 @@ void qvirtio_pci_set_msix_configuration_vector(QVirtioPCIDevice *d, off + PCI_MSIX_ENTRY_VECTOR_CTRL, control & ~PCI_MSIX_ENTRY_CTRL_MASKBIT); - qpci_io_writew(d->pdev, d->bar, VIRTIO_MSI_CONFIG_VECTOR, entry); - vector = qpci_io_readw(d->pdev, d->bar, VIRTIO_MSI_CONFIG_VECTOR); - g_assert_cmphex(vector, !=, VIRTIO_MSI_NO_VECTOR); + d->msix_ops->set_config_vector(d, entry); } void qvirtio_pci_destructor(QOSGraphObject *obj) @@ -383,6 +401,7 @@ static void qvirtio_pci_init_from_pcidev(QVirtioPCIDevice *dev, QPCIDevice *pci_ dev->vdev.device_type = qpci_config_readw(pci_dev, PCI_SUBSYSTEM_ID); dev->config_msix_entry = -1; + dev->msix_ops = &qvirtio_pci_msix_ops_legacy; dev->vdev.bus = &qvirtio_pci; dev->vdev.big_endian = qvirtio_pci_is_big_endian(dev); diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h index 728b4715f1..4299efc023 100644 --- a/tests/libqos/virtio-pci.h +++ b/tests/libqos/virtio-pci.h @@ -14,16 +14,28 @@ #include "libqos/pci.h" #include "libqos/qgraph.h" +typedef struct QVirtioPCIMSIXOps QVirtioPCIMSIXOps; + typedef struct QVirtioPCIDevice { QOSGraphObject obj; QVirtioDevice vdev; QPCIDevice *pdev; QPCIBar bar; + const QVirtioPCIMSIXOps *msix_ops; uint16_t config_msix_entry; uint64_t config_msix_addr; uint32_t config_msix_data; } QVirtioPCIDevice; +struct QVirtioPCIMSIXOps { + /* Set the Configuration Vector for MSI-X */ + void (*set_config_vector)(QVirtioPCIDevice *d, uint16_t entry); + + /* Set the Queue Vector for MSI-X */ + void (*set_queue_vector)(QVirtioPCIDevice *d, uint16_t vq_idx, + uint16_t entry); +}; + typedef struct QVirtQueuePCI { QVirtQueue vq; uint16_t msix_entry; From e56536bc8b897efdf069c25e68202e521cdcbb49 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:22 +0100 Subject: [PATCH 21/25] libqos: expose common virtqueue setup/cleanup functions The VIRTIO 1.0 code will need to perform additional steps but it will reuse the common virtqueue setup/cleanup code. Make these functions public. Make sure to invoke callbacks via QVirtioBus instead of directly calling the virtio-pci Legacy versions of these functions. Signed-off-by: Stefan Hajnoczi Reviewed-by: Sergio Lopez Reviewed-by: Thomas Huth Message-Id: <20191023100425.12168-14-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/libqos/virtio-pci.c | 19 ++++++++++--------- tests/libqos/virtio-pci.h | 8 ++++++++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index 0725777a8d..c900742f96 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -207,8 +207,9 @@ static void qvirtio_pci_set_queue_address(QVirtioDevice *d, QVirtQueue *vq) qpci_io_writel(dev->pdev, dev->bar, VIRTIO_PCI_QUEUE_PFN, pfn); } -static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d, - QGuestAllocator *alloc, uint16_t index) +QVirtQueue *qvirtio_pci_virtqueue_setup_common(QVirtioDevice *d, + QGuestAllocator *alloc, + uint16_t index) { uint64_t feat; uint64_t addr; @@ -216,12 +217,12 @@ static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d, QVirtioPCIDevice *qvpcidev = container_of(d, QVirtioPCIDevice, vdev); vqpci = g_malloc0(sizeof(*vqpci)); - feat = qvirtio_pci_get_guest_features(d); + feat = d->bus->get_guest_features(d); - qvirtio_pci_queue_select(d, index); + d->bus->queue_select(d, index); vqpci->vq.vdev = d; vqpci->vq.index = index; - vqpci->vq.size = qvirtio_pci_get_queue_size(d); + vqpci->vq.size = d->bus->get_queue_size(d); vqpci->vq.free_head = 0; vqpci->vq.num_free = vqpci->vq.size; vqpci->vq.align = VIRTIO_PCI_VRING_ALIGN; @@ -241,12 +242,12 @@ static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d, addr = guest_alloc(alloc, qvring_size(vqpci->vq.size, VIRTIO_PCI_VRING_ALIGN)); qvring_init(qvpcidev->pdev->bus->qts, alloc, &vqpci->vq, addr); - qvirtio_pci_set_queue_address(d, &vqpci->vq); + d->bus->set_queue_address(d, &vqpci->vq); return &vqpci->vq; } -static void qvirtio_pci_virtqueue_cleanup(QVirtQueue *vq, +void qvirtio_pci_virtqueue_cleanup_common(QVirtQueue *vq, QGuestAllocator *alloc) { QVirtQueuePCI *vqpci = container_of(vq, QVirtQueuePCI, vq); @@ -276,8 +277,8 @@ const QVirtioBus qvirtio_pci = { .queue_select = qvirtio_pci_queue_select, .get_queue_size = qvirtio_pci_get_queue_size, .set_queue_address = qvirtio_pci_set_queue_address, - .virtqueue_setup = qvirtio_pci_virtqueue_setup, - .virtqueue_cleanup = qvirtio_pci_virtqueue_cleanup, + .virtqueue_setup = qvirtio_pci_virtqueue_setup_common, + .virtqueue_cleanup = qvirtio_pci_virtqueue_cleanup_common, .virtqueue_kick = qvirtio_pci_virtqueue_kick, }; diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h index 4299efc023..0e4a8b7b00 100644 --- a/tests/libqos/virtio-pci.h +++ b/tests/libqos/virtio-pci.h @@ -65,4 +65,12 @@ void qvirtio_pci_set_msix_configuration_vector(QVirtioPCIDevice *d, QGuestAllocator *alloc, uint16_t entry); void qvirtqueue_pci_msix_setup(QVirtioPCIDevice *d, QVirtQueuePCI *vqpci, QGuestAllocator *alloc, uint16_t entry); + +/* Used by Legacy and Modern virtio-pci code */ +QVirtQueue *qvirtio_pci_virtqueue_setup_common(QVirtioDevice *d, + QGuestAllocator *alloc, + uint16_t index); +void qvirtio_pci_virtqueue_cleanup_common(QVirtQueue *vq, + QGuestAllocator *alloc); + #endif From f17429e545dae46fbe85eebd89fec4e720a1e241 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:23 +0100 Subject: [PATCH 22/25] libqos: make the virtio-pci BAR index configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Legacy virtio-pci interface always uses BAR 0. VIRTIO 1.0 may need to use a different BAR index, so make it configurable. Signed-off-by: Stefan Hajnoczi Reviewed-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20191023100425.12168-15-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/libqos/virtio-pci.c | 3 ++- tests/libqos/virtio-pci.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index c900742f96..e9595603f5 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -310,7 +310,7 @@ static const QVirtioPCIMSIXOps qvirtio_pci_msix_ops_legacy = { void qvirtio_pci_device_enable(QVirtioPCIDevice *d) { qpci_device_enable(d->pdev); - d->bar = qpci_iomap(d->pdev, 0, NULL); + d->bar = qpci_iomap(d->pdev, d->bar_idx, NULL); } void qvirtio_pci_device_disable(QVirtioPCIDevice *d) @@ -400,6 +400,7 @@ static void qvirtio_pci_init_from_pcidev(QVirtioPCIDevice *dev, QPCIDevice *pci_ { dev->pdev = pci_dev; dev->vdev.device_type = qpci_config_readw(pci_dev, PCI_SUBSYSTEM_ID); + dev->bar_idx = 0; dev->config_msix_entry = -1; dev->msix_ops = &qvirtio_pci_msix_ops_legacy; diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h index 0e4a8b7b00..78a1c15c2a 100644 --- a/tests/libqos/virtio-pci.h +++ b/tests/libqos/virtio-pci.h @@ -25,6 +25,8 @@ typedef struct QVirtioPCIDevice { uint16_t config_msix_entry; uint64_t config_msix_addr; uint32_t config_msix_data; + + int bar_idx; } QVirtioPCIDevice; struct QVirtioPCIMSIXOps { From 9598f9e4023cc4b0f1f4c4964e4d0debc30cc77c Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:24 +0100 Subject: [PATCH 23/25] libqos: extract Legacy virtio-pci.c code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current libqos virtio-pci.c code implements the VIRTIO Legacy interface. Extract existing code in preparation for VIRTIO 1.0 support. Signed-off-by: Stefan Hajnoczi Reviewed-by: Sergio Lopez Reviewed-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20191023100425.12168-16-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/libqos/virtio-pci.c | 29 ++++++++++++----------------- tests/libqos/virtio-pci.h | 2 -- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index e9595603f5..11866f7772 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -35,14 +35,6 @@ * original qvirtio_pci_destructor and qvirtio_pci_start_hw. */ -static inline bool qvirtio_pci_is_big_endian(QVirtioPCIDevice *dev) -{ - QPCIBus *bus = dev->pdev->bus; - - /* FIXME: virtio 1.0 is always little-endian */ - return qtest_big_endian(bus->qts); -} - #define CONFIG_BASE(dev) (VIRTIO_PCI_CONFIG_OFF((dev)->pdev->msix_enabled)) static uint8_t qvirtio_pci_config_readb(QVirtioDevice *d, uint64_t off) @@ -55,8 +47,7 @@ static uint8_t qvirtio_pci_config_readb(QVirtioDevice *d, uint64_t off) * but virtio ( < 1.0) is in guest order * so with a big-endian guest the order has been reversed, * reverse it again - * virtio-1.0 is always little-endian, like PCI, but this - * case will be managed inside qvirtio_pci_is_big_endian() + * virtio-1.0 is always little-endian, like PCI */ static uint16_t qvirtio_pci_config_readw(QVirtioDevice *d, uint64_t off) @@ -262,7 +253,7 @@ static void qvirtio_pci_virtqueue_kick(QVirtioDevice *d, QVirtQueue *vq) qpci_io_writew(dev->pdev, dev->bar, VIRTIO_PCI_QUEUE_NOTIFY, vq->index); } -const QVirtioBus qvirtio_pci = { +static const QVirtioBus qvirtio_pci_legacy = { .config_readb = qvirtio_pci_config_readb, .config_readw = qvirtio_pci_config_readw, .config_readl = qvirtio_pci_config_readl, @@ -396,17 +387,21 @@ void qvirtio_pci_start_hw(QOSGraphObject *obj) qvirtio_start_device(&dev->vdev); } +static void qvirtio_pci_init_legacy(QVirtioPCIDevice *dev) +{ + dev->vdev.device_type = qpci_config_readw(dev->pdev, PCI_SUBSYSTEM_ID); + dev->bar_idx = 0; + dev->vdev.bus = &qvirtio_pci_legacy; + dev->msix_ops = &qvirtio_pci_msix_ops_legacy; + dev->vdev.big_endian = qtest_big_endian(dev->pdev->bus->qts); +} + static void qvirtio_pci_init_from_pcidev(QVirtioPCIDevice *dev, QPCIDevice *pci_dev) { dev->pdev = pci_dev; - dev->vdev.device_type = qpci_config_readw(pci_dev, PCI_SUBSYSTEM_ID); - dev->bar_idx = 0; - dev->config_msix_entry = -1; - dev->msix_ops = &qvirtio_pci_msix_ops_legacy; - dev->vdev.bus = &qvirtio_pci; - dev->vdev.big_endian = qvirtio_pci_is_big_endian(dev); + qvirtio_pci_init_legacy(dev); /* each virtio-xxx-pci device should override at least this function */ dev->obj.get_driver = NULL; diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h index 78a1c15c2a..6b3a385b06 100644 --- a/tests/libqos/virtio-pci.h +++ b/tests/libqos/virtio-pci.h @@ -45,8 +45,6 @@ typedef struct QVirtQueuePCI { uint32_t msix_data; } QVirtQueuePCI; -extern const QVirtioBus qvirtio_pci; - void virtio_pci_init(QVirtioPCIDevice *dev, QPCIBus *bus, QPCIAddress * addr); QVirtioPCIDevice *virtio_pci_new(QPCIBus *bus, QPCIAddress * addr); From d08f68b8e858fc761e88ae66349b6eab1235e3fe Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Oct 2019 11:04:25 +0100 Subject: [PATCH 24/25] libqos: add VIRTIO PCI 1.0 support Implement the VIRTIO 1.0 virtio-pci interface. The main change here is that the register layout is no longer a fixed layout in BAR 0. Instead we have to iterate of PCI Capabilities to find descriptions of where various registers are located. The vring registers are also more fine-grained, allowing for more flexible vring layouts, but we don't take advantage of that. Signed-off-by: Stefan Hajnoczi Reviewed-by: Sergio Lopez Message-Id: <20191023100425.12168-17-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Thomas Huth --- tests/Makefile.include | 1 + tests/libqos/virtio-pci-modern.c | 443 +++++++++++++++++++++++++++++++ tests/libqos/virtio-pci-modern.h | 17 ++ tests/libqos/virtio-pci.c | 6 +- tests/libqos/virtio-pci.h | 10 + 5 files changed, 476 insertions(+), 1 deletion(-) create mode 100644 tests/libqos/virtio-pci-modern.c create mode 100644 tests/libqos/virtio-pci-modern.h diff --git a/tests/Makefile.include b/tests/Makefile.include index 09e5b410dc..fde8a0c5ef 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -723,6 +723,7 @@ qos-test-obj-y += tests/libqos/virtio-blk.o qos-test-obj-y += tests/libqos/virtio-mmio.o qos-test-obj-y += tests/libqos/virtio-net.o qos-test-obj-y += tests/libqos/virtio-pci.o +qos-test-obj-y += tests/libqos/virtio-pci-modern.o qos-test-obj-y += tests/libqos/virtio-rng.o qos-test-obj-y += tests/libqos/virtio-scsi.o qos-test-obj-y += tests/libqos/virtio-serial.o diff --git a/tests/libqos/virtio-pci-modern.c b/tests/libqos/virtio-pci-modern.c new file mode 100644 index 0000000000..18d118866f --- /dev/null +++ b/tests/libqos/virtio-pci-modern.c @@ -0,0 +1,443 @@ +/* + * libqos VIRTIO 1.0 PCI driver + * + * Copyright (c) 2019 Red Hat, Inc + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "standard-headers/linux/pci_regs.h" +#include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_config.h" +#include "virtio-pci-modern.h" + +static uint8_t config_readb(QVirtioDevice *d, uint64_t addr) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + return qpci_io_readb(dev->pdev, dev->bar, dev->device_cfg_offset + addr); +} + +static uint16_t config_readw(QVirtioDevice *d, uint64_t addr) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + return qpci_io_readw(dev->pdev, dev->bar, dev->device_cfg_offset + addr); +} + +static uint32_t config_readl(QVirtioDevice *d, uint64_t addr) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + return qpci_io_readl(dev->pdev, dev->bar, dev->device_cfg_offset + addr); +} + +static uint64_t config_readq(QVirtioDevice *d, uint64_t addr) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + return qpci_io_readq(dev->pdev, dev->bar, dev->device_cfg_offset + addr); +} + +static uint64_t get_features(QVirtioDevice *d) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + uint64_t lo, hi; + + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + device_feature_select), + 0); + lo = qpci_io_readl(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, device_feature)); + + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + device_feature_select), + 1); + hi = qpci_io_readl(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, device_feature)); + + return (hi << 32) | lo; +} + +static void set_features(QVirtioDevice *d, uint64_t features) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + + /* Drivers must enable VIRTIO 1.0 or else use the Legacy interface */ + g_assert_cmphex(features & (1ull << VIRTIO_F_VERSION_1), !=, 0); + + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + guest_feature_select), + 0); + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + guest_feature), + features); + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + guest_feature_select), + 1); + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + guest_feature), + features >> 32); +} + +static uint64_t get_guest_features(QVirtioDevice *d) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + uint64_t lo, hi; + + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + guest_feature_select), + 0); + lo = qpci_io_readl(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, guest_feature)); + + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + guest_feature_select), + 1); + hi = qpci_io_readl(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, guest_feature)); + + return (hi << 32) | lo; +} + +static uint8_t get_status(QVirtioDevice *d) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + + return qpci_io_readb(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + device_status)); +} + +static void set_status(QVirtioDevice *d, uint8_t status) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + + return qpci_io_writeb(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + device_status), + status); +} + +static bool get_msix_status(QVirtioPCIDevice *dev, uint32_t msix_entry, + uint32_t msix_addr, uint32_t msix_data) +{ + uint32_t data; + + g_assert_cmpint(msix_entry, !=, -1); + if (qpci_msix_masked(dev->pdev, msix_entry)) { + /* No ISR checking should be done if masked, but read anyway */ + return qpci_msix_pending(dev->pdev, msix_entry); + } + + data = qtest_readl(dev->pdev->bus->qts, msix_addr); + if (data == msix_data) { + qtest_writel(dev->pdev->bus->qts, msix_addr, 0); + return true; + } else { + return false; + } +} + +static bool get_queue_isr_status(QVirtioDevice *d, QVirtQueue *vq) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + + if (dev->pdev->msix_enabled) { + QVirtQueuePCI *vqpci = container_of(vq, QVirtQueuePCI, vq); + + return get_msix_status(dev, vqpci->msix_entry, vqpci->msix_addr, + vqpci->msix_data); + } + + return qpci_io_readb(dev->pdev, dev->bar, dev->isr_cfg_offset) & 1; +} + +static bool get_config_isr_status(QVirtioDevice *d) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + + if (dev->pdev->msix_enabled) { + return get_msix_status(dev, dev->config_msix_entry, + dev->config_msix_addr, dev->config_msix_data); + } + + return qpci_io_readb(dev->pdev, dev->bar, dev->isr_cfg_offset) & 2; +} + +static void wait_config_isr_status(QVirtioDevice *d, gint64 timeout_us) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + gint64 start_time = g_get_monotonic_time(); + + do { + g_assert(g_get_monotonic_time() - start_time <= timeout_us); + qtest_clock_step(dev->pdev->bus->qts, 100); + } while (!get_config_isr_status(d)); +} + +static void queue_select(QVirtioDevice *d, uint16_t index) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + + qpci_io_writew(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, queue_select), + index); +} + +static uint16_t get_queue_size(QVirtioDevice *d) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + + return qpci_io_readw(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, queue_size)); +} + +static void set_queue_address(QVirtioDevice *d, QVirtQueue *vq) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, queue_desc_lo), + vq->desc); + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, queue_desc_hi), + vq->desc >> 32); + + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, queue_avail_lo), + vq->avail); + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, queue_avail_hi), + vq->avail >> 32); + + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, queue_used_lo), + vq->used); + qpci_io_writel(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, queue_used_hi), + vq->used >> 32); +} + +static QVirtQueue *virtqueue_setup(QVirtioDevice *d, QGuestAllocator *alloc, + uint16_t index) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + QVirtQueue *vq; + QVirtQueuePCI *vqpci; + uint16_t notify_off; + + vq = qvirtio_pci_virtqueue_setup_common(d, alloc, index); + vqpci = container_of(vq, QVirtQueuePCI, vq); + + notify_off = qpci_io_readw(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + queue_notify_off)); + + vqpci->notify_offset = dev->notify_cfg_offset + + notify_off * dev->notify_off_multiplier; + + qpci_io_writew(dev->pdev, dev->bar, dev->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, queue_enable), 1); + + return vq; +} + +static void virtqueue_kick(QVirtioDevice *d, QVirtQueue *vq) +{ + QVirtioPCIDevice *dev = container_of(d, QVirtioPCIDevice, vdev); + QVirtQueuePCI *vqpci = container_of(vq, QVirtQueuePCI, vq); + + qpci_io_writew(dev->pdev, dev->bar, vqpci->notify_offset, vq->index); +} + +static const QVirtioBus qvirtio_pci_virtio_1 = { + .config_readb = config_readb, + .config_readw = config_readw, + .config_readl = config_readl, + .config_readq = config_readq, + .get_features = get_features, + .set_features = set_features, + .get_guest_features = get_guest_features, + .get_status = get_status, + .set_status = set_status, + .get_queue_isr_status = get_queue_isr_status, + .wait_config_isr_status = wait_config_isr_status, + .queue_select = queue_select, + .get_queue_size = get_queue_size, + .set_queue_address = set_queue_address, + .virtqueue_setup = virtqueue_setup, + .virtqueue_cleanup = qvirtio_pci_virtqueue_cleanup_common, + .virtqueue_kick = virtqueue_kick, +}; + +static void set_config_vector(QVirtioPCIDevice *d, uint16_t entry) +{ + uint16_t vector; + + qpci_io_writew(d->pdev, d->bar, d->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, msix_config), entry); + vector = qpci_io_readw(d->pdev, d->bar, d->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + msix_config)); + g_assert_cmphex(vector, !=, VIRTIO_MSI_NO_VECTOR); +} + +static void set_queue_vector(QVirtioPCIDevice *d, uint16_t vq_idx, + uint16_t entry) +{ + uint16_t vector; + + queue_select(&d->vdev, vq_idx); + qpci_io_writew(d->pdev, d->bar, d->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, queue_msix_vector), + entry); + vector = qpci_io_readw(d->pdev, d->bar, d->common_cfg_offset + + offsetof(struct virtio_pci_common_cfg, + queue_msix_vector)); + g_assert_cmphex(vector, !=, VIRTIO_MSI_NO_VECTOR); +} + +static const QVirtioPCIMSIXOps qvirtio_pci_msix_ops_virtio_1 = { + .set_config_vector = set_config_vector, + .set_queue_vector = set_queue_vector, +}; + +static bool probe_device_type(QVirtioPCIDevice *dev) +{ + uint16_t vendor_id; + uint16_t device_id; + + /* "Drivers MUST match devices with the PCI Vendor ID 0x1AF4" */ + vendor_id = qpci_config_readw(dev->pdev, PCI_VENDOR_ID); + if (vendor_id != 0x1af4) { + return false; + } + + /* + * "Any PCI device with ... PCI Device ID 0x1000 through 0x107F inclusive + * is a virtio device" + */ + device_id = qpci_config_readw(dev->pdev, PCI_DEVICE_ID); + if (device_id < 0x1000 || device_id > 0x107f) { + return false; + } + + /* + * "Devices MAY utilize a Transitional PCI Device ID range, 0x1000 to + * 0x103F depending on the device type" + */ + if (device_id < 0x1040) { + /* + * "Transitional devices MUST have the PCI Subsystem Device ID matching + * the Virtio Device ID" + */ + dev->vdev.device_type = qpci_config_readw(dev->pdev, PCI_SUBSYSTEM_ID); + } else { + /* + * "The PCI Device ID is calculated by adding 0x1040 to the Virtio + * Device ID" + */ + dev->vdev.device_type = device_id - 0x1040; + } + + return true; +} + +/* Find the first VIRTIO 1.0 PCI structure for a given type */ +static bool find_structure(QVirtioPCIDevice *dev, uint8_t cfg_type, + uint8_t *bar, uint32_t *offset, uint32_t *length, + uint8_t *cfg_addr) +{ + uint8_t addr = 0; + + while ((addr = qpci_find_capability(dev->pdev, PCI_CAP_ID_VNDR, + addr)) != 0) { + uint8_t type; + + type = qpci_config_readb(dev->pdev, + addr + offsetof(struct virtio_pci_cap, cfg_type)); + if (type != cfg_type) { + continue; + } + + *bar = qpci_config_readb(dev->pdev, + addr + offsetof(struct virtio_pci_cap, bar)); + *offset = qpci_config_readl(dev->pdev, + addr + offsetof(struct virtio_pci_cap, offset)); + *length = qpci_config_readl(dev->pdev, + addr + offsetof(struct virtio_pci_cap, length)); + if (cfg_addr) { + *cfg_addr = addr; + } + + return true; + } + + return false; +} + +static bool probe_device_layout(QVirtioPCIDevice *dev) +{ + uint8_t bar; + uint8_t cfg_addr; + uint32_t length; + + /* + * Due to the qpci_iomap() API we only support devices that put all + * structures in the same PCI BAR. Luckily this is true with QEMU. + */ + + if (!find_structure(dev, VIRTIO_PCI_CAP_COMMON_CFG, &bar, + &dev->common_cfg_offset, &length, NULL)) { + return false; + } + dev->bar_idx = bar; + + if (!find_structure(dev, VIRTIO_PCI_CAP_NOTIFY_CFG, &bar, + &dev->notify_cfg_offset, &length, &cfg_addr)) { + return false; + } + g_assert_cmphex(bar, ==, dev->bar_idx); + + dev->notify_off_multiplier = qpci_config_readl(dev->pdev, + cfg_addr + offsetof(struct virtio_pci_notify_cap, + notify_off_multiplier)); + + if (!find_structure(dev, VIRTIO_PCI_CAP_ISR_CFG, &bar, + &dev->isr_cfg_offset, &length, NULL)) { + return false; + } + g_assert_cmphex(bar, ==, dev->bar_idx); + + if (!find_structure(dev, VIRTIO_PCI_CAP_DEVICE_CFG, &bar, + &dev->device_cfg_offset, &length, NULL)) { + return false; + } + g_assert_cmphex(bar, ==, dev->bar_idx); + + return true; +} + +/* Probe a VIRTIO 1.0 device */ +bool qvirtio_pci_init_virtio_1(QVirtioPCIDevice *dev) +{ + if (!probe_device_type(dev)) { + return false; + } + + if (!probe_device_layout(dev)) { + return false; + } + + dev->vdev.bus = &qvirtio_pci_virtio_1; + dev->msix_ops = &qvirtio_pci_msix_ops_virtio_1; + dev->vdev.big_endian = false; + return true; +} diff --git a/tests/libqos/virtio-pci-modern.h b/tests/libqos/virtio-pci-modern.h new file mode 100644 index 0000000000..6bf2b207c3 --- /dev/null +++ b/tests/libqos/virtio-pci-modern.h @@ -0,0 +1,17 @@ +/* + * libqos virtio PCI VIRTIO 1.0 definitions + * + * Copyright (c) 2019 Red Hat, Inc + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef LIBQOS_VIRTIO_PCI_MODERN_H +#define LIBQOS_VIRTIO_PCI_MODERN_H + +#include "virtio-pci.h" + +bool qvirtio_pci_init_virtio_1(QVirtioPCIDevice *dev); + +#endif /* LIBQOS_VIRTIO_PCI_MODERN_H */ diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index 11866f7772..62851c29bb 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -22,6 +22,8 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_regs.h" +#include "virtio-pci-modern.h" + /* virtio-pci is a superclass of all virtio-xxx-pci devices; * the relation between virtio-pci and virtio-xxx-pci is implicit, * and therefore virtio-pci does not produce virtio and is not @@ -401,7 +403,9 @@ static void qvirtio_pci_init_from_pcidev(QVirtioPCIDevice *dev, QPCIDevice *pci_ dev->pdev = pci_dev; dev->config_msix_entry = -1; - qvirtio_pci_init_legacy(dev); + if (!qvirtio_pci_init_virtio_1(dev)) { + qvirtio_pci_init_legacy(dev); + } /* each virtio-xxx-pci device should override at least this function */ dev->obj.get_driver = NULL; diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h index 6b3a385b06..294d5567ee 100644 --- a/tests/libqos/virtio-pci.h +++ b/tests/libqos/virtio-pci.h @@ -27,6 +27,13 @@ typedef struct QVirtioPCIDevice { uint32_t config_msix_data; int bar_idx; + + /* VIRTIO 1.0 */ + uint32_t common_cfg_offset; + uint32_t notify_cfg_offset; + uint32_t notify_off_multiplier; + uint32_t isr_cfg_offset; + uint32_t device_cfg_offset; } QVirtioPCIDevice; struct QVirtioPCIMSIXOps { @@ -43,6 +50,9 @@ typedef struct QVirtQueuePCI { uint16_t msix_entry; uint64_t msix_addr; uint32_t msix_data; + + /* VIRTIO 1.0 */ + uint64_t notify_offset; } QVirtQueuePCI; void virtio_pci_init(QVirtioPCIDevice *dev, QPCIBus *bus, QPCIAddress * addr); From 909c548c539b4136e3abf2377980dad6274c4c2e Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 21 Oct 2019 16:03:43 +0100 Subject: [PATCH 25/25] virtio: drop unused virtio_device_stop_ioeventfd() function virtio_device_stop_ioeventfd() has not been used since commit 310837de6c1e0badfd736b1b316b1698c53120a7 ("virtio: introduce grab/release_ioeventfd to fix vhost") in 2016. Nowadays ioeventfd is stopped implicitly by the virtio transport when lifecycle events such as the VM pausing or device unplug occur. Cc: Paolo Bonzini Signed-off-by: Stefan Hajnoczi Message-Id: <20191021150343.30742-1-stefanha@redhat.com> Reviewed-by: Stefano Garzarella Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 8 -------- include/hw/virtio/virtio.h | 1 - 2 files changed, 9 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 828c27de1f..2e91dec567 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -3665,14 +3665,6 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) } } -void virtio_device_stop_ioeventfd(VirtIODevice *vdev) -{ - BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); - VirtioBusState *vbus = VIRTIO_BUS(qbus); - - virtio_bus_stop_ioeventfd(vbus); -} - int virtio_device_grab_ioeventfd(VirtIODevice *vdev) { BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 40ddeafadb..5560f4a5ea 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -308,7 +308,6 @@ EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq); void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, bool with_irqfd); int virtio_device_start_ioeventfd(VirtIODevice *vdev); -void virtio_device_stop_ioeventfd(VirtIODevice *vdev); int virtio_device_grab_ioeventfd(VirtIODevice *vdev); void virtio_device_release_ioeventfd(VirtIODevice *vdev); bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);