From a8bba0ada41b228e4dd5113a02f20bbc0f5d6f0a Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 28 Jun 2016 00:12:03 +0200 Subject: [PATCH 01/14] virtio: Tell the user what went wrong when event_notifier_init failed event_notifier_init() can fail in real life, for example when there are not enough open file handles available (EMFILE) when using a lot of devices. So instead of leaving the average user with a cryptic error number only, print out a proper error message with strerror() instead, so that the user has a better way to figure out what is going on and that using "ulimit -n" might help here for example. Signed-off-by: Thomas Huth Reviewed-by: Eric Blake Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-bus.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index a85b7c8abe..14927935ae 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -164,7 +164,8 @@ static int set_host_notifier_internal(DeviceState *proxy, VirtioBusState *bus, if (assign) { r = event_notifier_init(notifier, 1); if (r < 0) { - error_report("%s: unable to init event notifier: %d", __func__, r); + error_report("%s: unable to init event notifier: %s (%d)", + __func__, strerror(-r), r); return r; } virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); From e3aab6c7f37a288452218ebdbfe39db41382495d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 6 Sep 2016 13:36:51 +0300 Subject: [PATCH 02/14] virtio-pci: use size from correct structure PIO MR registration should use size from the correct notify struct. Doesn't affect any visible behaviour because the field values are the same (both are 4). Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 755f9218b7..268fd8ebb2 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1520,7 +1520,7 @@ static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy) &notify_pio_ops, virtio_bus_get_device(&proxy->bus), "virtio-pci-notify-pio", - proxy->notify.size); + proxy->notify_pio.size); } static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy, From a4d3c8347627203d8c17172a3f4d9c4f39fee6e2 Mon Sep 17 00:00:00 2001 From: "Longpeng(Mike)" Date: Wed, 7 Sep 2016 13:21:12 +0800 Subject: [PATCH 03/14] pc: Add 2.8 machine This will be used by the next patch. Signed-off-by: Longpeng(Mike) Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 16 +++++++++++++--- hw/i386/pc_q35.c | 13 +++++++++++-- include/hw/i386/pc.h | 3 +++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 2af8888f0a..a54a468c0a 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -437,13 +437,25 @@ static void pc_i440fx_machine_options(MachineClass *m) m->default_display = "std"; } -static void pc_i440fx_2_7_machine_options(MachineClass *m) +static void pc_i440fx_2_8_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; } +DEFINE_I440FX_MACHINE(v2_8, "pc-i440fx-2.8", NULL, + pc_i440fx_2_8_machine_options); + + +static void pc_i440fx_2_7_machine_options(MachineClass *m) +{ + pc_i440fx_2_8_machine_options(m); + m->is_default = 0; + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_7); +} + DEFINE_I440FX_MACHINE(v2_7, "pc-i440fx-2.7", NULL, pc_i440fx_2_7_machine_options); @@ -452,8 +464,6 @@ static void pc_i440fx_2_6_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_2_7_machine_options(m); - m->is_default = 0; - m->alias = NULL; pcmc->legacy_cpu_hotplug = 
true; SET_MACHINE_COMPAT(m, PC_COMPAT_2_6); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 3cbcbb0c7e..0b214f24c9 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -293,12 +293,22 @@ static void pc_q35_machine_options(MachineClass *m) m->has_dynamic_sysbus = true; } -static void pc_q35_2_7_machine_options(MachineClass *m) +static void pc_q35_2_8_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v2_8, "pc-q35-2.8", NULL, + pc_q35_2_8_machine_options); + +static void pc_q35_2_7_machine_options(MachineClass *m) +{ + pc_q35_2_8_machine_options(m); + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_7); +} + DEFINE_Q35_MACHINE(v2_7, "pc-q35-2.7", NULL, pc_q35_2_7_machine_options); @@ -306,7 +316,6 @@ static void pc_q35_2_6_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_2_7_machine_options(m); - m->alias = NULL; pcmc->legacy_cpu_hotplug = true; SET_MACHINE_COMPAT(m, PC_COMPAT_2_6); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c4316c30a8..8ad6f15b3e 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -368,6 +368,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +#define PC_COMPAT_2_7 \ + HW_COMPAT_2_7 + #define PC_COMPAT_2_6 \ HW_COMPAT_2_6 \ {\ From 14c985cffa6cb177fc01a163d8bcf227c104718c Mon Sep 17 00:00:00 2001 From: "Longpeng(Mike)" Date: Wed, 7 Sep 2016 13:21:13 +0800 Subject: [PATCH 04/14] target-i386: present virtual L3 cache info for vcpus Some software algorithms are based on the hardware's cache info, for example, for x86 linux kernel, when cpu1 want to wakeup a task on cpu2, cpu1 will trigger a resched IPI and told cpu2 to do the wakeup if they don't share low level cache. Oppositely, cpu1 will access cpu2's runqueue directly if they share llc. 
The relevant linux-kernel code is as below: static void ttwu_queue(struct task_struct *p, int cpu) { struct rq *rq = cpu_rq(cpu); ...... if (... && !cpus_share_cache(smp_processor_id(), cpu)) { ...... ttwu_queue_remote(p, cpu); /* will trigger RES IPI */ return; } ...... ttwu_do_activate(rq, p, 0); /* access target's rq directly */ ...... } In real hardware, the cpus on the same socket share L3 cache, so one won't trigger a resched IPI when waking up a task on another. But QEMU doesn't present virtual L3 cache info for the VM, so the linux guest will trigger lots of RES IPIs under some workloads even if the virtual cpus belong to the same virtual socket. For KVM, there will be lots of vmexits due to the guest sending IPIs. The workload is SAP HANA's testsuite; we ran it for one round (about 40 minutes) and observed the number of RES IPIs triggered in the (Suse11sp3) guest during the period: No-L3 With-L3(applied this patch) cpu0: 363890 44582 cpu1: 373405 43109 cpu2: 340783 43797 cpu3: 333854 43409 cpu4: 327170 40038 cpu5: 325491 39922 cpu6: 319129 42391 cpu7: 306480 41035 cpu8: 161139 32188 cpu9: 164649 31024 cpu10: 149823 30398 cpu11: 149823 32455 cpu12: 164830 35143 cpu13: 172269 35805 cpu14: 179979 33898 cpu15: 194505 32754 avg: 268963.6 40129.8 The VM's topology is "1*socket 8*cores 2*threads". After presenting virtual L3 cache info to the VM, the number of RES IPIs in the guest is reduced by 85%. For KVM, vcpus sending IPIs will cause vmexits, which are expensive, so it can cause severe performance degradation. We also tested the overall system performance when the vcpus actually run on separate physical sockets. With L3 cache, the performance improves 7.2%~33.1%(avg:15.7%). Signed-off-by: Longpeng(Mike) Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- include/hw/i386/pc.h | 9 ++++++++ target-i386/cpu.c | 49 +++++++++++++++++++++++++++++++++++++++----- target-i386/cpu.h | 6 ++++++ 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 8ad6f15b3e..ebba151097 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -368,7 +368,16 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +#define PC_COMPAT_2_8 \ + {\ + .driver = TYPE_X86_CPU,\ + .property = "l3-cache",\ + .value = "off",\ + }, + + #define PC_COMPAT_2_7 \ + PC_COMPAT_2_8 \ HW_COMPAT_2_7 #define PC_COMPAT_2_6 \ diff --git a/target-i386/cpu.c b/target-i386/cpu.c index ec674dcb73..5a5299ad3c 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -57,6 +57,7 @@ #define CPUID_2_L1D_32KB_8WAY_64B 0x2c #define CPUID_2_L1I_32KB_8WAY_64B 0x30 #define CPUID_2_L2_2MB_8WAY_64B 0x7d +#define CPUID_2_L3_16MB_16WAY_64B 0x4d /* CPUID Leaf 4 constants: */ @@ -131,11 +132,18 @@ #define L2_LINES_PER_TAG 1 #define L2_SIZE_KB_AMD 512 -/* No L3 cache: */ +/* Level 3 unified cache: */ #define L3_SIZE_KB 0 /* disabled */ #define L3_ASSOCIATIVITY 0 /* disabled */ #define L3_LINES_PER_TAG 0 /* disabled */ #define L3_LINE_SIZE 0 /* disabled */ +#define L3_N_LINE_SIZE 64 +#define L3_N_ASSOCIATIVITY 16 +#define L3_N_SETS 16384 +#define L3_N_PARTITIONS 1 +#define L3_N_DESCRIPTOR CPUID_2_L3_16MB_16WAY_64B +#define L3_N_LINES_PER_TAG 1 +#define L3_N_SIZE_KB_AMD 16384 /* TLB definitions: */ @@ -2279,6 +2287,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, { X86CPU *cpu = x86_env_get_cpu(env); CPUState *cs = CPU(cpu); + uint32_t pkg_offset; /* test if maximum index reached */ if (index & 0x80000000) { @@ -2332,7 +2341,11 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } *eax = 1; /* Number of CPUID[EAX=2] calls required */ *ebx = 0; - *ecx = 0; + if 
(!cpu->enable_l3_cache) { + *ecx = 0; + } else { + *ecx = L3_N_DESCRIPTOR; + } *edx = (L1D_DESCRIPTOR << 16) | \ (L1I_DESCRIPTOR << 8) | \ (L2_DESCRIPTOR); @@ -2378,6 +2391,25 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = L2_SETS - 1; *edx = CPUID_4_NO_INVD_SHARING; break; + case 3: /* L3 cache info */ + if (!cpu->enable_l3_cache) { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + break; + } + *eax |= CPUID_4_TYPE_UNIFIED | \ + CPUID_4_LEVEL(3) | \ + CPUID_4_SELF_INIT_LEVEL; + pkg_offset = apicid_pkg_offset(cs->nr_cores, cs->nr_threads); + *eax |= ((1 << pkg_offset) - 1) << 14; + *ebx = (L3_N_LINE_SIZE - 1) | \ + ((L3_N_PARTITIONS - 1) << 12) | \ + ((L3_N_ASSOCIATIVITY - 1) << 22); + *ecx = L3_N_SETS - 1; + *edx = CPUID_4_INCLUSIVE | CPUID_4_COMPLEX_IDX; + break; default: /* end of info */ *eax = 0; *ebx = 0; @@ -2589,9 +2621,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = (L2_SIZE_KB_AMD << 16) | \ (AMD_ENC_ASSOC(L2_ASSOCIATIVITY) << 12) | \ (L2_LINES_PER_TAG << 8) | (L2_LINE_SIZE); - *edx = ((L3_SIZE_KB/512) << 18) | \ - (AMD_ENC_ASSOC(L3_ASSOCIATIVITY) << 12) | \ - (L3_LINES_PER_TAG << 8) | (L3_LINE_SIZE); + if (!cpu->enable_l3_cache) { + *edx = ((L3_SIZE_KB / 512) << 18) | \ + (AMD_ENC_ASSOC(L3_ASSOCIATIVITY) << 12) | \ + (L3_LINES_PER_TAG << 8) | (L3_LINE_SIZE); + } else { + *edx = ((L3_N_SIZE_KB_AMD / 512) << 18) | \ + (AMD_ENC_ASSOC(L3_N_ASSOCIATIVITY) << 12) | \ + (L3_N_LINES_PER_TAG << 8) | (L3_N_LINE_SIZE); + } break; case 0x80000007: *eax = 0; @@ -3368,6 +3406,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), + DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/target-i386/cpu.h b/target-i386/cpu.h index cf14bcb6d0..bb3ffda244 100644 --- a/target-i386/cpu.h +++ 
b/target-i386/cpu.h @@ -1207,6 +1207,12 @@ struct X86CPU { */ bool enable_lmce; + /* Compatibility bits for old machine types. + * If true present virtual l3 cache for VM, the vcpus in the same virtual + * socket share an virtual l3 cache. + */ + bool enable_l3_cache; + /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; From d9997d89a4a09a330a056929d06d4b7b0b7a8239 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Wed, 7 Sep 2016 18:02:25 +0300 Subject: [PATCH 05/14] virtio-pci: reduce modern_mem_bar size Currently each VQ Notification Virtio Capability is allocated on a different page. The idea is to enable split drivers within guests, however there are no known plans to do that. The allocation will result in a 8MB BAR, more than various guest firmwares pre-allocates for PCI Bridges hotplug process. Reserve 4 bytes per VQ by default and add a new parameter "page-per-vq" to be used with split drivers. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 22 +++++++++++++++------- hw/virtio/virtio-pci.h | 5 +++++ include/hw/compat.h | 6 +++++- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 268fd8ebb2..89bc196580 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -294,6 +294,12 @@ static void virtio_pci_ioeventfd_set_disabled(DeviceState *d, bool disabled) #define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000 +static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy) +{ + return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ? 
+ QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4; +} + static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, int n, bool assign) { @@ -307,7 +313,7 @@ static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, MemoryRegion *modern_mr = &proxy->notify.mr; MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr; MemoryRegion *legacy_mr = &proxy->bar; - hwaddr modern_addr = QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * + hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) * virtio_get_queue_index(vq); hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY; @@ -1370,7 +1376,8 @@ static void virtio_pci_notify_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { VirtIODevice *vdev = opaque; - unsigned queue = addr / QEMU_VIRTIO_PCI_QUEUE_MEM_MULT; + VirtIOPCIProxy *proxy = VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent); + unsigned queue = addr / virtio_pci_queue_mem_mult(proxy); if (queue < VIRTIO_QUEUE_MAX) { virtio_queue_notify(vdev, queue); @@ -1609,7 +1616,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) struct virtio_pci_notify_cap notify = { .cap.cap_len = sizeof notify, .notify_off_multiplier = - cpu_to_le32(QEMU_VIRTIO_PCI_QUEUE_MEM_MULT), + cpu_to_le32(virtio_pci_queue_mem_mult(proxy)), }; struct virtio_pci_cfg_cap cfg = { .cap.cap_len = sizeof cfg, @@ -1744,8 +1751,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG; proxy->notify.offset = 0x3000; - proxy->notify.size = - QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * VIRTIO_QUEUE_MAX; + proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX; proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG; proxy->notify_pio.offset = 0x0; @@ -1754,8 +1760,8 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) /* subclasses can enforce modern, so do this unconditionally */ memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci", - 2 * QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * - VIRTIO_QUEUE_MAX); + 
/* PCI BAR regions must be powers of 2 */ + pow2ceil(proxy->notify.offset + proxy->notify.size)); memory_region_init_alias(&proxy->modern_cfg, OBJECT(proxy), @@ -1833,6 +1839,8 @@ static Property virtio_pci_properties[] = { VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false), DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false), + DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 25fbf8a375..a7455126f5 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -64,6 +64,7 @@ enum { VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, + VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, }; /* Need to activate work-arounds for buggy guests at vmstate load. */ @@ -84,6 +85,10 @@ enum { #define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \ (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT) +/* page per vq flag to be used by split drivers within guests */ +#define VIRTIO_PCI_FLAG_PAGE_PER_VQ \ + (1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT) + typedef struct { MSIMessage msg; int virq; diff --git a/include/hw/compat.h b/include/hw/compat.h index 08dd4fbec2..a1d6694492 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -2,7 +2,11 @@ #define HW_COMPAT_H #define HW_COMPAT_2_7 \ - /* empty */ + {\ + .driver = "virtio-pci",\ + .property = "page-per-vq",\ + .value = "on",\ + }, #define HW_COMPAT_2_6 \ {\ From 4b7f91ed0270a371e1933efa21ba600b6da23ab9 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 7 Sep 2016 11:51:25 -0400 Subject: [PATCH 06/14] virtio: zero vq->inuse in virtio_reset() vq->inuse must be zeroed upon device reset like most other virtqueue fields. In theory, virtio_reset() just needs assert(vq->inuse == 0) since devices must clean up in-flight requests during reset (requests cannot not be leaked!). 
In practice, it is difficult to achieve vq->inuse == 0 across reset because balloon, blk, 9p, etc implement various different strategies for cleaning up requests. Most devices call g_free(elem) directly without telling virtio.c that the VirtQueueElement is cleaned up. Therefore vq->inuse is not decremented during reset. This patch zeroes vq->inuse and trusts that devices are not leaking VirtQueueElements across reset. I will send a follow-up series that refactors request life-cycle across all devices and converts vq->inuse = 0 into assert(vq->inuse == 0) but this more invasive approach is not appropriate for stable trees. Signed-off-by: Stefan Hajnoczi Cc: qemu-stable Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Ladi Prosek --- hw/virtio/virtio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 74c085c74d..e8a13a50bf 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -822,6 +822,7 @@ void virtio_reset(void *opaque) vdev->vq[i].signalled_used_valid = false; vdev->vq[i].notification = true; vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; + vdev->vq[i].inuse = 0; } } From 104e70cae78bd4afd95d948c6aff188f10508a9c Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Wed, 7 Sep 2016 17:20:47 +0200 Subject: [PATCH 07/14] virtio-balloon: discard virtqueue element on reset The one pending element is being freed but not discarded on device reset, which causes svq->inuse to creep up, eventually hitting the "Virtqueue size exceeded" error. Properly discarding the element on device reset makes sure that its buffers are unmapped and the inuse counter stays balanced. Cc: Michael S. Tsirkin Cc: Roman Kagan Cc: Stefan Hajnoczi Signed-off-by: Ladi Prosek Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio-balloon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 5af429a58a..ad4189a862 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -463,6 +463,7 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) VirtIOBalloon *s = VIRTIO_BALLOON(vdev); if (s->stats_vq_elem != NULL) { + virtqueue_discard(s->svq, s->stats_vq_elem, 0); g_free(s->stats_vq_elem); s->stats_vq_elem = NULL; } From 297a75e6c55d91db2704a3d6e4029d99c7df51fd Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 7 Sep 2016 17:20:48 +0200 Subject: [PATCH 08/14] virtio: add virtqueue_rewind() virtqueue_discard() requires a VirtQueueElement but virtio-balloon does not migrate its in-use element. Introduce a new function that is similar to virtqueue_discard() but doesn't require a VirtQueueElement. This will allow virtio-balloon to access element again after migration with the usual proviso that the guest may have modified the vring since last time. Cc: Michael S. Tsirkin Cc: Roman Kagan Cc: Stefan Hajnoczi Signed-off-by: Ladi Prosek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 22 ++++++++++++++++++++++ include/hw/virtio/virtio.h | 1 + 2 files changed, 23 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index e8a13a50bf..fcf3358d6c 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -272,6 +272,28 @@ void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, virtqueue_unmap_sg(vq, elem, len); } +/* virtqueue_rewind: + * @vq: The #VirtQueue + * @num: Number of elements to push back + * + * Pretend that elements weren't popped from the virtqueue. The next + * virtqueue_pop() will refetch the oldest element. + * + * Use virtqueue_discard() instead if you have a VirtQueueElement. + * + * Returns: true on success, false if @num is greater than the number of in use + * elements. 
+ */ +bool virtqueue_rewind(VirtQueue *vq, unsigned int num) +{ + if (num > vq->inuse) { + return false; + } + vq->last_avail_idx -= num; + vq->inuse -= num; + return true; +} + void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index d2490c1975..f05559d569 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -154,6 +154,7 @@ void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, void virtqueue_flush(VirtQueue *vq, unsigned int count); void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len); +bool virtqueue_rewind(VirtQueue *vq, unsigned int num); void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx); From 4a1e48becab81020adfb74b22c76a595f2d02a01 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Wed, 7 Sep 2016 17:20:49 +0200 Subject: [PATCH 09/14] virtio-balloon: fix stats vq migration The statistics virtqueue is not migrated properly because virtio-balloon does not include s->stats_vq_elem in the migration stream. After migration the statistics virtqueue hangs because the host never completes the last element (s->stats_vq_elem is NULL on the destination QEMU). Therefore the guest never submits new elements and the virtqueue is hung. Instead of changing the migration stream format in an incompatible way, detect the migration case and rewind the virtqueue so the last element can be completed. Cc: Michael S. Tsirkin Cc: Roman Kagan Cc: Stefan Hajnoczi Suggested-by: Roman Kagan Signed-off-by: Ladi Prosek Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio-balloon.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index ad4189a862..49a2f4aade 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -469,6 +469,18 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) } } +static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) +{ + VirtIOBalloon *s = VIRTIO_BALLOON(vdev); + + if (!s->stats_vq_elem && vdev->vm_running && + (status & VIRTIO_CONFIG_S_DRIVER_OK) && virtqueue_rewind(s->svq, 1)) { + /* poll stats queue for the element we have discarded when the VM + * was stopped */ + virtio_balloon_receive_stats(vdev, s->svq); + } +} + static void virtio_balloon_instance_init(Object *obj) { VirtIOBalloon *s = VIRTIO_BALLOON(obj); @@ -506,6 +518,7 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data) vdc->get_features = virtio_balloon_get_features; vdc->save = virtio_balloon_save_device; vdc->load = virtio_balloon_load_device; + vdc->set_status = virtio_balloon_set_status; } static const TypeInfo virtio_balloon_info = { From 3eff376977c457475272a34d243dac6af7cd6a47 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 9 Sep 2016 11:00:59 +0200 Subject: [PATCH 10/14] virtio-pci: error out when both legacy and modern modes are disabled Without presuming if we got there because of a user mistake or some more subtle bug in the tooling, it really does not make sense to implement a non-functional device. Signed-off-by: Greg Kurz Reviewed-by: Marcel Apfelbaum Reviewed-by: Cornelia Huck Signed-off-by: Greg Kurz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio-pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 89bc196580..d70c9865fa 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1776,6 +1776,14 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } + if (!(virtio_pci_modern(proxy) || virtio_pci_legacy(proxy))) { + error_setg(errp, "device cannot work as neither modern nor legacy mode" + " is enabled"); + error_append_hint(errp, "Set either disable-modern or disable-legacy" + " to off\n"); + return; + } + if (pcie_port && pci_is_express(pci_dev)) { int pos; From 96a3d98d2cdbd897ff5ab33427aa4cfb94077665 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 1 Aug 2016 16:07:58 +0800 Subject: [PATCH 11/14] vhost: don't set vring call if no vector We used to set vring call fd unconditionally even if guest driver does not use MSIX for this virtqueue at all. This will cause lots of unnecessary userspace access and other checks for drivers that do not use interrupts at all (e.g. virtio-net pmd). So check and clean vring call fd if guest does not use any vector for this virtqueue at all. Perf diffs (on rx) show lots of cpus wasted on vhost_signal() were saved: # 28.12% -27.82% [vhost] [k] vhost_signal 14.44% -1.69% [kernel.vmlinux] [k] copy_user_generic_string 7.05% +1.53% [kernel.vmlinux] [k] __free_page_frag 6.51% +5.53% [vhost] [k] vhost_get_vq_desc ... Pktgen tests show 15.8% improvement on rx pps and 6.5% on tx pps. Before: RX 2.08Mpps TX 1.35Mpps After: RX 2.41Mpps TX 1.44Mpps Signed-off-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 3d0c807d0e..bd051ab2e1 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -822,6 +822,9 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, struct vhost_virtqueue *vq, unsigned idx) { + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); hwaddr s, l, a; int r; int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); @@ -912,8 +915,19 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, vhost_virtqueue_mask(dev, vdev, idx, false); } + if (k->query_guest_notifiers && + k->query_guest_notifiers(qbus->parent) && + virtio_queue_vector(vdev, idx) == VIRTIO_NO_VECTOR) { + file.fd = -1; + r = dev->vhost_ops->vhost_set_vring_call(dev, &file); + if (r) { + goto fail_vector; + } + } + return 0; +fail_vector: fail_kick: fail_alloc: cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx), From 71d19fc51387ba599f6222057fd69c58b3c0ce7e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 9 Sep 2016 20:56:27 +0300 Subject: [PATCH 12/14] virtio-pci: minor refactoring !legacy && !modern is shorter than !(legacy || modern). I also perfer this (less ()s) as a matter of taste. Cc: Greg Kurz Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index d70c9865fa..bf19808e87 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1776,7 +1776,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) proxy->disable_legacy = pcie_port ? 
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } - if (!(virtio_pci_modern(proxy) || virtio_pci_legacy(proxy))) { + if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) { error_setg(errp, "device cannot work as neither modern nor legacy mode" " is enabled"); error_append_hint(errp, "Set either disable-modern or disable-legacy" From 947b205fdb46941453f0dc43316e13741d45834c Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 6 Sep 2016 22:19:22 +0300 Subject: [PATCH 13/14] tests/acpi: speedup acpi tests Use kvm acceleration if available. Disable kernel-irqchip and use qemu64 cpu for both kvm and tcg cases. Using kvm acceleration saves about a second and disabling kernel-irqchip has no visible performance impact. Acked-by: Michael S. Tsirkin Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/bios-tables-test.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index de4019e57d..7e27ea95ad 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -711,9 +711,12 @@ static void test_acpi_one(const char *params, test_data *data) { char *args; - args = g_strdup_printf("-net none -display none %s " + /* Disable kernel irqchip to be able to override apic irq0. */ + args = g_strdup_printf("-machine %s,accel=%s,kernel-irqchip=off " + "-net none -display none %s " "-drive id=hd0,if=none,file=%s,format=raw " "-device ide-hd,drive=hd0 ", + data->machine, "kvm:tcg", params ? 
params : "", disk); qtest_start(args); @@ -758,7 +761,7 @@ static void test_acpi_piix4_tcg(void) data.machine = MACHINE_PC; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); - test_acpi_one("-machine accel=tcg", &data); + test_acpi_one(NULL, &data); free_test_data(&data); } @@ -771,7 +774,7 @@ static void test_acpi_piix4_tcg_bridge(void) data.variant = ".bridge"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); - test_acpi_one("-machine accel=tcg -device pci-bridge,chassis_nr=1", &data); + test_acpi_one("-device pci-bridge,chassis_nr=1", &data); free_test_data(&data); } @@ -783,7 +786,7 @@ static void test_acpi_q35_tcg(void) data.machine = MACHINE_Q35; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); - test_acpi_one("-machine q35,accel=tcg", &data); + test_acpi_one(NULL, &data); free_test_data(&data); } @@ -796,7 +799,7 @@ static void test_acpi_q35_tcg_bridge(void) data.variant = ".bridge"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); - test_acpi_one("-machine q35,accel=tcg -device pci-bridge,chassis_nr=1", + test_acpi_one("-device pci-bridge,chassis_nr=1", &data); free_test_data(&data); } @@ -808,8 +811,7 @@ static void test_acpi_piix4_tcg_cphp(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".cphp"; - test_acpi_one("-machine accel=tcg" - " -smp 2,cores=3,sockets=2,maxcpus=6", + test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6", &data); free_test_data(&data); } @@ -821,8 +823,7 @@ static void test_acpi_q35_tcg_cphp(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".cphp"; - test_acpi_one("-machine q35,accel=tcg" - " -smp 2,cores=3,sockets=2,maxcpus=6", + test_acpi_one(" -smp 
2,cores=3,sockets=2,maxcpus=6", &data); free_test_data(&data); } @@ -840,7 +841,7 @@ static void test_acpi_q35_tcg_ipmi(void) data.variant = ".ipmibt"; data.required_struct_types = ipmi_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(ipmi_required_struct_types); - test_acpi_one("-machine q35,accel=tcg -device ipmi-bmc-sim,id=bmc0" + test_acpi_one("-device ipmi-bmc-sim,id=bmc0" " -device isa-ipmi-bt,bmc=bmc0", &data); free_test_data(&data); @@ -858,7 +859,7 @@ static void test_acpi_piix4_tcg_ipmi(void) data.variant = ".ipmikcs"; data.required_struct_types = ipmi_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(ipmi_required_struct_types); - test_acpi_one("-machine accel=tcg -device ipmi-bmc-sim,id=bmc0" + test_acpi_one("-device ipmi-bmc-sim,id=bmc0" " -device isa-ipmi-kcs,irq=0,bmc=bmc0", &data); free_test_data(&data); @@ -876,14 +877,14 @@ int main(int argc, char *argv[]) g_test_init(&argc, &argv, NULL); if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { - qtest_add_func("acpi/piix4/tcg", test_acpi_piix4_tcg); - qtest_add_func("acpi/piix4/tcg/bridge", test_acpi_piix4_tcg_bridge); - qtest_add_func("acpi/q35/tcg", test_acpi_q35_tcg); - qtest_add_func("acpi/q35/tcg/bridge", test_acpi_q35_tcg_bridge); - qtest_add_func("acpi/piix4/tcg/ipmi", test_acpi_piix4_tcg_ipmi); - qtest_add_func("acpi/q35/tcg/ipmi", test_acpi_q35_tcg_ipmi); - qtest_add_func("acpi/piix4/tcg/cpuhp", test_acpi_piix4_tcg_cphp); - qtest_add_func("acpi/q35/tcg/cpuhp", test_acpi_q35_tcg_cphp); + qtest_add_func("acpi/piix4", test_acpi_piix4_tcg); + qtest_add_func("acpi/piix4/bridge", test_acpi_piix4_tcg_bridge); + qtest_add_func("acpi/q35", test_acpi_q35_tcg); + qtest_add_func("acpi/q35/bridge", test_acpi_q35_tcg_bridge); + qtest_add_func("acpi/piix4/ipmi", test_acpi_piix4_tcg_ipmi); + qtest_add_func("acpi/q35/ipmi", test_acpi_q35_tcg_ipmi); + qtest_add_func("acpi/piix4/cpuhp", test_acpi_piix4_tcg_cphp); + qtest_add_func("acpi/q35/cpuhp", 
test_acpi_q35_tcg_cphp); } ret = g_test_run(); boot_sector_cleanup(disk); From fc0b9b0e1cbb49017ea882758634cf876be17bc3 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 16 Aug 2016 13:27:22 +0100 Subject: [PATCH 14/14] vhost-vsock: add virtio sockets device Implement the new virtio sockets device for host<->guest communication using the Sockets API. Most of the work is done in a vhost kernel driver so that virtio-vsock can hook into the AF_VSOCK address family. The QEMU vhost-vsock device handles configuration and live migration while the rx/tx happens in the vhost_vsock.ko Linux kernel driver. The vsock device must be given a CID (host-wide unique address): # qemu -device vhost-vsock-pci,id=vhost-vsock-pci0,guest-cid=3 ... For more information see: http://qemu-project.org/Features/VirtioVsock [Endianness fixes and virtio-ccw support by Claudio Imbrenda ] Signed-off-by: Stefan Hajnoczi [mst: rebase to master] Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- configure | 10 + hw/s390x/virtio-ccw.c | 54 ++++ hw/s390x/virtio-ccw.h | 15 ++ hw/virtio/Makefile.objs | 2 + hw/virtio/vhost-backend.c | 17 ++ hw/virtio/vhost-vsock.c | 417 ++++++++++++++++++++++++++++++ hw/virtio/virtio-pci.c | 51 ++++ hw/virtio/virtio-pci.h | 18 ++ include/hw/pci/pci.h | 1 + include/hw/virtio/vhost-backend.h | 5 + include/hw/virtio/vhost-vsock.h | 41 +++ 11 files changed, 631 insertions(+) create mode 100644 hw/virtio/vhost-vsock.c create mode 100644 include/hw/virtio/vhost-vsock.h diff --git a/configure b/configure index 5a9bda18b5..331c36fb84 100755 --- a/configure +++ b/configure @@ -229,6 +229,7 @@ xfs="" vhost_net="no" vhost_scsi="no" +vhost_vsock="no" kvm="no" rdma="" gprof="no" @@ -674,6 +675,7 @@ Haiku) kvm="yes" vhost_net="yes" vhost_scsi="yes" + vhost_vsock="yes" QEMU_INCLUDES="-I\$(SRC_PATH)/linux-headers -I$(pwd)/linux-headers $QEMU_INCLUDES" ;; esac @@ -1017,6 +1019,10 @@ for opt do ;; --enable-vhost-scsi) vhost_scsi="yes" ;; + --disable-vhost-vsock) 
vhost_vsock="no" + ;; + --enable-vhost-vsock) vhost_vsock="yes" + ;; --disable-opengl) opengl="no" ;; --enable-opengl) opengl="yes" @@ -4883,6 +4889,7 @@ echo "uuid support $uuid" echo "libcap-ng support $cap_ng" echo "vhost-net support $vhost_net" echo "vhost-scsi support $vhost_scsi" +echo "vhost-vsock support $vhost_vsock" echo "Trace backends $trace_backends" if have_backend "simple"; then echo "Trace output file $trace_file-" @@ -5264,6 +5271,9 @@ fi if test "$vhost_net" = "yes" ; then echo "CONFIG_VHOST_NET_USED=y" >> $config_host_mak fi +if test "$vhost_vsock" = "yes" ; then + echo "CONFIG_VHOST_VSOCK=y" >> $config_host_mak +fi if test "$blobs" = "yes" ; then echo "INSTALL_BLOBS=yes" >> $config_host_mak fi diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index a554a24d06..96789569a7 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -1658,6 +1658,57 @@ static const TypeInfo virtio_ccw_9p_info = { }; #endif +#ifdef CONFIG_VHOST_VSOCK + +static Property vhost_vsock_ccw_properties[] = { + DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), + DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, + VIRTIO_CCW_MAX_REV), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_vsock_ccw_realize(VirtioCcwDevice *ccw_dev, Error **errp) +{ + VHostVSockCCWState *dev = VHOST_VSOCK_CCW(ccw_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + Error *err = NULL; + + qdev_set_parent_bus(vdev, BUS(&ccw_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", &err); + if (err) { + error_propagate(errp, err); + } +} + +static void vhost_vsock_ccw_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->realize = vhost_vsock_ccw_realize; + k->exit = virtio_ccw_exit; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->props = vhost_vsock_ccw_properties; + dc->reset = virtio_ccw_reset; +} + +static void 
vhost_vsock_ccw_instance_init(Object *obj) +{ + VHostVSockCCWState *dev = VHOST_VSOCK_CCW(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_VSOCK); +} + +static const TypeInfo vhost_vsock_ccw_info = { + .name = TYPE_VHOST_VSOCK_CCW, + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VHostVSockCCWState), + .instance_init = vhost_vsock_ccw_instance_init, + .class_init = vhost_vsock_ccw_class_init, +}; +#endif + static void virtio_ccw_register(void) { type_register_static(&virtio_ccw_bus_info); @@ -1674,6 +1725,9 @@ static void virtio_ccw_register(void) #ifdef CONFIG_VIRTFS type_register_static(&virtio_ccw_9p_info); #endif +#ifdef CONFIG_VHOST_VSOCK + type_register_static(&vhost_vsock_ccw_info); +#endif } type_init(virtio_ccw_register) diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h index 1c6bc86316..904e357581 100644 --- a/hw/s390x/virtio-ccw.h +++ b/hw/s390x/virtio-ccw.h @@ -23,6 +23,9 @@ #include "hw/virtio/virtio-balloon.h" #include "hw/virtio/virtio-rng.h" #include "hw/virtio/virtio-bus.h" +#ifdef CONFIG_VHOST_VSOCK +#include "hw/virtio/vhost-vsock.h" +#endif /* CONFIG_VHOST_VSOCK */ #include "hw/s390x/s390_flic.h" #include "hw/s390x/css.h" @@ -197,4 +200,16 @@ typedef struct V9fsCCWState { #endif /* CONFIG_VIRTFS */ +#ifdef CONFIG_VHOST_VSOCK +#define TYPE_VHOST_VSOCK_CCW "vhost-vsock-ccw" +#define VHOST_VSOCK_CCW(obj) \ + OBJECT_CHECK(VHostVSockCCWState, (obj), TYPE_VHOST_VSOCK_CCW) + +typedef struct VHostVSockCCWState { + VirtioCcwDevice parent_obj; + VHostVSock vdev; +} VHostVSockCCWState; + +#endif /* CONFIG_VHOST_VSOCK */ + #endif diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 3e2b175da8..e71630812e 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -5,3 +5,5 @@ common-obj-y += virtio-mmio.o obj-y += virtio.o virtio-balloon.o obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o + +obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o diff --git 
a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 7681f152f3..272a5ec584 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -172,6 +172,19 @@ static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx) return idx - dev->vq_index; } +#ifdef CONFIG_VHOST_VSOCK +static int vhost_kernel_vsock_set_guest_cid(struct vhost_dev *dev, + uint64_t guest_cid) +{ + return vhost_kernel_call(dev, VHOST_VSOCK_SET_GUEST_CID, &guest_cid); +} + +static int vhost_kernel_vsock_set_running(struct vhost_dev *dev, int start) +{ + return vhost_kernel_call(dev, VHOST_VSOCK_SET_RUNNING, &start); +} +#endif /* CONFIG_VHOST_VSOCK */ + static const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, .vhost_backend_init = vhost_kernel_init, @@ -197,6 +210,10 @@ static const VhostOps kernel_ops = { .vhost_set_owner = vhost_kernel_set_owner, .vhost_reset_device = vhost_kernel_reset_device, .vhost_get_vq_index = vhost_kernel_get_vq_index, +#ifdef CONFIG_VHOST_VSOCK + .vhost_vsock_set_guest_cid = vhost_kernel_vsock_set_guest_cid, + .vhost_vsock_set_running = vhost_kernel_vsock_set_running, +#endif /* CONFIG_VHOST_VSOCK */ }; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c new file mode 100644 index 0000000000..bde2456621 --- /dev/null +++ b/hw/virtio/vhost-vsock.c @@ -0,0 +1,417 @@ +/* + * Virtio vsock device + * + * Copyright 2015 Red Hat, Inc. + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. 
+ */ + +#include +#include "qemu/osdep.h" +#include "standard-headers/linux/virtio_vsock.h" +#include "qapi/error.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "migration/migration.h" +#include "qemu/error-report.h" +#include "hw/virtio/vhost-vsock.h" +#include "qemu/iov.h" +#include "monitor/monitor.h" + +enum { + VHOST_VSOCK_SAVEVM_VERSION = 0, + + VHOST_VSOCK_QUEUE_SIZE = 128, +}; + +static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + struct virtio_vsock_config vsockcfg = {}; + + virtio_stq_p(vdev, &vsockcfg.guest_cid, vsock->conf.guest_cid); + memcpy(config, &vsockcfg, sizeof(vsockcfg)); +} + +static int vhost_vsock_set_guest_cid(VHostVSock *vsock) +{ + const VhostOps *vhost_ops = vsock->vhost_dev.vhost_ops; + int ret; + + if (!vhost_ops->vhost_vsock_set_guest_cid) { + return -ENOSYS; + } + + ret = vhost_ops->vhost_vsock_set_guest_cid(&vsock->vhost_dev, + vsock->conf.guest_cid); + if (ret < 0) { + return -errno; + } + return 0; +} + +static int vhost_vsock_set_running(VHostVSock *vsock, int start) +{ + const VhostOps *vhost_ops = vsock->vhost_dev.vhost_ops; + int ret; + + if (!vhost_ops->vhost_vsock_set_running) { + return -ENOSYS; + } + + ret = vhost_ops->vhost_vsock_set_running(&vsock->vhost_dev, start); + if (ret < 0) { + return -errno; + } + return 0; +} + +static void vhost_vsock_start(VirtIODevice *vdev) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + int i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&vsock->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error 
binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + vsock->vhost_dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&vsock->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost: %d", -ret); + goto err_guest_notifiers; + } + + ret = vhost_vsock_set_running(vsock, 1); + if (ret < 0) { + error_report("Error starting vhost vsock: %d", -ret); + goto err_dev_start; + } + + /* guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < vsock->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&vsock->vhost_dev, vdev, i, false); + } + + return; + +err_dev_start: + vhost_dev_stop(&vsock->vhost_dev, vdev); +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&vsock->vhost_dev, vdev); +} + +static void vhost_vsock_stop(VirtIODevice *vdev) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + ret = vhost_vsock_set_running(vsock, 0); + if (ret < 0) { + error_report("vhost vsock set running failed: %d", ret); + return; + } + + vhost_dev_stop(&vsock->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&vsock->vhost_dev, vdev); +} + +static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (vsock->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + vhost_vsock_start(vdev); + } else { + vhost_vsock_stop(vdev); 
+ } +} + +static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, + uint64_t requested_features, + Error **errp) +{ + /* No feature bits used yet */ + return requested_features; +} + +static void vhost_vsock_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* Do nothing */ +} + +static void vhost_vsock_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + + vhost_virtqueue_mask(&vsock->vhost_dev, vdev, idx, mask); +} + +static bool vhost_vsock_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + + return vhost_virtqueue_pending(&vsock->vhost_dev, idx); +} + +static void vhost_vsock_send_transport_reset(VHostVSock *vsock) +{ + VirtQueueElement *elem; + VirtQueue *vq = vsock->event_vq; + struct virtio_vsock_event event = { + .id = cpu_to_le32(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET), + }; + + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + error_report("vhost-vsock missed transport reset event"); + return; + } + + if (elem->out_num) { + error_report("invalid vhost-vsock event virtqueue element with " + "out buffers"); + goto out; + } + + if (iov_from_buf(elem->in_sg, elem->in_num, 0, + &event, sizeof(event)) != sizeof(event)) { + error_report("vhost-vsock event virtqueue element is too short"); + goto out; + } + + virtqueue_push(vq, elem, sizeof(event)); + virtio_notify(VIRTIO_DEVICE(vsock), vq); + +out: + g_free(elem); +} + +static void vhost_vsock_save(QEMUFile *f, void *opaque, size_t size) +{ + VHostVSock *vsock = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(vsock); + + /* At this point, backend must be stopped, otherwise + * it might keep writing to memory. 
*/ + assert(!vsock->vhost_dev.started); + virtio_save(vdev, f); +} + +static void vhost_vsock_post_load_timer_cleanup(VHostVSock *vsock) +{ + if (!vsock->post_load_timer) { + return; + } + + timer_del(vsock->post_load_timer); + timer_free(vsock->post_load_timer); + vsock->post_load_timer = NULL; +} + +static void vhost_vsock_post_load_timer_cb(void *opaque) +{ + VHostVSock *vsock = opaque; + + vhost_vsock_post_load_timer_cleanup(vsock); + vhost_vsock_send_transport_reset(vsock); +} + +static int vhost_vsock_load(QEMUFile *f, void *opaque, size_t size) +{ + VHostVSock *vsock = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(vsock); + int ret; + + ret = virtio_load(vdev, f, VHOST_VSOCK_SAVEVM_VERSION); + if (ret) { + return ret; + } + + if (virtio_queue_get_addr(vdev, 2)) { + /* Defer transport reset event to a vm clock timer so that virtqueue + * changes happen after migration has completed. + */ + assert(!vsock->post_load_timer); + vsock->post_load_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, + vhost_vsock_post_load_timer_cb, + vsock); + timer_mod(vsock->post_load_timer, 1); + } + + return 0; +} + +VMSTATE_VIRTIO_DEVICE(vhost_vsock, VHOST_VSOCK_SAVEVM_VERSION, + vhost_vsock_load, vhost_vsock_save); + +static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostVSock *vsock = VHOST_VSOCK(dev); + int vhostfd; + int ret; + + /* Refuse to use reserved CID numbers */ + if (vsock->conf.guest_cid <= 2) { + error_setg(errp, "guest-cid property must be greater than 2"); + return; + } + + if (vsock->conf.guest_cid > UINT32_MAX) { + error_setg(errp, "guest-cid property must be a 32-bit number"); + return; + } + + if (vsock->conf.vhostfd) { + vhostfd = monitor_fd_param(cur_mon, vsock->conf.vhostfd, errp); + if (vhostfd == -1) { + error_prepend(errp, "vhost-vsock: unable to parse vhostfd: "); + return; + } + } else { + vhostfd = open("/dev/vhost-vsock", O_RDWR); + if (vhostfd < 0) { + error_setg_errno(errp, -errno, + 
"vhost-vsock: failed to open vhost device"); + return; + } + } + + virtio_init(vdev, "vhost-vsock", VIRTIO_ID_VSOCK, + sizeof(struct virtio_vsock_config)); + + /* Receive and transmit queues belong to vhost */ + virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, vhost_vsock_handle_output); + virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, vhost_vsock_handle_output); + + /* The event queue belongs to QEMU */ + vsock->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_handle_output); + + vsock->vhost_dev.nvqs = ARRAY_SIZE(vsock->vhost_vqs); + vsock->vhost_dev.vqs = vsock->vhost_vqs; + ret = vhost_dev_init(&vsock->vhost_dev, (void *)(uintptr_t)vhostfd, + VHOST_BACKEND_TYPE_KERNEL, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed"); + goto err_virtio; + } + + ret = vhost_vsock_set_guest_cid(vsock); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost-vsock: unable to set guest cid"); + goto err_vhost_dev; + } + + vsock->post_load_timer = NULL; + return; + +err_vhost_dev: + vhost_dev_cleanup(&vsock->vhost_dev); +err_virtio: + virtio_cleanup(vdev); + close(vhostfd); + return; +} + +static void vhost_vsock_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostVSock *vsock = VHOST_VSOCK(dev); + + vhost_vsock_post_load_timer_cleanup(vsock); + + /* This will stop vhost backend if appropriate. 
*/ + vhost_vsock_set_status(vdev, 0); + + vhost_dev_cleanup(&vsock->vhost_dev); + virtio_cleanup(vdev); +} + +static Property vhost_vsock_properties[] = { + DEFINE_PROP_UINT64("guest-cid", VHostVSock, conf.guest_cid, 0), + DEFINE_PROP_STRING("vhostfd", VHostVSock, conf.vhostfd), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_vsock_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + dc->props = vhost_vsock_properties; + dc->vmsd = &vmstate_virtio_vhost_vsock; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->realize = vhost_vsock_device_realize; + vdc->unrealize = vhost_vsock_device_unrealize; + vdc->get_features = vhost_vsock_get_features; + vdc->get_config = vhost_vsock_get_config; + vdc->set_status = vhost_vsock_set_status; + vdc->guest_notifier_mask = vhost_vsock_guest_notifier_mask; + vdc->guest_notifier_pending = vhost_vsock_guest_notifier_pending; +} + +static const TypeInfo vhost_vsock_info = { + .name = TYPE_VHOST_VSOCK, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostVSock), + .class_init = vhost_vsock_class_init, +}; + +static void vhost_vsock_register_types(void) +{ + type_register_static(&vhost_vsock_info); +} + +type_init(vhost_vsock_register_types) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index bf19808e87..dde71a5965 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2071,6 +2071,54 @@ static const TypeInfo vhost_scsi_pci_info = { }; #endif +/* vhost-vsock-pci */ + +#ifdef CONFIG_VHOST_VSOCK +static Property vhost_vsock_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostVSockPCI *dev = VHOST_VSOCK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + 
object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void vhost_vsock_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_vsock_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->props = vhost_vsock_pci_properties; + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_VSOCK; + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_vsock_pci_instance_init(Object *obj) +{ + VHostVSockPCI *dev = VHOST_VSOCK_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_VSOCK); +} + +static const TypeInfo vhost_vsock_pci_info = { + .name = TYPE_VHOST_VSOCK_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VHostVSockPCI), + .instance_init = vhost_vsock_pci_instance_init, + .class_init = vhost_vsock_pci_class_init, +}; +#endif + /* virtio-balloon-pci */ static Property virtio_balloon_pci_properties[] = { @@ -2501,6 +2549,9 @@ static void virtio_pci_register_types(void) #ifdef CONFIG_VHOST_SCSI type_register_static(&vhost_scsi_pci_info); #endif +#ifdef CONFIG_VHOST_VSOCK + type_register_static(&vhost_vsock_pci_info); +#endif } type_init(virtio_pci_register_types) diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index a7455126f5..0698157b32 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -31,6 +31,9 @@ #ifdef CONFIG_VHOST_SCSI #include "hw/virtio/vhost-scsi.h" #endif +#ifdef CONFIG_VHOST_VSOCK +#include "hw/virtio/vhost-vsock.h" +#endif typedef struct VirtIOPCIProxy VirtIOPCIProxy; typedef struct VirtIOBlkPCI VirtIOBlkPCI; @@ -44,6 +47,7 @@ typedef struct VirtIOInputPCI VirtIOInputPCI; typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI; typedef struct VirtIOInputHostPCI VirtIOInputHostPCI; typedef struct 
VirtIOGPUPCI VirtIOGPUPCI; +typedef struct VHostVSockPCI VHostVSockPCI; /* virtio-pci-bus */ @@ -329,6 +333,20 @@ struct VirtIOGPUPCI { VirtIOGPU vdev; }; +#ifdef CONFIG_VHOST_VSOCK +/* + * vhost-vsock-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci" +#define VHOST_VSOCK_PCI(obj) \ + OBJECT_CHECK(VHostVSockPCI, (obj), TYPE_VHOST_VSOCK_PCI) + +struct VHostVSockPCI { + VirtIOPCIProxy parent_obj; + VHostVSock vdev; +}; +#endif + /* Virtio ABI version, if we increment this, we break the guest driver. */ #define VIRTIO_PCI_ABI_VERSION 0 diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 929ec2fb07..e8b83bbb1e 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -79,6 +79,7 @@ #define PCI_DEVICE_ID_VIRTIO_SCSI 0x1004 #define PCI_DEVICE_ID_VIRTIO_RNG 0x1005 #define PCI_DEVICE_ID_VIRTIO_9P 0x1009 +#define PCI_DEVICE_ID_VIRTIO_VSOCK 0x1012 #define PCI_VENDOR_ID_REDHAT 0x1b36 #define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001 diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index cf7f0b5a69..6e90703cad 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -73,6 +73,9 @@ typedef int (*vhost_migration_done_op)(struct vhost_dev *dev, typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev, uint64_t start1, uint64_t size1, uint64_t start2, uint64_t size2); +typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev, + uint64_t guest_cid); +typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start); typedef struct VhostOps { VhostBackendType backend_type; @@ -102,6 +105,8 @@ typedef struct VhostOps { vhost_requires_shm_log_op vhost_requires_shm_log; vhost_migration_done_op vhost_migration_done; vhost_backend_can_merge_op vhost_backend_can_merge; + vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid; + vhost_vsock_set_running_op vhost_vsock_set_running; } VhostOps; extern const VhostOps user_ops; diff --git 
a/include/hw/virtio/vhost-vsock.h b/include/hw/virtio/vhost-vsock.h new file mode 100644 index 0000000000..7b9205fe3f --- /dev/null +++ b/include/hw/virtio/vhost-vsock.h @@ -0,0 +1,41 @@ +/* + * Vhost vsock virtio device + * + * Copyright 2015 Red Hat, Inc. + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef _QEMU_VHOST_VSOCK_H +#define _QEMU_VHOST_VSOCK_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" + +#define TYPE_VHOST_VSOCK "vhost-vsock-device" +#define VHOST_VSOCK(obj) \ + OBJECT_CHECK(VHostVSock, (obj), TYPE_VHOST_VSOCK) + +typedef struct { + uint64_t guest_cid; + char *vhostfd; +} VHostVSockConf; + +typedef struct { + /*< private >*/ + VirtIODevice parent; + VHostVSockConf conf; + struct vhost_virtqueue vhost_vqs[2]; + struct vhost_dev vhost_dev; + VirtQueue *event_vq; + QEMUTimer *post_load_timer; + + /*< public >*/ +} VHostVSock; + +#endif /* _QEMU_VHOST_VSOCK_H */