diff --git a/MAINTAINERS b/MAINTAINERS index 1200b695ce..ea91f9e804 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2154,6 +2154,13 @@ T: git https://gitlab.com/cohuck/qemu.git s390-next T: git https://github.com/borntraeger/qemu.git s390-next L: qemu-s390x@nongnu.org +virtio-dmabuf +M: Albert Esteve +S: Supported +F: hw/display/virtio-dmabuf.c +F: include/hw/virtio/virtio-dmabuf.h +F: tests/unit/test-virtio-dmabuf.c + virtiofs M: Stefan Hajnoczi S: Supported diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 5a070adbc1..415bb47a19 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -1440,6 +1440,18 @@ Front-end message types query the back-end for its device status as defined in the Virtio specification. +``VHOST_USER_GET_SHARED_OBJECT`` + :id: 41 + :equivalent ioctl: N/A + :request payload: ``struct VhostUserShared`` + :reply payload: dmabuf fd + + When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol + feature has been successfully negotiated, and the UUID is found + in the exporters cache, this message is submitted by the front-end + to retrieve a given dma-buf fd from a given back-end, determined by + the requested UUID. Back-end will reply passing the fd when the operation + is successful, or no fd otherwise. Back-end message types ---------------------- @@ -1528,6 +1540,51 @@ is sent by the front-end. The state.num field is currently reserved and must be set to 0. +``VHOST_USER_BACKEND_SHARED_OBJECT_ADD`` + :id: 6 + :equivalent ioctl: N/A + :request payload: ``struct VhostUserShared`` + :reply payload: N/A + + When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol + feature has been successfully negotiated, this message can be submitted + by the backends to add themselves as exporters to the virtio shared lookup + table. The back-end device gets associated with a UUID in the shared table. + The back-end is responsible of keeping its own table with exported dma-buf fds. + When another back-end tries to import the resource associated with the UUID, + it will send a message to the front-end, which will act as a proxy to the + exporter back-end. If ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, and + the back-end sets the ``VHOST_USER_NEED_REPLY`` flag, the front-end must + respond with zero when operation is successfully completed, or non-zero + otherwise. + +``VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE`` + :id: 7 + :equivalent ioctl: N/A + :request payload: ``struct VhostUserShared`` + :reply payload: N/A + + When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol + feature has been successfully negotiated, this message can be submitted + by the backend to remove themselves from to the virtio-dmabuf shared + table API. The shared table will remove the back-end device associated with + the UUID. If ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, and the + back-end sets the ``VHOST_USER_NEED_REPLY`` flag, the front-end must respond + with zero when operation is successfully completed, or non-zero otherwise. + +``VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP`` + :id: 8 + :equivalent ioctl: N/A + :request payload: ``struct VhostUserShared`` + :reply payload: dmabuf fd and ``u64`` + + When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol + feature has been successfully negotiated, this message can be submitted + by the backends to retrieve a given dma-buf fd from the virtio-dmabuf + shared table given a UUID. Frontend will reply passing the fd and a zero + when the operation is successful, or non-zero otherwise. Note that if the + operation fails, no fd is sent to the backend. + .. _reply_ack: VHOST_USER_PROTOCOL_F_REPLY_ACK diff --git a/hw/acpi/acpi-x86-stub.c b/hw/acpi/acpi-x86-stub.c index d0d399d26b..9662a594ad 100644 --- a/hw/acpi/acpi-x86-stub.c +++ b/hw/acpi/acpi-x86-stub.c @@ -1,12 +1,6 @@ #include "qemu/osdep.h" -#include "hw/i386/pc.h" #include "hw/i386/acpi-build.h" -void pc_madt_cpu_entry(int uid, const CPUArchIdList *apic_ids, - GArray *entry, bool force_enabled) -{ -} - Object *acpi_get_i386_pci_host(void) { return NULL; diff --git a/hw/acpi/core.c b/hw/acpi/core.c index 00b1e79a30..ec5e127d17 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -32,6 +32,7 @@ #include "qemu/module.h" #include "qemu/option.h" #include "sysemu/runstate.h" +#include "trace.h" struct acpi_table_header { uint16_t _length; /* our length, not actual part of the hdr */ @@ -688,9 +689,11 @@ void acpi_gpe_ioport_writeb(ACPIREGS *ar, uint32_t addr, uint32_t val) cur = acpi_gpe_ioport_get_ptr(ar, addr); if (addr < ar->gpe.len / 2) { + trace_acpi_gpe_sts_ioport_writeb(addr, val); /* GPE_STS */ *cur = (*cur) & ~val; } else if (addr < ar->gpe.len) { + trace_acpi_gpe_en_ioport_writeb(addr - (ar->gpe.len / 2), val); /* GPE_EN */ *cur = val; } else { @@ -709,6 +712,12 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr) val = *cur; } + if (addr < ar->gpe.len / 2) { + trace_acpi_gpe_sts_ioport_readb(addr, val); + } else { + trace_acpi_gpe_en_ioport_readb(addr - (ar->gpe.len / 2), val); + } + return val; } diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 19c154d78f..011d2c6c2d 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "migration/vmstate.h" #include "hw/acpi/cpu.h" +#include "hw/core/cpu.h" #include "qapi/error.h" #include "qapi/qapi-events-acpi.h" #include "trace.h" @@ -338,7 +339,7 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPU_FW_EJECT_EVENT "CEJF" void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - hwaddr io_base, + build_madt_cpu_fn build_madt_cpu, hwaddr io_base, const char *res_root, const char *event_handler_method) { @@ -353,8 +354,6 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, MachineClass *mc = MACHINE_GET_CLASS(machine); const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine); char *cphp_res_path = g_strdup_printf("%s." CPUHP_RES_DEVICE, res_root); - Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, NULL); - AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj); cpu_ctrl_dev = aml_device("%s", cphp_res_path); { @@ -664,9 +663,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(dev, method); /* build _MAT object */ - assert(adevc && adevc->madt_cpu); - adevc->madt_cpu(i, arch_ids, madt_buf, - true); /* set enabled flag */ + build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */ aml_append(dev, aml_name_decl("_MAT", aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); g_array_free(madt_buf, true); diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c index 2d5e199ba9..3042d223c8 100644 --- a/hw/acpi/hmat.c +++ b/hw/acpi/hmat.c @@ -27,6 +27,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "sysemu/numa.h" +#include "hw/acpi/aml-build.h" #include "hw/acpi/hmat.h" /* diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h index b57f0e7e80..fd989cb661 100644 --- a/hw/acpi/hmat.h +++ b/hw/acpi/hmat.h @@ -27,7 +27,8 @@ #ifndef HMAT_H #define HMAT_H -#include "hw/acpi/aml-build.h" +#include "hw/acpi/bios-linker-loader.h" +#include "sysemu/numa.h" /* * ACPI 6.3: 5.2.27.3 Memory Proximity Domain Attributes Structure, diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c index d926f4f77d..0b883df813 100644 --- a/hw/acpi/memory_hotplug.c +++ b/hw/acpi/memory_hotplug.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "hw/acpi/memory_hotplug.h" #include "hw/mem/pc-dimm.h" +#include "hw/boards.h" #include "hw/qdev-core.h" #include "migration/vmstate.h" #include "trace.h" diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 63d2113b86..dd523d2e4c 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -20,7 +20,6 @@ */ #include "qemu/osdep.h" -#include "hw/i386/pc.h" #include "hw/irq.h" #include "hw/isa/apm.h" #include "hw/i2c/pm_smbus.h" @@ -43,7 +42,6 @@ #include "hw/acpi/acpi_dev_interface.h" #include "migration/vmstate.h" #include "hw/core/cpu.h" -#include "trace.h" #include "qom/object.h" #define GPE_BASE 0xafe0 @@ -518,7 +516,6 @@ static uint64_t gpe_readb(void *opaque, hwaddr addr, unsigned width) PIIX4PMState *s = opaque; uint32_t val = acpi_gpe_ioport_readb(&s->ar, addr); - trace_piix4_gpe_readb(addr, width, val); return val; } @@ -527,7 +524,6 @@ static void gpe_writeb(void *opaque, hwaddr addr, uint64_t val, { PIIX4PMState *s = opaque; - trace_piix4_gpe_writeb(addr, width, val); acpi_gpe_ioport_writeb(&s->ar, addr, val); acpi_update_sci(&s->ar, s->irq); } @@ -654,7 +650,6 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) hc->is_hotpluggable_bus = piix4_is_hotpluggable_bus; adevc->ospm_status = piix4_ospm_status; adevc->send_event = piix4_send_gpe; - adevc->madt_cpu = pc_madt_cpu_entry; } static const TypeInfo piix4_pm_info = { diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index 78e0a8670e..edc93e703c 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -17,6 +17,12 @@ mhp_acpi_clear_remove_evt(uint32_t slot) "slot[0x%"PRIx32"] clear remove event" mhp_acpi_pc_dimm_deleted(uint32_t slot) "slot[0x%"PRIx32"] pc-dimm deleted" mhp_acpi_pc_dimm_delete_failed(uint32_t slot) "slot[0x%"PRIx32"] pc-dimm delete failed" +# core.c +acpi_gpe_en_ioport_readb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " ==> 0x%02" PRIx8 +acpi_gpe_en_ioport_writeb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " <== 0x%02" PRIx8 +acpi_gpe_sts_ioport_readb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " ==> 0x%02" PRIx8 +acpi_gpe_sts_ioport_writeb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " <== 0x%02" PRIx8 + # cpu.c cpuhp_acpi_invalid_idx_selected(uint32_t idx) "0x%"PRIx32 cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"PRIx8 @@ -48,10 +54,6 @@ acpi_pci_sel_read(uint32_t val) "%" PRIu32 acpi_pci_ej_write(uint64_t addr, uint64_t data) "0x%" PRIx64 " <== %" PRIu64 acpi_pci_sel_write(uint64_t addr, uint64_t data) "0x%" PRIx64 " <== %" PRIu64 -# piix4.c -piix4_gpe_readb(uint64_t addr, unsigned width, uint64_t val) "addr: 0x%" PRIx64 " width: %d ==> 0x%" PRIx64 -piix4_gpe_writeb(uint64_t addr, unsigned width, uint64_t val) "addr: 0x%" PRIx64 " width: %d <== 0x%" PRIx64 - # tco.c tco_timer_reload(int ticks, int msec) "ticks=%d (%d ms)" tco_timer_expired(int timeouts_no, bool strap, bool no_reboot) "timeouts_no=%d no_reboot=%d/%d" diff --git a/hw/core/machine.c b/hw/core/machine.c index 281ef0dccd..cfd1edfe20 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -24,6 +24,7 @@ #include "sysemu/runstate.h" #include "sysemu/xen.h" #include "sysemu/qtest.h" +#include "hw/pci/pci_bridge.h" #include "hw/mem/nvdimm.h" #include "migration/global_state.h" #include "exec/confidential-guest-support.h" @@ -31,7 +32,9 @@ #include "hw/virtio/virtio-net.h" #include "audio/audio.h" -GlobalProperty hw_compat_8_1[] = {}; +GlobalProperty hw_compat_8_1[] = { + { TYPE_PCI_BRIDGE, "x-pci-express-writeable-slt-bug", "true" }, +}; const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1); GlobalProperty hw_compat_8_0[] = { diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index 378f1082ce..f3bbf0fd13 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -13,6 +13,54 @@ #include "hw/pci/pci.h" #include "hw/cxl/cxl.h" +/* CXL r3.0 Section 8.2.4.19.1 CXL HDM Decoder Capability Register */ +int cxl_decoder_count_enc(int count) +{ + switch (count) { + case 1: return 0x0; + case 2: return 0x1; + case 4: return 0x2; + case 6: return 0x3; + case 8: return 0x4; + case 10: return 0x5; + /* Switches and Host Bridges may have more than 10 decoders */ + case 12: return 0x6; + case 14: return 0x7; + case 16: return 0x8; + case 20: return 0x9; + case 24: return 0xa; + case 28: return 0xb; + case 32: return 0xc; + } + return 0; +} + +int cxl_decoder_count_dec(int enc_cnt) +{ + switch (enc_cnt) { + case 0x0: return 1; + case 0x1: return 2; + case 0x2: return 4; + case 0x3: return 6; + case 0x4: return 8; + case 0x5: return 10; + /* Switches and Host Bridges may have more than 10 decoders */ + case 0x6: return 12; + case 0x7: return 14; + case 0x8: return 16; + case 0x9: return 20; + case 0xa: return 24; + case 0xb: return 28; + case 0xc: return 32; + } + return 0; +} + +hwaddr cxl_decode_ig(int ig) +{ + return 1ULL << (ig + 8); +} + static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset, unsigned size) { @@ -42,6 +90,9 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset, switch (offset) { case A_CXL_HDM_DECODER0_CTRL: + case A_CXL_HDM_DECODER1_CTRL: + case A_CXL_HDM_DECODER2_CTRL: + case A_CXL_HDM_DECODER3_CTRL: should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); should_uncommit = !should_commit; break; @@ -81,7 +132,7 @@ static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value, } if (offset >= A_CXL_HDM_DECODER_CAPABILITY && - offset <= A_CXL_HDM_DECODER0_TARGET_LIST_HI) { + offset <= A_CXL_HDM_DECODER3_TARGET_LIST_HI) { dumb_hdm_handler(cxl_cstate, offset, value); } else { cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)] = value; @@ -161,7 +212,8 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk) static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, enum reg_type type) { - int decoder_count = 1; + int decoder_count = CXL_HDM_DECODER_COUNT; + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; int i; ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, DECODER_COUNT, @@ -174,19 +226,21 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, HDM_DECODER_ENABLE, 0); write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3; for (i = 0; i < decoder_count; i++) { - write_msk[R_CXL_HDM_DECODER0_BASE_LO + i * 0x20] = 0xf0000000; - write_msk[R_CXL_HDM_DECODER0_BASE_HI + i * 0x20] = 0xffffffff; - write_msk[R_CXL_HDM_DECODER0_SIZE_LO + i * 0x20] = 0xf0000000; - write_msk[R_CXL_HDM_DECODER0_SIZE_HI + i * 0x20] = 0xffffffff; - write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x13ff; + write_msk[R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc] = 0xf0000000; + write_msk[R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc] = 0xf0000000; + write_msk[R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc] = 0x13ff; if (type == CXL2_DEVICE || type == CXL2_TYPE3_DEVICE || type == CXL2_LOGICAL_DEVICE) { - write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * 0x20] = 0xf0000000; + write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * hdm_inc] = + 0xf0000000; } else { - write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * 0x20] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * hdm_inc] = + 0xffffffff; } - write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_HI + i * 0x20] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_HI + i * hdm_inc] = 0xffffffff; } } @@ -375,6 +429,7 @@ void cxl_component_create_dvsec(CXLComponentState *cxl, cxl->dvsec_offset += length; } +/* CXL r3.0 Section 8.2.4.19.7 CXL HDM Decoder n Control Register */ uint8_t cxl_interleave_ways_enc(int iw, Error **errp) { switch (iw) { @@ -392,6 +447,23 @@ uint8_t cxl_interleave_ways_enc(int iw, Error **errp) } } +int cxl_interleave_ways_dec(uint8_t iw_enc, Error **errp) +{ + switch (iw_enc) { + case 0x0: return 1; + case 0x1: return 2; + case 0x2: return 4; + case 0x3: return 8; + case 0x4: return 16; + case 0x8: return 3; + case 0x9: return 6; + case 0xa: return 12; + default: + error_setg(errp, "Encoded interleave ways: %d not supported", iw_enc); + return 0; + } +} + uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp) { switch (gran) { diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index f0920da956..2aa776c79c 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -97,33 +97,58 @@ void cxl_fmws_link_targets(CXLState *cxl_state, Error **errp) } } -/* TODO: support, multiple hdm decoders */ static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr, uint8_t *target) { - uint32_t ctrl; - uint32_t ig_enc; - uint32_t iw_enc; - uint32_t target_idx; + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; + unsigned int hdm_count; + bool found = false; + int i; + uint32_t cap; - ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL]; - if (!FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) { - return false; + cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY); + hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap, + CXL_HDM_DECODER_CAPABILITY, + DECODER_COUNT)); + for (i = 0; i < hdm_count; i++) { + uint32_t ctrl, ig_enc, iw_enc, target_idx; + uint32_t low, high; + uint64_t base, size; + + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc); + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc); + base = (low & 0xf0000000) | ((uint64_t)high << 32); + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc); + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc); + size = (low & 0xf0000000) | ((uint64_t)high << 32); + if (addr < base || addr >= base + size) { + continue; + } + + ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * hdm_inc); + if (!FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) { + return false; + } + found = true; + ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG); + iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW); + target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc); + + if (target_idx < 4) { + uint32_t val = ldl_le_p(cache_mem + + R_CXL_HDM_DECODER0_TARGET_LIST_LO + + i * hdm_inc); + *target = extract32(val, target_idx * 8, 8); + } else { + uint32_t val = ldl_le_p(cache_mem + + R_CXL_HDM_DECODER0_TARGET_LIST_HI + + i * hdm_inc); + *target = extract32(val, (target_idx - 4) * 8, 8); + } + break; } - ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG); - iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW); - target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc); - - if (target_idx < 4) { - *target = extract32(cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_LO], - target_idx * 8, 8); - } else { - *target = extract32(cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_HI], - (target_idx - 4) * 8, 8); - } - - return true; + return found; } static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) diff --git a/hw/display/meson.build b/hw/display/meson.build index 413ba4ab24..05619c6968 100644 --- a/hw/display/meson.build +++ b/hw/display/meson.build @@ -37,6 +37,7 @@ system_ss.add(when: 'CONFIG_MACFB', if_true: files('macfb.c')) system_ss.add(when: 'CONFIG_NEXTCUBE', if_true: files('next-fb.c')) system_ss.add(when: 'CONFIG_VGA', if_true: files('vga.c')) +system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('virtio-dmabuf.c')) if (config_all_devices.has_key('CONFIG_VGA_CIRRUS') or config_all_devices.has_key('CONFIG_VGA_PCI') or diff --git a/hw/display/virtio-dmabuf.c b/hw/display/virtio-dmabuf.c new file mode 100644 index 0000000000..4a8e430f3d --- /dev/null +++ b/hw/display/virtio-dmabuf.c @@ -0,0 +1,138 @@ +/* + * Virtio Shared dma-buf + * + * Copyright Red Hat, Inc. 2023 + * + * Authors: + * Albert Esteve + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "hw/virtio/virtio-dmabuf.h" + + +static GMutex lock; +static GHashTable *resource_uuids; + +/* + * uuid_equal_func: wrapper for UUID is_equal function to + * satisfy g_hash_table_new expected parameters signatures. + */ +static int uuid_equal_func(const void *lhv, const void *rhv) +{ + return qemu_uuid_is_equal(lhv, rhv); +} + +static bool virtio_add_resource(QemuUUID *uuid, VirtioSharedObject *value) +{ + bool result = false; + + g_mutex_lock(&lock); + if (resource_uuids == NULL) { + resource_uuids = g_hash_table_new_full(qemu_uuid_hash, + uuid_equal_func, + NULL, + g_free); + } + if (g_hash_table_lookup(resource_uuids, uuid) == NULL) { + result = g_hash_table_insert(resource_uuids, uuid, value); + } + g_mutex_unlock(&lock); + + return result; +} + +bool virtio_add_dmabuf(QemuUUID *uuid, int udmabuf_fd) +{ + bool result; + VirtioSharedObject *vso; + if (udmabuf_fd < 0) { + return false; + } + vso = g_new(VirtioSharedObject, 1); + vso->type = TYPE_DMABUF; + vso->value = GINT_TO_POINTER(udmabuf_fd); + result = virtio_add_resource(uuid, vso); + + return result; +} + +bool virtio_add_vhost_device(QemuUUID *uuid, struct vhost_dev *dev) +{ + bool result; + VirtioSharedObject *vso; + if (dev == NULL) { + return false; + } + vso = g_new(VirtioSharedObject, 1); + vso->type = TYPE_VHOST_DEV; + vso->value = dev; + result = virtio_add_resource(uuid, vso); + + return result; +} + +bool virtio_remove_resource(const QemuUUID *uuid) +{ + bool result; + g_mutex_lock(&lock); + result = g_hash_table_remove(resource_uuids, uuid); + g_mutex_unlock(&lock); + + return result; +} + +static VirtioSharedObject *get_shared_object(const QemuUUID *uuid) +{ + gpointer lookup_res = NULL; + + g_mutex_lock(&lock); + if (resource_uuids != NULL) { + lookup_res = g_hash_table_lookup(resource_uuids, uuid); + } + g_mutex_unlock(&lock); + + return (VirtioSharedObject *) lookup_res; +} + +int virtio_lookup_dmabuf(const QemuUUID *uuid) +{ + VirtioSharedObject *vso = get_shared_object(uuid); + if (vso == NULL) { + return -1; + } + assert(vso->type == TYPE_DMABUF); + return GPOINTER_TO_INT(vso->value); +} + +struct vhost_dev *virtio_lookup_vhost_device(const QemuUUID *uuid) +{ + VirtioSharedObject *vso = get_shared_object(uuid); + if (vso == NULL) { + return NULL; + } + assert(vso->type == TYPE_VHOST_DEV); + return (struct vhost_dev *) vso->value; +} + +SharedObjectType virtio_object_type(const QemuUUID *uuid) +{ + VirtioSharedObject *vso = get_shared_object(uuid); + if (vso == NULL) { + return TYPE_INVALID; + } + return vso->type; +} + +void virtio_free_resources(void) +{ + g_mutex_lock(&lock); + g_hash_table_destroy(resource_uuids); + /* Reference count shall be 0 after the implicit unref on destroy */ + resource_uuids = NULL; + g_mutex_unlock(&lock); +} diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 95199c8900..3f2b27cf75 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1495,14 +1495,14 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(crs, aml_io( AML_DECODE16, - ACPI_PORT_SMI_CMD, - ACPI_PORT_SMI_CMD, + pm->fadt.smi_cmd, + pm->fadt.smi_cmd, 1, 2) ); aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(dev, aml_operation_region("SMIR", AML_SYSTEM_IO, - aml_int(ACPI_PORT_SMI_CMD), 2)); + aml_int(pm->fadt.smi_cmd), 2)); field = aml_field("SMIR", AML_BYTE_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("SMIC", 8)); @@ -1549,8 +1549,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, .smi_path = pm->smi_on_cpuhp ? "\\_SB.PCI0.SMI0.SMIC" : NULL, .fw_unplugs_cpu = pm->smi_on_cpu_unplug, }; - build_cpus_aml(dsdt, machine, opts, pm->cpu_hp_io_base, - "\\_SB.PCI0", "\\_GPE._E02"); + build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, + pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02"); } if (pcms->memhp_io_base && nr_mem) { @@ -2547,8 +2547,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); acpi_build_madt(tables_blob, tables->linker, x86ms, - ACPI_DEVICE_IF(x86ms->acpi_dev), x86ms->oem_id, - x86ms->oem_table_id); + x86ms->oem_id, x86ms->oem_table_id); #ifdef CONFIG_ACPI_ERST { diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c index 8a0932fe84..43dc23f7e0 100644 --- a/hw/i386/acpi-common.c +++ b/hw/i386/acpi-common.c @@ -94,14 +94,13 @@ build_xrupt_override(GArray *entry, uint8_t src, uint32_t gsi, uint16_t flags) * 5.2.8 Multiple APIC Description Table */ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, - X86MachineState *x86ms, AcpiDeviceIf *adev, + X86MachineState *x86ms, const char *oem_id, const char *oem_table_id) { int i; bool x2apic_mode = false; MachineClass *mc = MACHINE_GET_CLASS(x86ms); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms)); - AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(adev); AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = oem_id, .oem_table_id = oem_table_id }; @@ -111,7 +110,7 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ for (i = 0; i < apic_ids->len; i++) { - adevc->madt_cpu(i, apic_ids, table_data, false); + pc_madt_cpu_entry(i, apic_ids, table_data, false); if (apic_ids->cpus[i].arch_id > 254) { x2apic_mode = true; } diff --git a/hw/i386/acpi-common.h b/hw/i386/acpi-common.h index a68825acf5..b3c56ee014 100644 --- a/hw/i386/acpi-common.h +++ b/hw/i386/acpi-common.h @@ -1,7 +1,6 @@ #ifndef HW_I386_ACPI_COMMON_H #define HW_I386_ACPI_COMMON_H -#include "hw/acpi/acpi_dev_interface.h" #include "hw/acpi/bios-linker-loader.h" #include "hw/i386/x86.h" @@ -9,7 +8,7 @@ #define ACPI_BUILD_IOAPIC_ID 0x0 void acpi_build_madt(GArray *table_data, BIOSLinker *linker, - X86MachineState *x86ms, AcpiDeviceIf *adev, + X86MachineState *x86ms, const char *oem_id, const char *oem_table_id); #endif diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c index a075360d85..fec22d85c1 100644 --- a/hw/i386/acpi-microvm.c +++ b/hw/i386/acpi-microvm.c @@ -214,8 +214,7 @@ static void acpi_build_microvm(AcpiBuildTables *tables, acpi_add_table(table_offsets, tables_blob); acpi_build_madt(tables_blob, tables->linker, X86_MACHINE(machine), - ACPI_DEVICE_IF(x86ms->acpi_dev), x86ms->oem_id, - x86ms->oem_table_id); + x86ms->oem_id, x86ms->oem_table_id); #ifdef CONFIG_ACPI_ERST { diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index c98a3c6e11..8d0f2f99dd 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1246,13 +1246,8 @@ static int amdvi_int_remap_msi(AMDVIState *iommu, return -AMDVI_IR_ERR; } - if (origin->address & AMDVI_MSI_ADDR_HI_MASK) { - trace_amdvi_err("MSI address high 32 bits non-zero when " - "Interrupt Remapping enabled."); - return -AMDVI_IR_ERR; - } - - if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) { + if (origin->address < AMDVI_INT_ADDR_FIRST || + origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) { trace_amdvi_err("MSI is not from IOAPIC."); return -AMDVI_IR_ERR; } diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 6da893ee57..c5065a3e27 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -210,8 +210,6 @@ #define AMDVI_INT_ADDR_FIRST 0xfee00000 #define AMDVI_INT_ADDR_LAST 0xfeefffff #define AMDVI_INT_ADDR_SIZE (AMDVI_INT_ADDR_LAST - AMDVI_INT_ADDR_FIRST + 1) -#define AMDVI_MSI_ADDR_HI_MASK (0xffffffff00000000ULL) -#define AMDVI_MSI_ADDR_LO_MASK (0x00000000ffffffffULL) /* SB IOAPIC is always on this device in AMD systems */ #define AMDVI_IOAPIC_SB_DEVID PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0)) diff --git a/hw/i386/generic_event_device_x86.c b/hw/i386/generic_event_device_x86.c deleted file mode 100644 index e26fb02a2e..0000000000 --- a/hw/i386/generic_event_device_x86.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * x86 variant of the generic event device for hw reduced acpi - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2 or later, as published by the Free Software Foundation. - */ - -#include "qemu/osdep.h" -#include "hw/acpi/generic_event_device.h" -#include "hw/i386/pc.h" - -static void acpi_ged_x86_class_init(ObjectClass *class, void *data) -{ - AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(class); - - adevc->madt_cpu = pc_madt_cpu_entry; -} - -static const TypeInfo acpi_ged_x86_info = { - .name = TYPE_ACPI_GED_X86, - .parent = TYPE_ACPI_GED, - .class_init = acpi_ged_x86_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_HOTPLUG_HANDLER }, - { TYPE_ACPI_DEVICE_IF }, - { } - } -}; - -static void acpi_ged_x86_register_types(void) -{ - type_register_static(&acpi_ged_x86_info); -} - -type_init(acpi_ged_x86_register_types) diff --git a/hw/i386/meson.build b/hw/i386/meson.build index cfdbfdcbcb..ff879069c9 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -20,7 +20,6 @@ i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'), if_false: files('sgx-stub.c')) i386_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-common.c')) -i386_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device_x86.c')) i386_ss.add(when: 'CONFIG_PC', if_true: files( 'pc.c', 'pc_sysfw.c', diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 8deeb62774..b9c93039e2 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -204,7 +204,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) /* Optional and legacy devices */ if (x86_machine_is_acpi_enabled(x86ms)) { - DeviceState *dev = qdev_new(TYPE_ACPI_GED_X86); + DeviceState *dev = qdev_new(TYPE_ACPI_GED); qdev_prop_set_uint32(dev, "ged-event", ACPI_GED_PWR_DOWN_EVT); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, GED_MMIO_BASE); /* sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, GED_MMIO_BASE_MEMHP); */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c index ec01d74482..aad7e8ccd1 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -814,13 +814,39 @@ static uint64_t pc_get_cxl_range_end(PCMachineState *pcms) static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size) { X86CPU *cpu = X86_CPU(first_cpu); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + MachineState *ms = MACHINE(pcms); - /* 32-bit systems don't have hole64 thus return max CPU address */ - if (cpu->phys_bits <= 32) { + if (cpu->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { + /* 64-bit systems */ + return pc_pci_hole64_start() + pci_hole64_size - 1; + } + + /* 32-bit systems */ + if (pcmc->broken_32bit_mem_addr_check) { + /* old value for compatibility reasons */ return ((hwaddr)1 << cpu->phys_bits) - 1; } - return pc_pci_hole64_start() + pci_hole64_size - 1; + /* + * 32-bit systems don't have hole64 but they might have a region for + * memory devices. Even if additional hotplugged memory devices might + * not be usable by most guest OSes, we need to still consider them for + * calculating the highest possible GPA so that we can properly report + * if someone configures them on a CPU that cannot possibly address them. + */ + if (pcmc->has_reserved_memory && + (ms->ram_size < ms->maxram_size)) { + hwaddr devmem_start; + ram_addr_t devmem_size; + + pc_get_device_memory_range(pcms, &devmem_start, &devmem_size); + devmem_start += devmem_size; + return devmem_start - 1; + } + + /* configuration without any memory hotplug */ + return pc_above_4g_end(pcms) - 1; } /* diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index ff8654ecda..e36a3262b2 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -496,9 +496,13 @@ DEFINE_I440FX_MACHINE(v8_2, "pc-i440fx-8.2", NULL, static void pc_i440fx_8_1_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_i440fx_8_2_machine_options(m); m->alias = NULL; m->is_default = false; + pcmc->broken_32bit_mem_addr_check = true; + compat_props_add(m->compat_props, hw_compat_8_1, hw_compat_8_1_len); compat_props_add(m->compat_props, pc_compat_8_1, pc_compat_8_1_len); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 2dd1158b70..a7386f2ca2 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -394,8 +394,10 @@ DEFINE_Q35_MACHINE(v8_2, "pc-q35-8.2", NULL, static void pc_q35_8_1_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_8_2_machine_options(m); m->alias = NULL; + pcmc->broken_32bit_mem_addr_check = true; compat_props_add(m->compat_props, hw_compat_8_1, hw_compat_8_1_len); compat_props_add(m->compat_props, pc_compat_8_1, pc_compat_8_1_len); } diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 9c47a2f6c7..3f59980aa0 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -304,6 +304,21 @@ static PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin) route.irq = -1; } } else { + /* + * Strictly speaking, this is wrong. The PIRQ should be routed + * to *both* the I/O APIC and the PIC, on different pins. The + * I/O APIC has a fixed mapping to IRQ16-23, while the PIC is + * routed according to the PIRQx_ROUT configuration. But QEMU + * doesn't (yet) cope with the concept of pin numbers differing + * between PIC and I/O APIC, and neither does the in-kernel KVM + * irqchip support. So we route to the I/O APIC *only* if the + * routing to the PIC is disabled in the PIRQx_ROUT settings. + * + * This seems to work even if we boot a Linux guest with 'noapic' + * to make it use the legacy PIC, and then kexec directly into a + * new kernel which uses the I/O APIC. The new kernel explicitly + * disables the PIRQ routing even though it doesn't need to care. + */ route.irq = ich9_pirq_to_gsi(pirq_pin); } @@ -876,7 +891,6 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) hc->is_hotpluggable_bus = ich9_pm_is_hotpluggable_bus; adevc->ospm_status = ich9_pm_ospm_status; adevc->send_event = ich9_send_gpe; - adevc->madt_cpu = pc_madt_cpu_entry; amldevc->build_dev_aml = build_ich9_isa_aml; } diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 4cdcb3f7e7..c02be4ce45 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -388,34 +388,32 @@ static void build_dvsecs(CXLType3Dev *ct3d) static void hdm_decoder_commit(CXLType3Dev *ct3d, int which) { + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; ComponentRegisters *cregs = &ct3d->cxl_cstate.crb; uint32_t *cache_mem = cregs->cache_mem_registers; uint32_t ctrl; - assert(which == 0); - - ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL); + ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc); /* TODO: Sanity checks that the decoder is possible */ ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0); ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); - stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL, ctrl); + stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl); } static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which) { + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; ComponentRegisters *cregs = &ct3d->cxl_cstate.crb; uint32_t *cache_mem = cregs->cache_mem_registers; uint32_t ctrl; - assert(which == 0); - - ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL); + ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc); ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0); ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 0); - stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL, ctrl); + stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl); } static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err) @@ -498,6 +496,21 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, should_uncommit = !should_commit; which_hdm = 0; break; + case A_CXL_HDM_DECODER1_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + should_uncommit = !should_commit; + which_hdm = 1; + break; + case A_CXL_HDM_DECODER2_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + should_uncommit = !should_commit; + which_hdm = 2; + break; + case A_CXL_HDM_DECODER3_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + should_uncommit = !should_commit; + which_hdm = 3; + break; case A_CXL_RAS_UNC_ERR_STATUS: { uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL); @@ -769,36 +782,63 @@ static void ct3_exit(PCIDevice *pci_dev) } } -/* TODO: Support multiple HDM decoders and DPA skip */ static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa) { + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers; - uint64_t decoder_base, decoder_size, hpa_offset; - uint32_t hdm0_ctrl; - int ig, iw; + unsigned int hdm_count; + uint32_t cap; + uint64_t dpa_base = 0; + int i; - decoder_base = (((uint64_t)cache_mem[R_CXL_HDM_DECODER0_BASE_HI] << 32) | - cache_mem[R_CXL_HDM_DECODER0_BASE_LO]); - if ((uint64_t)host_addr < decoder_base) { - return false; + cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY); + hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap, + CXL_HDM_DECODER_CAPABILITY, + DECODER_COUNT)); + + for (i = 0; i < hdm_count; i++) { + uint64_t decoder_base, decoder_size, hpa_offset, skip; + uint32_t hdm_ctrl, low, high; + int ig, iw; + + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc); + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc); + decoder_base = ((uint64_t)high << 32) | (low & 0xf0000000); + + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc); + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc); + decoder_size = ((uint64_t)high << 32) | (low & 0xf0000000); + + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_LO + + i * hdm_inc); + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_HI + + i * hdm_inc); + skip = ((uint64_t)high << 32) | (low & 0xf0000000); + dpa_base += skip; + + hpa_offset = (uint64_t)host_addr - decoder_base; + + hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * hdm_inc); + iw = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IW); + ig = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IG); + if (!FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) { + return false; + } + if (((uint64_t)host_addr < decoder_base) || + (hpa_offset >= decoder_size)) { + dpa_base += decoder_size / + cxl_interleave_ways_dec(iw, &error_fatal); + continue; + } + + *dpa = dpa_base + + ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) | + ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset) + >> iw)); + + return true; } - - hpa_offset = (uint64_t)host_addr - decoder_base; - - decoder_size = ((uint64_t)cache_mem[R_CXL_HDM_DECODER0_SIZE_HI] << 32) | - cache_mem[R_CXL_HDM_DECODER0_SIZE_LO]; - if (hpa_offset >= decoder_size) { - return false; - } - - hdm0_ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL]; - iw = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IW); - ig = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IG); - - *dpa = (MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) | - ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset) >> iw); - - return true; + return false; } static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d, diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 5a0201c423..29e33ea5ed 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -49,8 +49,6 @@ #define VIRTIO_NET_VM_VERSION 11 -#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ - /* previously fixed value */ #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 @@ -1029,9 +1027,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) vhost_net_save_acked_features(nc->peer); } - if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { - memset(n->vlans, 0, MAX_VLAN >> 3); - } else { + if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { memset(n->vlans, 0xff, MAX_VLAN >> 3); } diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c index 2b9cf0cc97..a57806fb31 100644 --- a/hw/pci-bridge/cxl_upstream.c +++ b/hw/pci-bridge/cxl_upstream.c @@ -14,14 +14,21 @@ #include "hw/pci/msi.h" #include "hw/pci/pcie.h" #include "hw/pci/pcie_port.h" +/* + * Null value of all Fs suggested by IEEE RA guidelines for use of + * EU, OUI and CID + */ +#define UI64_NULL (~0ULL) #define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 2 #define CXL_UPSTREAM_PORT_MSI_OFFSET 0x70 #define CXL_UPSTREAM_PORT_PCIE_CAP_OFFSET 0x90 #define CXL_UPSTREAM_PORT_AER_OFFSET 0x100 -#define CXL_UPSTREAM_PORT_DVSEC_OFFSET \ +#define CXL_UPSTREAM_PORT_SN_OFFSET \ (CXL_UPSTREAM_PORT_AER_OFFSET + PCI_ERR_SIZEOF) +#define CXL_UPSTREAM_PORT_DVSEC_OFFSET \ + (CXL_UPSTREAM_PORT_SN_OFFSET + PCI_EXT_CAP_DSN_SIZEOF) typedef struct CXLUpstreamPort { /*< private >*/ @@ -30,6 +37,7 @@ typedef struct CXLUpstreamPort { /*< public >*/ CXLComponentState cxl_cstate; DOECap doe_cdat; + uint64_t sn; } CXLUpstreamPort; CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp) @@ -326,7 +334,9 @@ static void cxl_usp_realize(PCIDevice *d, Error **errp) if (rc) { goto err_cap; } - + if (usp->sn != UI64_NULL) { + pcie_dev_ser_num_init(d, CXL_UPSTREAM_PORT_SN_OFFSET, usp->sn); + } cxl_cstate->dvsec_offset = CXL_UPSTREAM_PORT_DVSEC_OFFSET; cxl_cstate->pdev = d; build_dvsecs(cxl_cstate); @@ -366,6 +376,7 @@ static void cxl_usp_exitfn(PCIDevice *d) } static Property cxl_upstream_props[] = { + DEFINE_PROP_UINT64("sn", CXLUpstreamPort, sn, UI64_NULL), DEFINE_PROP_STRING("cdat", CXLUpstreamPort, cxl_cstate.cdat.filename), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 881d774fb6..b0d21bf43a 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -893,7 +893,7 @@ static void pci_init_w1cmask(PCIDevice *dev) static void pci_init_mask_bridge(PCIDevice *d) { /* PCI_PRIMARY_BUS, PCI_SECONDARY_BUS, PCI_SUBORDINATE_BUS and - PCI_SEC_LETENCY_TIMER */ + PCI_SEC_LATENCY_TIMER */ memset(d->wmask + PCI_PRIMARY_BUS, 0xff, 4); /* base and limit */ diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index e7b9345615..6a4e38856d 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -38,6 +38,7 @@ #include "qapi/error.h" #include "hw/acpi/acpi_aml_interface.h" #include "hw/acpi/pci.h" +#include "hw/qdev-properties.h" /* PCI bridge subsystem vendor ID helper functions */ #define PCI_SSVID_SIZEOF 8 @@ -385,6 +386,11 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) pci_bridge_region_init(br); QLIST_INIT(&sec_bus->child); QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); + + /* For express secondary buses, secondary latency timer is RO 0 */ + if (pci_bus_is_express(sec_bus) && !br->pcie_writeable_slt_bug) { + dev->wmask[PCI_SEC_LATENCY_TIMER] = 0; + } } /* default qdev clean up function for PCI-to-PCI bridge */ @@ -466,10 +472,18 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, return 0; } +static Property pci_bridge_properties[] = { + DEFINE_PROP_BOOL("x-pci-express-writeable-slt-bug", PCIBridge, + pcie_writeable_slt_bug, false), + DEFINE_PROP_END_OF_LIST(), +}; + static void pci_bridge_class_init(ObjectClass *klass, void *data) { AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass); + DeviceClass *k = DEVICE_CLASS(klass); + device_class_set_props(k, pci_bridge_properties); adevc->build_dev_aml = build_pci_bridge_aml; } diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 76a3b6917e..5ef8950940 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -196,19 +196,16 @@ static void register_vfs(PCIDevice *dev) static void unregister_vfs(PCIDevice *dev) { - Error *local_err = NULL; uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; uint16_t i; trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { + Error *err = NULL; PCIDevice *vf = dev->exp.sriov_pf.vf[i]; - object_property_set_bool(OBJECT(vf), "realized", false, &local_err); - if (local_err) { - fprintf(stderr, "Failed to unplug: %s\n", - error_get_pretty(local_err)); - error_free(local_err); + if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) { + error_reportf_err(err, "Failed to unplug: "); } object_unparent(OBJECT(vf)); object_unref(OBJECT(vf)); diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index ee99b19e7a..df6b66cc1a 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -39,10 +39,6 @@ static const int user_feature_bits[] = { VHOST_INVALID_FEATURE_BIT }; -enum VhostUserProtocolFeature { - VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, -}; - static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserSCSI *s = (VHostUserSCSI *)vdev; diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 13e7c6c272..c0055a7832 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -1,27 +1,31 @@ -softmmu_virtio_ss = ss.source_set() -softmmu_virtio_ss.add(files('virtio-bus.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) +system_virtio_ss = ss.source_set() +system_virtio_ss.add(files('virtio-bus.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) +system_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) +system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) specific_virtio_ss = ss.source_set() specific_virtio_ss.add(files('virtio.c')) specific_virtio_ss.add(files('virtio-config-io.c', 'virtio-qmp.c')) if have_vhost - softmmu_virtio_ss.add(files('vhost.c')) + system_virtio_ss.add(files('vhost.c')) specific_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) if have_vhost_user + # fixme - this really should be generic specific_virtio_ss.add(files('vhost-user.c')) + system_virtio_ss.add(files('vhost-user-device.c')) + system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) endif if have_vhost_vdpa - specific_virtio_ss.add(files('vhost-vdpa.c', 'vhost-shadow-virtqueue.c')) + system_virtio_ss.add(files('vhost-vdpa.c')) + specific_virtio_ss.add(files('vhost-shadow-virtqueue.c')) endif else - softmmu_virtio_ss.add(files('vhost-stub.c')) + system_virtio_ss.add(files('vhost-stub.c')) endif specific_virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) @@ -67,7 +71,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MD', if_true: files('virtio-md-pci.c')) specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) -system_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) +system_ss.add_all(when: 'CONFIG_VIRTIO', if_true: system_virtio_ss) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('virtio-stub.c')) system_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 7109cf1a3b..1cb9027d1e 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -48,7 +48,7 @@ vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32 vhost_vdpa_reset_device(void *dev) "dev: %p" vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d" -vhost_vdpa_set_vring_ready(void *dev) "dev: %p" +vhost_vdpa_set_vring_ready(void *dev, unsigned i, int r) "dev: %p, idx: %u, r: %d" vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 363b625243..f22d5d5bc0 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -255,6 +255,9 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) error_setg_errno(errp, -ret, "Error starting vhost"); goto err_guest_notifiers; } + for (i = 0; i < s->dev.nvqs; ++i) { + vhost_vdpa_set_vring_ready(&s->vdpa, i); + } s->started = true; /* diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 49e5aed931..e731b1d2ea 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -514,29 +514,37 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, } /** - * Poll the SVQ for one device used buffer. + * Poll the SVQ to wait for the device to use the specified number + * of elements and return the total length written by the device. * * This function race with main event loop SVQ polling, so extra * synchronization is needed. * - * Return the length written by the device. + * @svq: The svq + * @num: The number of elements that need to be used */ -size_t vhost_svq_poll(VhostShadowVirtqueue *svq) +size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num) { - int64_t start_us = g_get_monotonic_time(); - uint32_t len = 0; + size_t len = 0; + uint32_t r; - do { - if (vhost_svq_more_used(svq)) { - break; - } + while (num--) { + int64_t start_us = g_get_monotonic_time(); - if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { - return 0; - } - } while (true); + do { + if (vhost_svq_more_used(svq)) { + break; + } + + if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { + return len; + } + } while (true); + + vhost_svq_get_buf(svq, &r); + len += r; + } - vhost_svq_get_buf(svq, &len); return len; } diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index 6efe051a70..5bce67837b 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -119,7 +119,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, size_t out_num, const struct iovec *in_sg, size_t in_num, VirtQueueElement *elem); -size_t vhost_svq_poll(VhostShadowVirtqueue *svq); +size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num); void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); diff --git a/hw/virtio/vhost-user-device-pci.c b/hw/virtio/vhost-user-device-pci.c new file mode 100644 index 0000000000..41f9b7905b --- /dev/null +++ b/hw/virtio/vhost-user-device-pci.c @@ -0,0 +1,71 @@ +/* + * Vhost-user generic virtio device PCI glue + * + * Copyright (c) 2023 Linaro Ltd + * Author: Alex Bennée + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-device.h" +#include "hw/virtio/virtio-pci.h" + +struct VHostUserDevicePCI { + VirtIOPCIProxy parent_obj; + VHostUserBase vub; +}; + +typedef struct VHostUserDevicePCI VHostUserDevicePCI; + +#define TYPE_VHOST_USER_DEVICE_PCI "vhost-user-device-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserDevicePCI, + VHOST_USER_DEVICE_PCI, + TYPE_VHOST_USER_DEVICE_PCI) + +static void vhost_user_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vub); + + vpci_dev->nvectors = 1; + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_device_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_device_pci_realize; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_device_pci_instance_init(Object *obj) +{ + VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(obj); + + virtio_instance_init_common(obj, &dev->vub, sizeof(dev->vub), + TYPE_VHOST_USER_DEVICE); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_device_pci_info = { + .base_name = TYPE_VHOST_USER_DEVICE_PCI, + .non_transitional_name = "vhost-user-device-pci", + .instance_size = sizeof(VHostUserDevicePCI), + .instance_init = vhost_user_device_pci_instance_init, + .class_init = vhost_user_device_pci_class_init, +}; + +static void vhost_user_device_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_device_pci_info); +} + +type_init(vhost_user_device_pci_register); diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c new file mode 100644 index 0000000000..2b028cae08 --- /dev/null +++ b/hw/virtio/vhost-user-device.c @@ -0,0 +1,380 @@ +/* + * Generic vhost-user stub. This can be used to connect to any + * vhost-user backend. All configuration details must be handled by + * the vhost-user daemon itself + * + * Copyright (c) 2023 Linaro Ltd + * Author: Alex Bennée + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-device.h" +#include "qemu/error-report.h" + +static void vub_start(VirtIODevice *vdev) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + int ret, i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&vub->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + vub->vhost_dev.acked_features = vdev->guest_features; + + ret = vhost_dev_start(&vub->vhost_dev, vdev, true); + if (ret < 0) { + error_report("Error starting vhost-user-device: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < vub->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&vub->vhost_dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); +} + +static void vub_stop(VirtIODevice *vdev) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&vub->vhost_dev, vdev, true); + + ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); +} + +static void vub_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + bool should_start = virtio_device_should_start(vdev, status); + + if (vhost_dev_is_started(&vub->vhost_dev) == should_start) { + return; + } + + if (should_start) { + vub_start(vdev); + } else { + vub_stop(vdev); + } +} + +/* + * For an implementation where everything is delegated to the backend + * we don't do anything other than return the full feature set offered + * by the daemon (module the reserved feature bit). + */ +static uint64_t vub_get_features(VirtIODevice *vdev, + uint64_t requested_features, Error **errp) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + /* This should be set when the vhost connection initialises */ + g_assert(vub->vhost_dev.features); + return vub->vhost_dev.features & ~(1ULL << VHOST_USER_F_PROTOCOL_FEATURES); +} + +/* + * To handle VirtIO config we need to know the size of the config + * space. We don't cache the config but re-fetch it from the guest + * every time in case something has changed. + */ +static void vub_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + Error *local_err = NULL; + + /* + * There will have been a warning during vhost_dev_init, but lets + * assert here as nothing will go right now. + */ + g_assert(vub->config_size && vub->vhost_user.supports_config == true); + + if (vhost_dev_get_config(&vub->vhost_dev, config, + vub->config_size, &local_err)) { + error_report_err(local_err); + } +} + +/* + * When the daemon signals an update to the config we just need to + * signal the guest as we re-read the config on demand above. + */ +static int vub_config_notifier(struct vhost_dev *dev) +{ + virtio_notify_config(dev->vdev); + return 0; +} + +const VhostDevConfigOps vub_config_ops = { + .vhost_dev_config_notifier = vub_config_notifier, +}; + +static void vub_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. + */ +} + +static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserBase *vub) +{ + vhost_user_cleanup(&vub->vhost_user); + + for (int i = 0; i < vub->num_vqs; i++) { + VirtQueue *vq = g_ptr_array_index(vub->vqs, i); + virtio_delete_queue(vq); + } + + virtio_cleanup(vdev); +} + +static int vub_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + struct vhost_dev *vhost_dev = &vub->vhost_dev; + + if (vub->connected) { + return 0; + } + vub->connected = true; + + /* + * If we support VHOST_USER_GET_CONFIG we must enable the notifier + * so we can ping the guest when it updates. + */ + if (vub->vhost_user.supports_config) { + vhost_dev_set_config_notifier(vhost_dev, &vub_config_ops); + } + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vub_start(vdev); + } + + return 0; +} + +static void vub_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + + if (!vub->connected) { + return; + } + vub->connected = false; + + if (vhost_dev_is_started(&vub->vhost_dev)) { + vub_stop(vdev); + } +} + +static void vub_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + + switch (event) { + case CHR_EVENT_OPENED: + if (vub_connect(dev) < 0) { + qemu_chr_fe_disconnect(&vub->chardev); + return; + } + break; + case CHR_EVENT_CLOSED: + vub_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static void vub_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(dev); + int ret; + + if (!vub->chardev.chr) { + error_setg(errp, "vhost-user-device: missing chardev"); + return; + } + + if (!vub->virtio_id) { + error_setg(errp, "vhost-user-device: need to define device id"); + return; + } + + if (!vub->num_vqs) { + vub->num_vqs = 1; /* reasonable default? */ + } + + /* + * We can't handle config requests unless we know the size of the + * config region, specialisations of the vhost-user-device will be + * able to set this. + */ + if (vub->config_size) { + vub->vhost_user.supports_config = true; + } + + if (!vhost_user_init(&vub->vhost_user, &vub->chardev, errp)) { + return; + } + + virtio_init(vdev, vub->virtio_id, vub->config_size); + + /* + * Disable guest notifiers, by default all notifications will be via the + * asynchronous vhost-user socket. + */ + vdev->use_guest_notifier_mask = false; + + /* Allocate queues */ + vub->vqs = g_ptr_array_sized_new(vub->num_vqs); + for (int i = 0; i < vub->num_vqs; i++) { + g_ptr_array_add(vub->vqs, + virtio_add_queue(vdev, 4, vub_handle_output)); + } + + vub->vhost_dev.nvqs = vub->num_vqs; + vub->vhost_dev.vqs = g_new0(struct vhost_virtqueue, vub->vhost_dev.nvqs); + + /* connect to backend */ + ret = vhost_dev_init(&vub->vhost_dev, &vub->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + + if (ret < 0) { + do_vhost_user_cleanup(vdev, vub); + } + + qemu_chr_fe_set_handlers(&vub->chardev, NULL, NULL, vub_event, NULL, + dev, NULL, true); +} + +static void vub_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(dev); + struct vhost_virtqueue *vhost_vqs = vub->vhost_dev.vqs; + + /* This will stop vhost backend if appropriate. */ + vub_set_status(vdev, 0); + vhost_dev_cleanup(&vub->vhost_dev); + g_free(vhost_vqs); + do_vhost_user_cleanup(vdev, vub); +} + +static void vub_class_init(ObjectClass *klass, void *data) +{ + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + vdc->realize = vub_device_realize; + vdc->unrealize = vub_device_unrealize; + vdc->get_features = vub_get_features; + vdc->get_config = vub_get_config; + vdc->set_status = vub_set_status; +} + +static const TypeInfo vub_info = { + .name = TYPE_VHOST_USER_BASE, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserBase), + .class_init = vub_class_init, + .class_size = sizeof(VHostUserBaseClass), + .abstract = true +}; + + +/* + * The following is a concrete implementation of the base class which + * allows the user to define the key parameters via the command line. + */ + +static const VMStateDescription vud_vmstate = { + .name = "vhost-user-device", + .unmigratable = 1, +}; + +static Property vud_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), + DEFINE_PROP_UINT16("virtio-id", VHostUserBase, virtio_id, 0), + DEFINE_PROP_UINT32("num_vqs", VHostUserBase, num_vqs, 1), + DEFINE_PROP_UINT32("config_size", VHostUserBase, config_size, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vud_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, vud_properties); + dc->vmsd = &vud_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); +} + +static const TypeInfo vud_info = { + .name = TYPE_VHOST_USER_DEVICE, + .parent = TYPE_VHOST_USER_BASE, + .instance_size = sizeof(VHostUserBase), + .class_init = vud_class_init, + .class_size = sizeof(VHostUserBaseClass), +}; + +static void vu_register_types(void) +{ + type_register_static(&vub_info); + type_register_static(&vud_info); +} + +type_init(vu_register_types) diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c index 3b013f2d0f..3d7fae3984 100644 --- a/hw/virtio/vhost-user-gpio.c +++ b/hw/virtio/vhost-user-gpio.c @@ -205,6 +205,12 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask); } +static struct vhost_dev *vu_gpio_get_vhost(VirtIODevice *vdev) +{ + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + return &gpio->vhost_dev; +} + static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserGPIO *gpio) { virtio_delete_queue(gpio->command_vq); @@ -413,6 +419,7 @@ static void vu_gpio_class_init(ObjectClass *klass, void *data) vdc->get_config = vu_gpio_get_config; vdc->set_status = vu_gpio_set_status; vdc->guest_notifier_mask = vu_gpio_guest_notifier_mask; + vdc->get_vhost = vu_gpio_get_vhost; } static const TypeInfo vu_gpio_info = { diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 8dcf049d42..3766b415f8 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -10,6 +10,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "hw/virtio/virtio-dmabuf.h" #include "hw/virtio/vhost.h" #include "hw/virtio/virtio-crypto.h" #include "hw/virtio/vhost-user.h" @@ -21,6 +22,7 @@ #include "sysemu/kvm.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" +#include "qemu/uuid.h" #include "qemu/sockets.h" #include "sysemu/runstate.h" #include "sysemu/cryptodev.h" @@ -56,27 +58,6 @@ */ #define VHOST_USER_MAX_CONFIG_SIZE 256 -enum VhostUserProtocolFeature { - VHOST_USER_PROTOCOL_F_MQ = 0, - VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, - VHOST_USER_PROTOCOL_F_RARP = 2, - VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, - VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, - VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, - VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, - VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, - VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, - VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, - VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, - /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ - VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, - VHOST_USER_PROTOCOL_F_STATUS = 16, - VHOST_USER_PROTOCOL_F_MAX -}; - #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) typedef enum VhostUserRequest { @@ -121,6 +102,7 @@ typedef enum VhostUserRequest { VHOST_USER_REM_MEM_REG = 38, VHOST_USER_SET_STATUS = 39, VHOST_USER_GET_STATUS = 40, + VHOST_USER_GET_SHARED_OBJECT = 41, VHOST_USER_MAX } VhostUserRequest; @@ -129,6 +111,9 @@ typedef enum VhostUserBackendRequest { VHOST_USER_BACKEND_IOTLB_MSG = 1, VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6, + VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7, + VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8, VHOST_USER_BACKEND_MAX } VhostUserBackendRequest; @@ -202,6 +187,10 @@ typedef struct VhostUserInflight { uint16_t queue_size; } VhostUserInflight; +typedef struct VhostUserShared { + unsigned char uuid[16]; +} VhostUserShared; + typedef struct { VhostUserRequest request; @@ -226,6 +215,7 @@ typedef union { VhostUserCryptoSession session; VhostUserVringArea area; VhostUserInflight inflight; + VhostUserShared object; } VhostUserPayload; typedef struct VhostUserMsg { @@ -1601,6 +1591,139 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, return 0; } +static int +vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev, + VhostUserShared *object) +{ + QemuUUID uuid; + + memcpy(uuid.data, object->uuid, sizeof(object->uuid)); + return virtio_add_vhost_device(&uuid, dev); +} + +static int +vhost_user_backend_handle_shared_object_remove(VhostUserShared *object) +{ + QemuUUID uuid; + + memcpy(uuid.data, object->uuid, sizeof(object->uuid)); + return virtio_remove_resource(&uuid); +} + +static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr, + VhostUserPayload *payload, Error **errp) +{ + struct iovec iov[] = { + { .iov_base = hdr, .iov_len = VHOST_USER_HDR_SIZE }, + { .iov_base = payload, .iov_len = hdr->size }, + }; + + hdr->flags &= ~VHOST_USER_NEED_REPLY_MASK; + hdr->flags |= VHOST_USER_REPLY_MASK; + + return !qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), errp); +} + +static bool +vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr, + VhostUserPayload *payload, Error **errp) +{ + hdr->size = sizeof(payload->u64); + return vhost_user_send_resp(ioc, hdr, payload, errp); +} + +int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid, + int *dmabuf_fd) +{ + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->user->chr; + int ret; + VhostUserMsg msg = { + .hdr.request = VHOST_USER_GET_SHARED_OBJECT, + .hdr.flags = VHOST_USER_VERSION, + }; + memcpy(msg.payload.object.uuid, uuid, sizeof(msg.payload.object.uuid)); + + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; + } + + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + return ret; + } + + if (msg.hdr.request != VHOST_USER_GET_SHARED_OBJECT) { + error_report("Received unexpected msg type. " + "Expected %d received %d", + VHOST_USER_GET_SHARED_OBJECT, msg.hdr.request); + return -EPROTO; + } + + *dmabuf_fd = qemu_chr_fe_get_msgfd(chr); + if (*dmabuf_fd < 0) { + error_report("Failed to get dmabuf fd"); + return -EIO; + } + + return 0; +} + +static int +vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u, + QIOChannel *ioc, + VhostUserHeader *hdr, + VhostUserPayload *payload) +{ + QemuUUID uuid; + CharBackend *chr = u->user->chr; + Error *local_err = NULL; + int dmabuf_fd = -1; + int fd_num = 0; + + memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid)); + + payload->u64 = 0; + switch (virtio_object_type(&uuid)) { + case TYPE_DMABUF: + dmabuf_fd = virtio_lookup_dmabuf(&uuid); + break; + case TYPE_VHOST_DEV: + { + struct vhost_dev *dev = virtio_lookup_vhost_device(&uuid); + if (dev == NULL) { + payload->u64 = -EINVAL; + break; + } + int ret = vhost_user_get_shared_object(dev, uuid.data, &dmabuf_fd); + if (ret < 0) { + payload->u64 = ret; + } + break; + } + case TYPE_INVALID: + payload->u64 = -EINVAL; + break; + } + + if (dmabuf_fd != -1) { + fd_num++; + } + + if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) { + error_report("Failed to set msg fds."); + payload->u64 = -EINVAL; + } + + if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload, &local_err)) { + error_report_err(local_err); + return -EINVAL; + } + + return 0; +} + static void close_backend_channel(struct vhost_user *u) { g_source_destroy(u->backend_src); @@ -1658,6 +1781,16 @@ static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, fd ? fd[0] : -1); break; + case VHOST_USER_BACKEND_SHARED_OBJECT_ADD: + ret = vhost_user_backend_handle_shared_object_add(dev, &payload.object); + break; + case VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE: + ret = vhost_user_backend_handle_shared_object_remove(&payload.object); + break; + case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP: + ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc, + &hdr, &payload); + break; default: error_report("Received unexpected msg type: %d.", hdr.request); ret = -EINVAL; @@ -1668,21 +1801,10 @@ static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, * directly in their request handlers. */ if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { - struct iovec iovec[2]; - - - hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; - hdr.flags |= VHOST_USER_REPLY_MASK; - payload.u64 = !!ret; hdr.size = sizeof(payload.u64); - iovec[0].iov_base = &hdr; - iovec[0].iov_len = VHOST_USER_HDR_SIZE; - iovec[1].iov_base = &payload; - iovec[1].iov_len = hdr.size; - - if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { + if (!vhost_user_send_resp(ioc, &hdr, &payload, &local_err)) { error_report_err(local_err); goto err; } diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 42f2a4bae9..89ff02a999 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -14,6 +14,7 @@ #include #include #include +#include "exec/target_page.h" #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio-net.h" @@ -23,7 +24,6 @@ #include "migration/blocker.h" #include "qemu/cutils.h" #include "qemu/main-loop.h" -#include "cpu.h" #include "trace.h" #include "qapi/error.h" @@ -31,18 +31,20 @@ * Return one past the end of the end of section. Be careful with uint64_t * conversions! */ -static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section) +static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section, + int page_mask) { Int128 llend = int128_make64(section->offset_within_address_space); llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + llend = int128_and(llend, int128_exts64(page_mask)); return llend; } static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, uint64_t iova_min, - uint64_t iova_max) + uint64_t iova_max, + int page_mask) { Int128 llend; @@ -68,7 +70,7 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, */ if (!memory_region_is_iommu(section->mr)) { - llend = vhost_vdpa_section_end(section); + llend = vhost_vdpa_section_end(section, page_mask); if (int128_gt(llend, int128_make64(iova_max))) { error_report("RAM section out of device range (max=0x%" PRIx64 ", end addr=0x%" PRIx64 ")", @@ -311,9 +313,11 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, Int128 llend, llsize; void *vaddr; int ret; + int page_size = qemu_target_page_size(); + int page_mask = -page_size; if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, - v->iova_range.last)) { + v->iova_range.last, page_mask)) { return; } if (memory_region_is_iommu(section->mr)) { @@ -321,16 +325,16 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, return; } - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { + if (unlikely((section->offset_within_address_space & ~page_mask) != + (section->offset_within_region & ~page_mask))) { trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, - section->offset_within_address_space & ~TARGET_PAGE_MASK, - section->offset_within_region & ~TARGET_PAGE_MASK); + section->offset_within_address_space & ~page_mask, + section->offset_within_region & ~page_mask); return; } - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - llend = vhost_vdpa_section_end(section); + iova = ROUND_UP(section->offset_within_address_space, page_size); + llend = vhost_vdpa_section_end(section, page_mask); if (int128_ge(int128_make64(iova), llend)) { return; } @@ -396,25 +400,27 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, hwaddr iova; Int128 llend, llsize; int ret; + int page_size = qemu_target_page_size(); + int page_mask = -page_size; if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, - v->iova_range.last)) { + v->iova_range.last, page_mask)) { return; } if (memory_region_is_iommu(section->mr)) { vhost_vdpa_iommu_region_del(listener, section); } - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { + if (unlikely((section->offset_within_address_space & ~page_mask) != + (section->offset_within_region & ~page_mask))) { trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, - section->offset_within_address_space & ~TARGET_PAGE_MASK, - section->offset_within_region & ~TARGET_PAGE_MASK); + section->offset_within_address_space & ~page_mask, + section->offset_within_region & ~page_mask); return; } - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - llend = vhost_vdpa_section_end(section); + iova = ROUND_UP(section->offset_within_address_space, page_size); + llend = vhost_vdpa_section_end(section, page_mask); trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(int128_sub(llend, int128_one()))); @@ -876,18 +882,17 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) return idx; } -static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) +int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) { - int i; - trace_vhost_vdpa_set_vring_ready(dev); - for (i = 0; i < dev->nvqs; ++i) { - struct vhost_vring_state state = { - .index = dev->vq_index + i, - .num = 1, - }; - vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); - } - return 0; + struct vhost_dev *dev = v->dev; + struct vhost_vring_state state = { + .index = idx, + .num = 1, + }; + int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); + + trace_vhost_vdpa_set_vring_ready(dev, idx, r); + return r; } static int vhost_vdpa_set_config_call(struct vhost_dev *dev, @@ -1298,7 +1303,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) if (unlikely(!ok)) { return -1; } - vhost_vdpa_set_vring_ready(dev); } else { vhost_vdpa_suspend(dev); vhost_vdpa_svqs_stop(dev); diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index 7515b0947b..1dd96ed20f 100644 --- a/hw/virtio/virtio-qmp.c +++ b/hw/virtio/virtio-qmp.c @@ -17,6 +17,7 @@ #include "qapi/qapi-commands-qom.h" #include "qapi/qmp/qobject.h" #include "qapi/qmp/qjson.h" +#include "hw/virtio/vhost-user.h" #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/vhost_types.h" @@ -30,32 +31,13 @@ #include "standard-headers/linux/virtio_iommu.h" #include "standard-headers/linux/virtio_mem.h" #include "standard-headers/linux/virtio_vsock.h" +#include "standard-headers/linux/virtio_gpio.h" #include CONFIG_DEVICES #define FEATURE_ENTRY(name, desc) (qmp_virtio_feature_map_t) \ { .virtio_bit = name, .feature_desc = desc } -enum VhostUserProtocolFeature { - VHOST_USER_PROTOCOL_F_MQ = 0, - VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, - VHOST_USER_PROTOCOL_F_RARP = 2, - VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, - VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, - VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, - VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, - VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, - VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, - VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, - VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, - VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, - VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, - VHOST_USER_PROTOCOL_F_MAX -}; - /* Virtio transport features mapping */ static const qmp_virtio_feature_map_t virtio_transport_map[] = { /* Virtio device transport features */ @@ -136,6 +118,9 @@ static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = { FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS, \ "VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS: Configuration for " "memory slots supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_STATUS, \ + "VHOST_USER_PROTOCOL_F_STATUS: Querying and notifying back-end " + "device status supported"), { -1, "" } }; @@ -178,6 +163,8 @@ static const qmp_virtio_feature_map_t virtio_blk_feature_map[] = { "VIRTIO_BLK_F_DISCARD: Discard command supported"), FEATURE_ENTRY(VIRTIO_BLK_F_WRITE_ZEROES, \ "VIRTIO_BLK_F_WRITE_ZEROES: Write zeroes command supported"), + FEATURE_ENTRY(VIRTIO_BLK_F_SECURE_ERASE, \ + "VIRTIO_BLK_F_SECURE_ERASE: Secure erase supported"), FEATURE_ENTRY(VIRTIO_BLK_F_ZONED, \ "VIRTIO_BLK_F_ZONED: Zoned block devices"), #ifndef VIRTIO_BLK_NO_LEGACY @@ -301,6 +288,14 @@ static const qmp_virtio_feature_map_t virtio_net_feature_map[] = { FEATURE_ENTRY(VIRTIO_NET_F_CTRL_MAC_ADDR, \ "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control " "channel"), + FEATURE_ENTRY(VIRTIO_NET_F_NOTF_COAL, \ + "VIRTIO_NET_F_NOTF_COAL: Device supports coalescing notifications"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_USO4, \ + "VIRTIO_NET_F_GUEST_USO4: Driver can receive USOv4"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_USO6, \ + "VIRTIO_NET_F_GUEST_USO4: Driver can receive USOv6"), + FEATURE_ENTRY(VIRTIO_NET_F_HOST_USO, \ + "VIRTIO_NET_F_HOST_USO: Device can receive USO"), FEATURE_ENTRY(VIRTIO_NET_F_HASH_REPORT, \ "VIRTIO_NET_F_HASH_REPORT: Hash reporting supported"), FEATURE_ENTRY(VIRTIO_NET_F_RSS, \ @@ -471,6 +466,18 @@ static const qmp_virtio_feature_map_t virtio_rng_feature_map[] = { }; #endif +/* virtio/vhost-gpio features mapping */ +#ifdef CONFIG_VHOST_USER_GPIO +static const qmp_virtio_feature_map_t virtio_gpio_feature_map[] = { + FEATURE_ENTRY(VIRTIO_GPIO_F_IRQ, \ + "VIRTIO_GPIO_F_IRQ: Device supports interrupts on GPIO lines"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; +#endif + #define CONVERT_FEATURES(type, map, is_status, bitmap) \ ({ \ type *list = NULL; \ @@ -627,6 +634,12 @@ VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap) features->dev_features = CONVERT_FEATURES(strList, virtio_rng_feature_map, 0, bitmap); break; +#endif +#ifdef CONFIG_VHOST_USER_GPIO + case VIRTIO_ID_GPIO: + features->dev_features = + CONVERT_FEATURES(strList, virtio_gpio_feature_map, 0, bitmap); + break; #endif /* No features */ case VIRTIO_ID_9P: @@ -653,7 +666,6 @@ VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap) case VIRTIO_ID_DMABUF: case VIRTIO_ID_PARAM_SERV: case VIRTIO_ID_AUDIO_POLICY: - case VIRTIO_ID_GPIO: break; default: g_assert_not_reached(); @@ -667,70 +679,43 @@ VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap) return features; } +static int query_dev_child(Object *child, void *opaque) +{ + VirtioInfoList **vdevs = opaque; + Object *dev = object_dynamic_cast(child, TYPE_VIRTIO_DEVICE); + if (dev != NULL && DEVICE(dev)->realized) { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtioInfo *info = g_new(VirtioInfo, 1); + + /* Get canonical path & name of device */ + info->path = object_get_canonical_path(dev); + info->name = g_strdup(vdev->name); + QAPI_LIST_PREPEND(*vdevs, info); + } + return 0; +} + VirtioInfoList *qmp_x_query_virtio(Error **errp) { - VirtioInfoList *list = NULL; - VirtioInfo *node; - VirtIODevice *vdev; + VirtioInfoList *vdevs = NULL; - QTAILQ_FOREACH(vdev, &virtio_list, next) { - DeviceState *dev = DEVICE(vdev); - Error *err = NULL; - QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err); - - if (err == NULL) { - GString *is_realized = qobject_to_json_pretty(obj, true); - /* virtio device is NOT realized, remove it from list */ - if (!strncmp(is_realized->str, "false", 4)) { - QTAILQ_REMOVE(&virtio_list, vdev, next); - } else { - node = g_new(VirtioInfo, 1); - node->path = g_strdup(dev->canonical_path); - node->name = g_strdup(vdev->name); - QAPI_LIST_PREPEND(list, node); - } - g_string_free(is_realized, true); - } - qobject_unref(obj); + /* Query the QOM composition tree recursively for virtio devices */ + object_child_foreach_recursive(object_get_root(), query_dev_child, &vdevs); + if (vdevs == NULL) { + error_setg(errp, "No virtio devices found"); } - - return list; + return vdevs; } VirtIODevice *qmp_find_virtio_device(const char *path) { - VirtIODevice *vdev; - - QTAILQ_FOREACH(vdev, &virtio_list, next) { - DeviceState *dev = DEVICE(vdev); - - if (strcmp(dev->canonical_path, path) != 0) { - continue; - } - - Error *err = NULL; - QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err); - if (err == NULL) { - GString *is_realized = qobject_to_json_pretty(obj, true); - /* virtio device is NOT realized, remove it from list */ - if (!strncmp(is_realized->str, "false", 4)) { - g_string_free(is_realized, true); - qobject_unref(obj); - QTAILQ_REMOVE(&virtio_list, vdev, next); - return NULL; - } - g_string_free(is_realized, true); - } else { - /* virtio device doesn't exist in QOM tree */ - QTAILQ_REMOVE(&virtio_list, vdev, next); - qobject_unref(obj); - return NULL; - } - /* device exists in QOM tree & is realized */ - qobject_unref(obj); - return vdev; + /* Verify the canonical path is a realized virtio device */ + Object *dev = object_dynamic_cast(object_resolve_path(path, NULL), + TYPE_VIRTIO_DEVICE); + if (!dev || !DEVICE(dev)->realized) { + return NULL; } - return NULL; + return VIRTIO_DEVICE(dev); } VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp) @@ -740,7 +725,7 @@ VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp) vdev = qmp_find_virtio_device(path); if (vdev == NULL) { - error_setg(errp, "Path %s is not a VirtIODevice", path); + error_setg(errp, "Path %s is not a realized VirtIODevice", path); return NULL; } diff --git a/hw/virtio/virtio-qmp.h b/hw/virtio/virtio-qmp.h index 8af5f5e65a..245a446a56 100644 --- a/hw/virtio/virtio-qmp.h +++ b/hw/virtio/virtio-qmp.h @@ -15,13 +15,6 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/vhost.h" -#include "qemu/queue.h" - -typedef QTAILQ_HEAD(QmpVirtIODeviceList, VirtIODevice) QmpVirtIODeviceList; - -/* QAPI list of realized VirtIODevices */ -extern QmpVirtIODeviceList virtio_list; - VirtIODevice *qmp_find_virtio_device(const char *path); VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap); VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap); diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 4577f3f5b3..6facd64fbc 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -45,8 +45,6 @@ #include "standard-headers/linux/virtio_mem.h" #include "standard-headers/linux/virtio_vsock.h" -QmpVirtIODeviceList virtio_list; - /* * Maximum size of virtio device config space */ @@ -999,7 +997,12 @@ void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, /* Called within rcu_read_lock(). */ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) { - uint16_t num_heads = vring_avail_idx(vq) - idx; + uint16_t avail_idx, num_heads; + + /* Use shadow index whenever possible. */ + avail_idx = (vq->shadow_avail_idx != idx) ? vq->shadow_avail_idx + : vring_avail_idx(vq); + num_heads = avail_idx - idx; /* Check it isn't doing very strange things with descriptor numbers. */ if (num_heads > vq->vring.num) { @@ -1007,8 +1010,15 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) idx, vq->shadow_avail_idx); return -EINVAL; } - /* On success, callers read a descriptor at vq->last_avail_idx. - * Make sure descriptor read does not bypass avail index read. */ + /* + * On success, callers read a descriptor at vq->last_avail_idx. + * Make sure descriptor read does not bypass avail index read. + * + * This is necessary even if we are using a shadow index, since + * the shadow index could have been initialized by calling + * vring_avail_idx() outside of this function, i.e., by a guest + * memory read not accompanied by a barrier. + */ if (num_heads) { smp_rmb(); } @@ -1039,9 +1049,10 @@ enum { VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */ }; +/* Reads the 'desc->next' descriptor into '*desc'. */ static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, MemoryRegionCache *desc_cache, - unsigned int max, unsigned int *next) + unsigned int max) { /* If this descriptor says it doesn't chain, we're done. */ if (!(desc->flags & VRING_DESC_F_NEXT)) { @@ -1049,16 +1060,12 @@ static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, } /* Check they're not leading us off end of descriptors. */ - *next = desc->next; - /* Make sure compiler knows to grab that: we don't want it changing! */ - smp_wmb(); - - if (*next >= max) { - virtio_error(vdev, "Desc next is %u", *next); + if (desc->next >= max) { + virtio_error(vdev, "Desc next is %u", desc->next); return VIRTQUEUE_READ_DESC_ERROR; } - vring_split_desc_read(vdev, desc, desc_cache, *next); + vring_split_desc_read(vdev, desc, desc_cache, desc->next); return VIRTQUEUE_READ_DESC_MORE; } @@ -1071,10 +1078,12 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq, VirtIODevice *vdev = vq->vdev; unsigned int idx; unsigned int total_bufs, in_total, out_total; - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + MemoryRegionCache indirect_desc_cache; int64_t len = 0; int rc; + address_space_cache_init_empty(&indirect_desc_cache); + idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; @@ -1136,7 +1145,7 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq, goto done; } - rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i); + rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max); } while (rc == VIRTQUEUE_READ_DESC_MORE); if (rc == VIRTQUEUE_READ_DESC_ERROR) { @@ -1207,12 +1216,14 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, VirtIODevice *vdev = vq->vdev; unsigned int idx; unsigned int total_bufs, in_total, out_total; + MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; int64_t len = 0; VRingPackedDesc desc; bool wrap_counter; + address_space_cache_init_empty(&indirect_desc_cache); + idx = vq->last_avail_idx; wrap_counter = vq->last_avail_wrap_counter; total_bufs = in_total = out_total = 0; @@ -1487,7 +1498,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) { unsigned int i, head, max; VRingMemoryRegionCaches *caches; - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; int64_t len; VirtIODevice *vdev = vq->vdev; @@ -1498,6 +1509,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) VRingDesc desc; int rc; + address_space_cache_init_empty(&indirect_desc_cache); + RCU_READ_LOCK_GUARD(); if (virtio_queue_empty_rcu(vq)) { goto done; @@ -1587,7 +1600,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) goto err_undo_map; } - rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i); + rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max); } while (rc == VIRTQUEUE_READ_DESC_MORE); if (rc == VIRTQUEUE_READ_DESC_ERROR) { @@ -1624,7 +1637,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) { unsigned int i, max; VRingMemoryRegionCaches *caches; - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; int64_t len; VirtIODevice *vdev = vq->vdev; @@ -1636,6 +1649,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) uint16_t id; int rc; + address_space_cache_init_empty(&indirect_desc_cache); + RCU_READ_LOCK_GUARD(); if (virtio_queue_packed_empty_rcu(vq)) { goto done; @@ -3651,7 +3666,6 @@ static void virtio_device_realize(DeviceState *dev, Error **errp) vdev->listener.commit = virtio_memory_listener_commit; vdev->listener.name = "virtio"; memory_listener_register(&vdev->listener, vdev->dma_as); - QTAILQ_INSERT_TAIL(&virtio_list, vdev, next); } static void virtio_device_unrealize(DeviceState *dev) @@ -3666,7 +3680,6 @@ static void virtio_device_unrealize(DeviceState *dev) vdc->unrealize(dev); } - QTAILQ_REMOVE(&virtio_list, vdev, next); g_free(vdev->bus_name); vdev->bus_name = NULL; } @@ -3840,8 +3853,6 @@ static void virtio_device_class_init(ObjectClass *klass, void *data) vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl; vdc->legacy_features |= VIRTIO_LEGACY_FEATURES; - - QTAILQ_INIT(&virtio_list); } bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev) @@ -3970,13 +3981,15 @@ VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path, } else { unsigned int head, i, max; VRingMemoryRegionCaches *caches; - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; VRingDesc desc; VirtioRingDescList *list = NULL; VirtioRingDescList *node; int rc; int ndescs; + address_space_cache_init_empty(&indirect_desc_cache); + RCU_READ_LOCK_GUARD(); max = vq->vring.num; @@ -4041,8 +4054,7 @@ VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path, list = node; ndescs++; - rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, - max, &i); + rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max); } while (rc == VIRTQUEUE_READ_DESC_MORE); element->descs = list; done: diff --git a/include/exec/memory.h b/include/exec/memory.h index ef23d65afc..c99842d2fc 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -2671,9 +2671,6 @@ struct MemoryRegionCache { bool is_write; }; -#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mrs.mr = NULL }) - - /* address_space_ld*_cached: load from a cached #MemoryRegion * address_space_st*_cached: store into a cached #MemoryRegion * @@ -2762,6 +2759,19 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, hwaddr len, bool is_write); +/** + * address_space_cache_init_empty: Initialize empty #MemoryRegionCache + * + * @cache: The #MemoryRegionCache to operate on. + * + * Initializes #MemoryRegionCache structure without memory region attached. + * Cache initialized this way can only be safely destroyed, but not used. + */ +static inline void address_space_cache_init_empty(MemoryRegionCache *cache) +{ + cache->mrs.mr = NULL; +} + /** * address_space_cache_invalidate: complete a write to a #MemoryRegionCache * diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index a1648220ff..68d9d15f50 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -3,7 +3,6 @@ #include "qapi/qapi-types-acpi.h" #include "qom/object.h" -#include "hw/boards.h" #include "hw/qdev-core.h" /* These values are part of guest ABI, and can not be changed */ @@ -52,7 +51,5 @@ struct AcpiDeviceIfClass { /* */ void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); - void (*madt_cpu)(int uid, const CPUArchIdList *apic_ids, GArray *entry, - bool force_enabled); }; #endif diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h index 999caaf510..bc901660fb 100644 --- a/include/hw/acpi/cpu.h +++ b/include/hw/acpi/cpu.h @@ -15,6 +15,7 @@ #include "hw/qdev-core.h" #include "hw/acpi/acpi.h" #include "hw/acpi/aml-build.h" +#include "hw/boards.h" #include "hw/hotplug.h" typedef struct AcpiCpuStatus { @@ -55,8 +56,11 @@ typedef struct CPUHotplugFeatures { const char *smi_path; } CPUHotplugFeatures; +typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids, + GArray *entry, bool force_enabled); + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - hwaddr io_base, + build_madt_cpu_fn build_madt_cpu, hwaddr io_base, const char *res_root, const char *event_handler_method); diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h index d831bbd889..ba84ce0214 100644 --- a/include/hw/acpi/generic_event_device.h +++ b/include/hw/acpi/generic_event_device.h @@ -69,8 +69,6 @@ #define TYPE_ACPI_GED "acpi-ged" OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) -#define TYPE_ACPI_GED_X86 "acpi-ged-x86" - #define ACPI_GED_EVT_SEL_OFFSET 0x0 #define ACPI_GED_EVT_SEL_LEN 0x4 diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index 42c7e581a7..3c795a6278 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -135,6 +135,10 @@ REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18) REG32(CXL_HDM_DECODER##n##_TARGET_LIST_LO, \ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x24) \ REG32(CXL_HDM_DECODER##n##_TARGET_LIST_HI, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x28) \ + REG32(CXL_HDM_DECODER##n##_DPA_SKIP_LO, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x24) \ + REG32(CXL_HDM_DECODER##n##_DPA_SKIP_HI, \ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x28) REG32(CXL_HDM_DECODER_CAPABILITY, CXL_HDM_REGISTERS_OFFSET) @@ -147,7 +151,13 @@ REG32(CXL_HDM_DECODER_GLOBAL_CONTROL, CXL_HDM_REGISTERS_OFFSET + 4) FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, POISON_ON_ERR_EN, 0, 1) FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, HDM_DECODER_ENABLE, 1, 1) +/* Support 4 decoders at all levels of topology */ +#define CXL_HDM_DECODER_COUNT 4 + HDM_DECODER_INIT(0); +HDM_DECODER_INIT(1); +HDM_DECODER_INIT(2); +HDM_DECODER_INIT(3); /* 8.2.5.13 - CXL Extended Security Capability Structure (Root complex only) */ #define EXTSEC_ENTRY_MAX 256 @@ -225,26 +235,14 @@ void cxl_component_create_dvsec(CXLComponentState *cxl_cstate, enum reg_type cxl_dev_type, uint16_t length, uint16_t type, uint8_t rev, uint8_t *body); -static inline int cxl_decoder_count_enc(int count) -{ - switch (count) { - case 1: return 0; - case 2: return 1; - case 4: return 2; - case 6: return 3; - case 8: return 4; - case 10: return 5; - } - return 0; -} +int cxl_decoder_count_enc(int count); +int cxl_decoder_count_dec(int enc_cnt); uint8_t cxl_interleave_ways_enc(int iw, Error **errp); +int cxl_interleave_ways_dec(uint8_t iw_enc, Error **errp); uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp); -static inline hwaddr cxl_decode_ig(int ig) -{ - return 1ULL << (ig + 8); -} +hwaddr cxl_decode_ig(int ig); CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb); bool cxl_get_hb_passthrough(PCIHostState *hb); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 0fabece236..bec38cb92c 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -129,6 +129,12 @@ struct PCMachineClass { /* resizable acpi blob compat */ bool resizable_acpi_blob; + + /* + * whether the machine type implements broken 32-bit address space bound + * check for memory. + */ + bool broken_32bit_mem_addr_check; }; #define TYPE_PC_MACHINE "generic-pc-machine" diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index ea54a81a15..5cd452115a 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -77,6 +77,9 @@ struct PCIBridge { pci_map_irq_fn map_irq; const char *bus_name; + + /* SLT is RO for PCIE to PCIE bridges, but old QEMU versions had it RW */ + bool pcie_writeable_slt_bug; }; #define PCI_BRIDGE_DEV_PROP_CHASSIS_NR "chassis_nr" diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 31a251a9f5..1860b541d8 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -196,4 +196,7 @@ int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd); +int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid, + int *dmabuf_fd); + #endif /* VHOST_BACKEND_H */ diff --git a/include/hw/virtio/vhost-user-device.h b/include/hw/virtio/vhost-user-device.h new file mode 100644 index 0000000000..3ddf88a146 --- /dev/null +++ b/include/hw/virtio/vhost-user-device.h @@ -0,0 +1,46 @@ +/* + * Vhost-user generic virtio device + * + * Copyright (c) 2023 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef QEMU_VHOST_USER_DEVICE_H +#define QEMU_VHOST_USER_DEVICE_H + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" + +#define TYPE_VHOST_USER_BASE "vhost-user-base" + +OBJECT_DECLARE_TYPE(VHostUserBase, VHostUserBaseClass, VHOST_USER_BASE) + +struct VHostUserBase { + VirtIODevice parent; + /* Properties */ + CharBackend chardev; + uint16_t virtio_id; + uint32_t num_vqs; + uint32_t config_size; + /* State tracking */ + VhostUserState vhost_user; + struct vhost_virtqueue *vhost_vq; + struct vhost_dev vhost_dev; + GPtrArray *vqs; + bool connected; +}; + + /* needed so we can use the base realize after specialisation + tweaks */ +struct VHostUserBaseClass { + /*< private >*/ + VirtioDeviceClass parent_class; + /*< public >*/ + DeviceRealize parent_realize; +}; + +/* shared for the benefit of the derived pci class */ +#define TYPE_VHOST_USER_DEVICE "vhost-user-device" + +#endif /* QEMU_VHOST_USER_DEVICE_H */ diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index 191216a74f..9f9ddf878d 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -11,6 +11,28 @@ #include "chardev/char-fe.h" #include "hw/virtio/virtio.h" +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_MQ = 0, + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, + VHOST_USER_PROTOCOL_F_RARP = 2, + VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, + VHOST_USER_PROTOCOL_F_NET_MTU = 4, + VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, + VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, + VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, + VHOST_USER_PROTOCOL_F_CONFIG = 9, + VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, + VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, + VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, + VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, + VHOST_USER_PROTOCOL_F_STATUS = 16, + VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 17, + VHOST_USER_PROTOCOL_F_MAX +}; + /** * VhostUserHostNotifier - notifier information for one queue * @rcu: rcu_head for cleanup diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index e64bfc7f98..5407d54fd7 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -57,6 +57,7 @@ typedef struct vhost_vdpa { } VhostVDPA; int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); +int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, hwaddr size, void *vaddr, bool readonly); diff --git a/include/hw/virtio/virtio-dmabuf.h b/include/hw/virtio/virtio-dmabuf.h new file mode 100644 index 0000000000..627c3b6db7 --- /dev/null +++ b/include/hw/virtio/virtio-dmabuf.h @@ -0,0 +1,100 @@ +/* + * Virtio Shared dma-buf + * + * Copyright Red Hat, Inc. 2023 + * + * Authors: + * Albert Esteve + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef VIRTIO_DMABUF_H +#define VIRTIO_DMABUF_H + +#include "qemu/uuid.h" +#include "vhost.h" + +typedef enum SharedObjectType { + TYPE_INVALID = 0, + TYPE_DMABUF, + TYPE_VHOST_DEV, +} SharedObjectType; + +typedef struct VirtioSharedObject { + SharedObjectType type; + gpointer value; +} VirtioSharedObject; + +/** + * virtio_add_dmabuf() - Add a new dma-buf resource to the lookup table + * @uuid: new resource's UUID + * @dmabuf_fd: the dma-buf descriptor that will be stored and shared with + * other virtio devices. The caller retains ownership over the + * descriptor and its lifecycle. + * + * Note: @dmabuf_fd must be a valid (non-negative) file descriptor. + * + * Return: true if the UUID did not exist and the resource has been added, + * false if another resource with the same UUID already existed. + * Note that if it finds a repeated UUID, the resource is not inserted in + * the lookup table. + */ +bool virtio_add_dmabuf(QemuUUID *uuid, int dmabuf_fd); + +/** + * virtio_add_vhost_device() - Add a new exporter vhost device that holds the + * resource with the associated UUID + * @uuid: new resource's UUID + * @dev: the pointer to the vhost device that holds the resource. The caller + * retains ownership over the device struct and its lifecycle. + * + * Return: true if the UUID did not exist and the device has been tracked, + * false if another resource with the same UUID already existed. + * Note that if it finds a repeated UUID, the resource is not inserted in + * the lookup table. + */ +bool virtio_add_vhost_device(QemuUUID *uuid, struct vhost_dev *dev); + +/** + * virtio_remove_resource() - Removes a resource from the lookup table + * @uuid: resource's UUID + * + * Return: true if the UUID has been found and removed from the lookup table. + */ +bool virtio_remove_resource(const QemuUUID *uuid); + +/** + * virtio_lookup_dmabuf() - Looks for a dma-buf resource in the lookup table + * @uuid: resource's UUID + * + * Return: the dma-buf file descriptor integer, or -1 if the key is not found. + */ +int virtio_lookup_dmabuf(const QemuUUID *uuid); + +/** + * virtio_lookup_vhost_device() - Looks for an exporter vhost device in the + * lookup table + * @uuid: resource's UUID + * + * Return: pointer to the vhost_dev struct, or NULL if the key is not found. + */ +struct vhost_dev *virtio_lookup_vhost_device(const QemuUUID *uuid); + +/** + * virtio_object_type() - Looks for the type of resource in the lookup table + * @uuid: resource's UUID + * + * Return: the type of resource associated with the UUID, or TYPE_INVALID if + * the key is not found. + */ +SharedObjectType virtio_object_type(const QemuUUID *uuid); + +/** + * virtio_free_resources() - Destroys all keys and values of the shared + * resources lookup table, and frees them + */ +void virtio_free_resources(void); + +#endif /* VIRTIO_DMABUF_H */ diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index e07a723027..55977f01f0 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -38,6 +38,12 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) /* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */ #define MAC_TABLE_ENTRIES 64 +/* + * The maximum number of VLANs in the VLAN filter table + * added by VIRTIO_NET_CTRL_VLAN_ADD + */ +#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ + typedef struct virtio_net_conf { uint32_t txtimer; diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h index dc40ee1fc9..e24a1099e4 100644 --- a/include/qemu/uuid.h +++ b/include/qemu/uuid.h @@ -96,4 +96,6 @@ int qemu_uuid_parse(const char *str, QemuUUID *uuid); QemuUUID qemu_uuid_bswap(QemuUUID uuid); +uint32_t qemu_uuid_hash(const void *uuid); + #endif diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 4e94c50bc7..939c984d5b 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -114,6 +114,7 @@ static const uint64_t vdpa_svq_device_features = BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | BIT_ULL(VIRTIO_NET_F_CTRL_RX) | + BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_F_ANY_LAYOUT) | @@ -374,6 +375,22 @@ static int vhost_vdpa_net_data_start(NetClientState *nc) return 0; } +static int vhost_vdpa_net_data_load(NetClientState *nc) +{ + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + struct vhost_vdpa *v = &s->vhost_vdpa; + bool has_cvq = v->dev->vq_index_end % 2; + + if (has_cvq) { + return 0; + } + + for (int i = 0; i < v->dev->nvqs; ++i) { + vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index); + } + return 0; +} + static void vhost_vdpa_net_client_stop(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); @@ -388,6 +405,8 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc) dev = s->vhost_vdpa.dev; if (dev->vq_index + dev->nvqs == dev->vq_index_end) { g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); + } else { + s->vhost_vdpa.iova_tree = NULL; } } @@ -396,6 +415,7 @@ static NetClientInfo net_vhost_vdpa_info = { .size = sizeof(VhostVDPAState), .receive = vhost_vdpa_receive, .start = vhost_vdpa_net_data_start, + .load = vhost_vdpa_net_data_load, .stop = vhost_vdpa_net_client_stop, .cleanup = vhost_vdpa_cleanup, .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, @@ -508,7 +528,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) s0 = vhost_vdpa_net_first_nc_vdpa(s); v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled; - v->shadow_vqs_enabled = s->always_svq; + v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled; s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; if (s->vhost_vdpa.shadow_data) { @@ -627,7 +647,7 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, * descriptor. Also, we need to take the answer before SVQ pulls by itself, * when BQL is released */ - return vhost_svq_poll(svq); + return vhost_svq_poll(svq, 1); } static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, @@ -968,7 +988,51 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, return 0; } -static int vhost_vdpa_net_load(NetClientState *nc) +static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s, + const VirtIONet *n, + uint16_t vid) +{ + const struct iovec data = { + .iov_base = &vid, + .iov_len = sizeof(vid), + }; + ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_VLAN, + VIRTIO_NET_CTRL_VLAN_ADD, + &data, 1); + if (unlikely(dev_written < 0)) { + return dev_written; + } + if (unlikely(*s->status != VIRTIO_NET_OK)) { + return -EIO; + } + + return 0; +} + +static int vhost_vdpa_net_load_vlan(VhostVDPAState *s, + const VirtIONet *n) +{ + int r; + + if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_VLAN)) { + return 0; + } + + for (int i = 0; i < MAX_VLAN >> 5; i++) { + for (int j = 0; n->vlans[i] && j <= 0x1f; j++) { + if (n->vlans[i] & (1U << j)) { + r = vhost_vdpa_net_load_single_vlan(s, n, (i << 5) + j); + if (unlikely(r != 0)) { + return r; + } + } + } + } + + return 0; +} + +static int vhost_vdpa_net_cvq_load(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); struct vhost_vdpa *v = &s->vhost_vdpa; @@ -977,26 +1041,34 @@ static int vhost_vdpa_net_load(NetClientState *nc) assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - if (!v->shadow_vqs_enabled) { - return 0; + vhost_vdpa_set_vring_ready(v, v->dev->vq_index); + + if (v->shadow_vqs_enabled) { + n = VIRTIO_NET(v->dev->vdev); + r = vhost_vdpa_net_load_mac(s, n); + if (unlikely(r < 0)) { + return r; + } + r = vhost_vdpa_net_load_mq(s, n); + if (unlikely(r)) { + return r; + } + r = vhost_vdpa_net_load_offloads(s, n); + if (unlikely(r)) { + return r; + } + r = vhost_vdpa_net_load_rx(s, n); + if (unlikely(r)) { + return r; + } + r = vhost_vdpa_net_load_vlan(s, n); + if (unlikely(r)) { + return r; + } } - n = VIRTIO_NET(v->dev->vdev); - r = vhost_vdpa_net_load_mac(s, n); - if (unlikely(r < 0)) { - return r; - } - r = vhost_vdpa_net_load_mq(s, n); - if (unlikely(r)) { - return r; - } - r = vhost_vdpa_net_load_offloads(s, n); - if (unlikely(r)) { - return r; - } - r = vhost_vdpa_net_load_rx(s, n); - if (unlikely(r)) { - return r; + for (int i = 0; i < v->dev->vq_index; ++i) { + vhost_vdpa_set_vring_ready(v, i); } return 0; @@ -1007,7 +1079,7 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { .size = sizeof(VhostVDPAState), .receive = vhost_vdpa_receive, .start = vhost_vdpa_net_cvq_start, - .load = vhost_vdpa_net_load, + .load = vhost_vdpa_net_cvq_load, .stop = vhost_vdpa_net_cvq_stop, .cleanup = vhost_vdpa_cleanup, .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, @@ -1273,8 +1345,7 @@ static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, uint64_t backend_features; int64_t cvq_group; uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE | - VIRTIO_CONFIG_S_DRIVER | - VIRTIO_CONFIG_S_FEATURES_OK; + VIRTIO_CONFIG_S_DRIVER; int r; ERRP_GUARD(); @@ -1289,14 +1360,22 @@ static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, return 0; } - r = ioctl(device_fd, VHOST_SET_FEATURES, &features); - if (unlikely(r)) { - error_setg_errno(errp, errno, "Cannot set features"); - } - r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); if (unlikely(r)) { - error_setg_errno(errp, -r, "Cannot set device features"); + error_setg_errno(errp, -r, "Cannot set device status"); + goto out; + } + + r = ioctl(device_fd, VHOST_SET_FEATURES, &features); + if (unlikely(r)) { + error_setg_errno(errp, -r, "Cannot set features"); + goto out; + } + + status |= VIRTIO_CONFIG_S_FEATURES_OK; + r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); + if (unlikely(r)) { + error_setg_errno(errp, -r, "Cannot set device status"); goto out; } @@ -1355,7 +1434,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, VhostVDPAState *s; int ret = 0; assert(name); - int cvq_isolated; + int cvq_isolated = 0; if (is_datapath) { nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, @@ -1395,18 +1474,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; s->vhost_vdpa.shadow_vq_ops_opaque = s; s->cvq_isolated = cvq_isolated; - - /* - * TODO: We cannot migrate devices with CVQ and no x-svq enabled as - * there is no way to set the device state (MAC, MQ, etc) before - * starting the datapath. - * - * Migration blocker ownership now belongs to s->vhost_vdpa. - */ - if (!svq) { - error_setg(&s->vhost_vdpa.migration_blocker, - "net vdpa cannot migrate with CVQ feature"); - } } ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); if (ret) { diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 0469a50101..051a611da3 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -161,6 +161,7 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_GET_MAX_MEM_SLOTS), REQ(VHOST_USER_ADD_MEM_REG), REQ(VHOST_USER_REM_MEM_REG), + REQ(VHOST_USER_GET_SHARED_OBJECT), REQ(VHOST_USER_MAX), }; #undef REQ @@ -322,6 +323,7 @@ vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { fd_size = cmsg->cmsg_len - CMSG_LEN(0); vmsg->fd_num = fd_size / sizeof(int); + assert(fd_size < VHOST_MEMORY_BASELINE_NREGIONS); memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); break; } @@ -900,6 +902,24 @@ vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) { return false; } +static bool +vu_get_shared_object(VuDev *dev, VhostUserMsg *vmsg) +{ + int fd_num = 0; + int dmabuf_fd = -1; + if (dev->iface->get_shared_object) { + dmabuf_fd = dev->iface->get_shared_object( + dev, &vmsg->payload.object.uuid[0]); + } + if (dmabuf_fd != -1) { + DPRINT("dmabuf_fd found for requested UUID\n"); + vmsg->fds[fd_num++] = dmabuf_fd; + } + vmsg->fd_num = fd_num; + + return true; +} + static bool vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) { @@ -1403,6 +1423,105 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, return vu_process_message_reply(dev, &vmsg); } +bool +vu_lookup_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN], + int *dmabuf_fd) +{ + bool result = false; + VhostUserMsg msg_reply; + VhostUserMsg msg = { + .request = VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP, + .size = sizeof(msg.payload.object), + .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + }; + + memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN); + + if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) { + return false; + } + + pthread_mutex_lock(&dev->backend_mutex); + if (!vu_message_write(dev, dev->backend_fd, &msg)) { + goto out; + } + + if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) { + goto out; + } + + if (msg_reply.request != msg.request) { + DPRINT("Received unexpected msg type. Expected %d, received %d", + msg.request, msg_reply.request); + goto out; + } + + if (msg_reply.fd_num != 1) { + DPRINT("Received unexpected number of fds. Expected 1, received %d", + msg_reply.fd_num); + goto out; + } + + *dmabuf_fd = msg_reply.fds[0]; + result = *dmabuf_fd > 0 && msg_reply.payload.u64 == 0; +out: + pthread_mutex_unlock(&dev->backend_mutex); + + return result; +} + +static bool +vu_send_message(VuDev *dev, VhostUserMsg *vmsg) +{ + bool result = false; + pthread_mutex_lock(&dev->backend_mutex); + if (!vu_message_write(dev, dev->backend_fd, vmsg)) { + goto out; + } + + result = true; +out: + pthread_mutex_unlock(&dev->backend_mutex); + + return result; +} + +bool +vu_add_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]) +{ + VhostUserMsg msg = { + .request = VHOST_USER_BACKEND_SHARED_OBJECT_ADD, + .size = sizeof(msg.payload.object), + .flags = VHOST_USER_VERSION, + }; + + memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN); + + if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) { + return false; + } + + return vu_send_message(dev, &msg); +} + +bool +vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]) +{ + VhostUserMsg msg = { + .request = VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE, + .size = sizeof(msg.payload.object), + .flags = VHOST_USER_VERSION, + }; + + memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN); + + if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) { + return false; + } + + return vu_send_message(dev, &msg); +} + static bool vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg) { @@ -1943,6 +2062,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_add_mem_reg(dev, vmsg); case VHOST_USER_REM_MEM_REG: return vu_rem_mem_reg(dev, vmsg); + case VHOST_USER_GET_SHARED_OBJECT: + return vu_get_shared_object(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h index 708370c5f5..b36a42a7ca 100644 --- a/subprojects/libvhost-user/libvhost-user.h +++ b/subprojects/libvhost-user/libvhost-user.h @@ -64,7 +64,8 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, - + /* Feature 16 is reserved for VHOST_USER_PROTOCOL_F_STATUS. */ + VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 17, VHOST_USER_PROTOCOL_F_MAX }; @@ -109,6 +110,7 @@ typedef enum VhostUserRequest { VHOST_USER_GET_MAX_MEM_SLOTS = 36, VHOST_USER_ADD_MEM_REG = 37, VHOST_USER_REM_MEM_REG = 38, + VHOST_USER_GET_SHARED_OBJECT = 41, VHOST_USER_MAX } VhostUserRequest; @@ -119,6 +121,9 @@ typedef enum VhostUserBackendRequest { VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, VHOST_USER_BACKEND_VRING_CALL = 4, VHOST_USER_BACKEND_VRING_ERR = 5, + VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6, + VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7, + VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8, VHOST_USER_BACKEND_MAX } VhostUserBackendRequest; @@ -172,6 +177,12 @@ typedef struct VhostUserInflight { uint16_t queue_size; } VhostUserInflight; +#define UUID_LEN 16 + +typedef struct VhostUserShared { + unsigned char uuid[UUID_LEN]; +} VhostUserShared; + #if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) # define VU_PACKED __attribute__((gcc_struct, packed)) #else @@ -199,6 +210,7 @@ typedef struct VhostUserMsg { VhostUserConfig config; VhostUserVringArea area; VhostUserInflight inflight; + VhostUserShared object; } payload; int fds[VHOST_MEMORY_BASELINE_NREGIONS]; @@ -232,6 +244,7 @@ typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len); typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data, uint32_t offset, uint32_t size, uint32_t flags); +typedef int (*vu_get_shared_object_cb) (VuDev *dev, const unsigned char *uuid); typedef struct VuDevIface { /* called by VHOST_USER_GET_FEATURES to get the features bitmask */ @@ -258,6 +271,8 @@ typedef struct VuDevIface { vu_get_config_cb get_config; /* set the config space of the device */ vu_set_config_cb set_config; + /* get virtio shared object from the underlying vhost implementation. */ + vu_get_shared_object_cb get_shared_object; } VuDevIface; typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx); @@ -541,6 +556,44 @@ void vu_set_queue_handler(VuDev *dev, VuVirtq *vq, bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, int size, int offset); +/** + * vu_lookup_shared_object: + * @dev: a VuDev context + * @uuid: UUID of the shared object + * @dmabuf_fd: output dma-buf file descriptor + * + * Lookup for a virtio shared object (i.e., dma-buf fd) associated with the + * received UUID. Result, if found, is stored in the dmabuf_fd argument. + * + * Returns: whether the virtio object was found. + */ +bool vu_lookup_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN], + int *dmabuf_fd); + +/** + * vu_add_shared_object: + * @dev: a VuDev context + * @uuid: UUID of the shared object + * + * Registers this back-end as the exporter for the object associated with + * the received UUID. + * + * Returns: TRUE on success, FALSE on failure. + */ +bool vu_add_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]); + +/** + * vu_rm_shared_object: + * @dev: a VuDev context + * @uuid: UUID of the shared object + * + * Removes a shared object entry (i.e., back-end entry) associated with the + * received UUID key from the hash table. + * + * Returns: TRUE on success, FALSE on failure. + */ +bool vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]); + /** * vu_queue_set_notification: * @dev: a VuDev context diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index d1b80149f2..f8e03dfd46 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -2080,7 +2080,6 @@ int main(int argc, char *argv[]) test_acpi_piix4_no_acpi_pci_hotplug); qtest_add_func("acpi/piix4/ipmi", test_acpi_piix4_tcg_ipmi); qtest_add_func("acpi/piix4/cpuhp", test_acpi_piix4_tcg_cphp); - qtest_add_func("acpi/piix4/memhp", test_acpi_piix4_tcg_memhp); qtest_add_func("acpi/piix4/numamem", test_acpi_piix4_tcg_numamem); qtest_add_func("acpi/piix4/nosmm", test_acpi_piix4_tcg_nosmm); qtest_add_func("acpi/piix4/smm-compat", @@ -2088,9 +2087,15 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/piix4/smm-compat-nosmm", test_acpi_piix4_tcg_smm_compat_nosmm); qtest_add_func("acpi/piix4/nohpet", test_acpi_piix4_tcg_nohpet); - qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm); - qtest_add_func("acpi/piix4/acpihmat", - test_acpi_piix4_tcg_acpi_hmat); + + /* i386 does not support memory hotplug */ + if (strcmp(arch, "i386")) { + qtest_add_func("acpi/piix4/memhp", test_acpi_piix4_tcg_memhp); + qtest_add_func("acpi/piix4/dimmpxm", + test_acpi_piix4_tcg_dimm_pxm); + qtest_add_func("acpi/piix4/acpihmat", + test_acpi_piix4_tcg_acpi_hmat); + } #ifdef CONFIG_POSIX qtest_add_func("acpi/piix4/acpierst", test_acpi_piix4_acpi_erst); #endif @@ -2108,11 +2113,9 @@ int main(int argc, char *argv[]) test_acpi_q35_tcg_no_acpi_hotplug); qtest_add_func("acpi/q35/multif-bridge", test_acpi_q35_multif_bridge); - qtest_add_func("acpi/q35/mmio64", test_acpi_q35_tcg_mmio64); qtest_add_func("acpi/q35/ipmi", test_acpi_q35_tcg_ipmi); qtest_add_func("acpi/q35/smbus/ipmi", test_acpi_q35_tcg_smbus_ipmi); qtest_add_func("acpi/q35/cpuhp", test_acpi_q35_tcg_cphp); - qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp); qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem); qtest_add_func("acpi/q35/nosmm", test_acpi_q35_tcg_nosmm); qtest_add_func("acpi/q35/smm-compat", @@ -2120,10 +2123,17 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/q35/smm-compat-nosmm", test_acpi_q35_tcg_smm_compat_nosmm); qtest_add_func("acpi/q35/nohpet", test_acpi_q35_tcg_nohpet); - qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm); - qtest_add_func("acpi/q35/acpihmat", test_acpi_q35_tcg_acpi_hmat); qtest_add_func("acpi/q35/acpihmat-noinitiator", test_acpi_q35_tcg_acpi_hmat_noinitiator); + + /* i386 does not support memory hotplug */ + if (strcmp(arch, "i386")) { + qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp); + qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm); + qtest_add_func("acpi/q35/acpihmat", + test_acpi_q35_tcg_acpi_hmat); + qtest_add_func("acpi/q35/mmio64", test_acpi_q35_tcg_mmio64); + } #ifdef CONFIG_POSIX qtest_add_func("acpi/q35/acpierst", test_acpi_q35_acpi_erst); #endif diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c index c5eb13f349..4f4404a4b1 100644 --- a/tests/qtest/numa-test.c +++ b/tests/qtest/numa-test.c @@ -568,7 +568,7 @@ int main(int argc, char **argv) qtest_add_data_func("/numa/mon/cpus/partial", args, test_mon_partial); qtest_add_data_func("/numa/qmp/cpus/query-cpus", args, test_query_cpus); - if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) { + if (!strcmp(arch, "x86_64")) { qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu); qtest_add_data_func("/numa/pc/dynamic/cpu", args, pc_dynamic_cpu_cfg); qtest_add_data_func("/numa/pc/hmat/build", args, pc_hmat_build_cfg); @@ -576,6 +576,11 @@ int main(int argc, char **argv) qtest_add_data_func("/numa/pc/hmat/erange", args, pc_hmat_erange_cfg); } + if (!strcmp(arch, "i386")) { + qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu); + qtest_add_data_func("/numa/pc/dynamic/cpu", args, pc_dynamic_cpu_cfg); + } + if (!strcmp(arch, "ppc64")) { qtest_add_data_func("/numa/spapr/cpu/explicit", args, spapr_numa_cpu); } diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 0299ef6906..1977b302e2 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -50,6 +50,7 @@ tests = { 'test-qapi-util': [], 'test-interval-tree': [], 'test-xs-node': [qom], + 'test-virtio-dmabuf': [meson.project_source_root() / 'hw/display/virtio-dmabuf.c'], } if have_system or have_tools diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c index c111de5fc1..aedc125ae9 100644 --- a/tests/unit/test-uuid.c +++ b/tests/unit/test-uuid.c @@ -171,6 +171,32 @@ static void test_uuid_unparse_strdup(void) } } +static void test_uuid_hash(void) +{ + QemuUUID uuid; + int i; + + for (i = 0; i < 100; i++) { + qemu_uuid_generate(&uuid); + /* Obtain the UUID hash */ + uint32_t hash_a = qemu_uuid_hash(&uuid); + int data_idx = g_random_int_range(0, 15); + /* Change a single random byte of the UUID */ + if (uuid.data[data_idx] < 0xFF) { + uuid.data[data_idx]++; + } else { + uuid.data[data_idx]--; + } + /* Obtain the UUID hash again */ + uint32_t hash_b = qemu_uuid_hash(&uuid); + /* + * Both hashes shall be different (avoid collision) + * for any change in the UUID fields + */ + g_assert_cmpint(hash_a, !=, hash_b); + } +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); @@ -179,6 +205,7 @@ int main(int argc, char **argv) g_test_add_func("/uuid/parse", test_uuid_parse); g_test_add_func("/uuid/unparse", test_uuid_unparse); g_test_add_func("/uuid/unparse_strdup", test_uuid_unparse_strdup); + g_test_add_func("/uuid/hash", test_uuid_hash); return g_test_run(); } diff --git a/tests/unit/test-virtio-dmabuf.c b/tests/unit/test-virtio-dmabuf.c new file mode 100644 index 0000000000..a45ec52f42 --- /dev/null +++ b/tests/unit/test-virtio-dmabuf.c @@ -0,0 +1,137 @@ +/* + * QEMU tests for shared dma-buf API + * + * Copyright (c) 2023 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "hw/virtio/virtio-dmabuf.h" + + +static void test_add_remove_resources(void) +{ + QemuUUID uuid; + int i, dmabuf_fd; + + for (i = 0; i < 100; ++i) { + qemu_uuid_generate(&uuid); + dmabuf_fd = g_random_int_range(3, 500); + /* Add a new resource */ + g_assert(virtio_add_dmabuf(&uuid, dmabuf_fd)); + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, dmabuf_fd); + /* Remove the resource */ + g_assert(virtio_remove_resource(&uuid)); + /* Resource is not found anymore */ + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, -1); + } +} + +static void test_add_remove_dev(void) +{ + QemuUUID uuid; + struct vhost_dev *dev = g_new0(struct vhost_dev, 1); + int i; + + for (i = 0; i < 100; ++i) { + qemu_uuid_generate(&uuid); + virtio_add_vhost_device(&uuid, dev); + /* vhost device is found */ + g_assert(virtio_lookup_vhost_device(&uuid) != NULL); + /* Remove the vhost device */ + g_assert(virtio_remove_resource(&uuid)); + /* vhost device is not found anymore */ + g_assert(virtio_lookup_vhost_device(&uuid) == NULL); + } + g_free(dev); +} + +static void test_remove_invalid_resource(void) +{ + QemuUUID uuid; + int i; + + for (i = 0; i < 20; ++i) { + qemu_uuid_generate(&uuid); + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, -1); + /* Removing a resource that does not exist returns false */ + g_assert_false(virtio_remove_resource(&uuid)); + } +} + +static void test_add_invalid_resource(void) +{ + QemuUUID uuid; + struct vhost_dev *dev = NULL; + int i, dmabuf_fd = -2, alt_dmabuf = 2; + + for (i = 0; i < 20; ++i) { + qemu_uuid_generate(&uuid); + /* Add a new resource with invalid (negative) resource fd */ + g_assert_false(virtio_add_dmabuf(&uuid, dmabuf_fd)); + /* Resource is not found */ + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, -1); + /* Add a new vhost device with invalid (NULL) pointer */ + g_assert_false(virtio_add_vhost_device(&uuid, dev)); + /* vhost device is not found */ + g_assert(virtio_lookup_vhost_device(&uuid) == NULL); + } + + for (i = 0; i < 20; ++i) { + /* Add a valid resource */ + qemu_uuid_generate(&uuid); + dmabuf_fd = g_random_int_range(3, 500); + g_assert(virtio_add_dmabuf(&uuid, dmabuf_fd)); + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, dmabuf_fd); + /* Add a new resource with repeated uuid returns false */ + g_assert_false(virtio_add_dmabuf(&uuid, alt_dmabuf)); + /* The value for the uuid key is not replaced */ + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, dmabuf_fd); + } +} + +static void test_free_resources(void) +{ + QemuUUID uuids[20]; + int i, dmabuf_fd; + + for (i = 0; i < ARRAY_SIZE(uuids); ++i) { + qemu_uuid_generate(&uuids[i]); + dmabuf_fd = g_random_int_range(3, 500); + g_assert(virtio_add_dmabuf(&uuids[i], dmabuf_fd)); + g_assert_cmpint(virtio_lookup_dmabuf(&uuids[i]), ==, dmabuf_fd); + } + virtio_free_resources(); + for (i = 0; i < ARRAY_SIZE(uuids); ++i) { + /* None of the resources is found after free'd */ + g_assert_cmpint(virtio_lookup_dmabuf(&uuids[i]), ==, -1); + } + +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/virtio-dmabuf/add_rm_res", test_add_remove_resources); + g_test_add_func("/virtio-dmabuf/add_rm_dev", test_add_remove_dev); + g_test_add_func("/virtio-dmabuf/rm_invalid_res", + test_remove_invalid_resource); + g_test_add_func("/virtio-dmabuf/add_invalid_res", + test_add_invalid_resource); + g_test_add_func("/virtio-dmabuf/free_res", test_free_resources); + + return g_test_run(); +} diff --git a/util/uuid.c b/util/uuid.c index b1108dde78..d71aa79e5e 100644 --- a/util/uuid.c +++ b/util/uuid.c @@ -116,3 +116,17 @@ QemuUUID qemu_uuid_bswap(QemuUUID uuid) bswap16s(&uuid.fields.time_high_and_version); return uuid; } + +/* djb2 hash algorithm */ +uint32_t qemu_uuid_hash(const void *uuid) +{ + QemuUUID *qid = (QemuUUID *) uuid; + uint32_t h = 5381; + int i; + + for (i = 0; i < ARRAY_SIZE(qid->data); i++) { + h = (h << 5) + h + qid->data[i]; + } + + return h; +}