diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events index 04e60c5bb8..b83c3eb965 100644 --- a/hw/i386/kvm/trace-events +++ b/hw/i386/kvm/trace-events @@ -2,3 +2,4 @@ kvm_xen_map_pirq(int pirq, int gsi) "pirq %d gsi %d" kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d" kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d" kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d" +kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d" diff --git a/hw/i386/kvm/xen-stubs.c b/hw/i386/kvm/xen-stubs.c index 720590aedd..ae406e0b02 100644 --- a/hw/i386/kvm/xen-stubs.c +++ b/hw/i386/kvm/xen-stubs.c @@ -14,6 +14,22 @@ #include "qapi/error.h" #include "qapi/qapi-commands-misc-target.h" +#include "xen_evtchn.h" + +void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector, + uint64_t addr, uint32_t data, bool is_masked) +{ +} + +void xen_evtchn_remove_pci_device(PCIDevice *dev) +{ +} + +bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data) +{ + return false; +} + #ifdef TARGET_I386 EvtchnInfoList *qmp_xen_event_list(Error **errp) { diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index f2c4b43871..69c0204d4f 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -31,6 +31,8 @@ #include "hw/i386/x86.h" #include "hw/i386/pc.h" #include "hw/pci/pci.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" #include "hw/irq.h" #include "xen_evtchn.h" @@ -45,6 +47,9 @@ #include "hw/xen/interface/memory.h" #include "hw/xen/interface/hvm/params.h" +/* XX: For kvm_update_msi_routes_all() */ +#include "target/i386/kvm/kvm_i386.h" + #define TYPE_XEN_EVTCHN "xen-evtchn" OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN) @@ -119,6 +124,11 @@ struct xenevtchn_handle { struct pirq_info { int gsi; uint16_t port; + PCIDevice *dev; + int vector; + bool is_msix; + bool is_masked; + bool is_translated; }; struct XenEvtchnState { @@ -151,7 +161,7 @@ struct XenEvtchnState { /* Per-GSI 
assertion state (serialized) */
     uint32_t pirq_gsi_set;
 
-    /* Per-PIRQ information (rebuilt on migration) */
+    /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
     struct pirq_info *pirq;
 };
 
@@ -1007,16 +1017,23 @@ static bool virq_is_global(uint32_t virq)
     }
 }
 
-static int close_port(XenEvtchnState *s, evtchn_port_t port)
+static int close_port(XenEvtchnState *s, evtchn_port_t port,
+                      bool *flush_kvm_routes)
 {
     XenEvtchnPort *p = &s->port_table[port];
 
+    /* Because it *might* be a PIRQ port */
+    assert(qemu_mutex_iothread_locked());
+
     switch (p->type) {
     case EVTCHNSTAT_closed:
         return -ENOENT;
 
     case EVTCHNSTAT_pirq:
         s->pirq[p->type_val].port = 0;
+        if (s->pirq[p->type_val].is_translated) {
+            *flush_kvm_routes = true;
+        }
         break;
 
     case EVTCHNSTAT_virq:
@@ -1065,6 +1082,7 @@ static int close_port(XenEvtchnState *s, evtchn_port_t port)
 int xen_evtchn_soft_reset(void)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
+    bool flush_kvm_routes = false; /* close_port() only ever sets it */
     int i;
 
     if (!s) {
@@ -1073,10 +1091,16 @@ int xen_evtchn_soft_reset(void)
 
     assert(qemu_mutex_iothread_locked());
 
-    QEMU_LOCK_GUARD(&s->port_lock);
+    qemu_mutex_lock(&s->port_lock);
 
     for (i = 0; i < s->nr_ports; i++) {
-        close_port(s, i);
+        close_port(s, i, &flush_kvm_routes);
+    }
+
+    qemu_mutex_unlock(&s->port_lock);
+
+    if (flush_kvm_routes) {
+        kvm_update_msi_routes_all(NULL, true, 0, 0);
     }
 
     return 0;
@@ -1094,6 +1118,7 @@ int xen_evtchn_reset_op(struct evtchn_reset *reset)
 int xen_evtchn_close_op(struct evtchn_close *close)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
+    bool flush_kvm_routes = false;
     int ret;
 
     if (!s) {
@@ -1104,12 +1129,17 @@ int xen_evtchn_close_op(struct evtchn_close *close)
         return -EINVAL;
     }
 
+    QEMU_IOTHREAD_LOCK_GUARD();
     qemu_mutex_lock(&s->port_lock);
 
-    ret = close_port(s, close->port);
+    ret = close_port(s, close->port, &flush_kvm_routes);
 
     qemu_mutex_unlock(&s->port_lock);
 
+    if (flush_kvm_routes) {
+        kvm_update_msi_routes_all(NULL, true, 0, 0);
+    }
+
     return ret;
 }
 
@@ -1226,21 +1256,54 @@ int
xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq) return -EINVAL; } - QEMU_LOCK_GUARD(&s->port_lock); + QEMU_IOTHREAD_LOCK_GUARD(); if (s->pirq[pirq->pirq].port) { return -EBUSY; } + qemu_mutex_lock(&s->port_lock); + ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq, &pirq->port); if (ret) { + qemu_mutex_unlock(&s->port_lock); return ret; } s->pirq[pirq->pirq].port = pirq->port; trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port); + qemu_mutex_unlock(&s->port_lock); + + /* + * Need to do the unmask outside port_lock because it may call + * back into the MSI translate function. + */ + if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) { + if (s->pirq[pirq->pirq].is_masked) { + PCIDevice *dev = s->pirq[pirq->pirq].dev; + int vector = s->pirq[pirq->pirq].vector; + char *dev_path = qdev_get_dev_path(DEVICE(dev)); + + trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector); + g_free(dev_path); + + if (s->pirq[pirq->pirq].is_msix) { + msix_set_mask(dev, vector, false); + } else { + msi_set_mask(dev, vector, false, NULL); + } + } else if (s->pirq[pirq->pirq].is_translated) { + /* + * If KVM had attempted to translate this one before, make it try + * again. If we unmasked, then the notifier on the MSI(-X) vector + * will already have had the same effect. 
+             */
+            kvm_update_msi_routes_all(NULL, true, 0, 0);
+        }
+    }
+
     return ret;
 }
 
@@ -1559,6 +1622,179 @@ bool xen_evtchn_set_gsi(int gsi, int level)
     return true;
 }
 
+static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
+{
+    /* The vector (in low 8 bits of data) must be zero */
+    if (data & 0xff) {
+        return 0;
+    }
+
+    uint32_t pirq = (addr & 0xff000) >> 12;
+    pirq |= (addr >> 32) & 0xffffff00;
+
+    return pirq;
+}
+
+static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
+                                 int except_pirq)
+{
+    uint32_t pirq;
+
+    for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
+        /*
+         * Not a fast path, so only a trivial optimisation: skip a whole
+         * unused 64-PIRQ bitmap word at once to get across the big gap in
+         * the middle quickly. Advance by 63, not 64, as the loop increment
+         * supplies the final step to the first PIRQ of the next word.
+         */
+        if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
+            pirq += 63;
+            continue;
+        }
+        if (except_pirq && pirq == except_pirq) {
+            continue;
+        }
+        if (s->pirq[pirq].dev != dev) {
+            continue;
+        }
+        if (vector != -1 && s->pirq[pirq].vector != vector) {
+            continue;
+        }
+
+        /* It could theoretically be bound to a port already, but that is OK. */
+        s->pirq[pirq].dev = NULL; /* Forget the device; it may go away */
+        s->pirq[pirq].gsi = IRQ_UNBOUND;
+        s->pirq[pirq].is_msix = false;
+        s->pirq[pirq].vector = 0;
+        s->pirq[pirq].is_masked = false;
+        s->pirq[pirq].is_translated = false;
+    }
+}
+
+void xen_evtchn_remove_pci_device(PCIDevice *dev)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+
+    if (!s) {
+        return;
+    }
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+    do_remove_pci_vector(s, dev, -1, 0);
+}
+
+void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
+                          uint64_t addr, uint32_t data, bool is_masked)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+    uint32_t pirq;
+
+    if (!s) {
+        return;
+    }
+
+    assert(qemu_mutex_iothread_locked());
+
+    pirq = msi_pirq_target(addr, data);
+
+    /*
+     * The PIRQ# must be sane, and there must be an allocated PIRQ in
+     * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
+     */
+    if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
+        (s->pirq[pirq].gsi != IRQ_UNBOUND &&
+         s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
+        pirq = 0;
+    }
+
+    if (pirq) {
+        s->pirq[pirq].dev = dev;
+        s->pirq[pirq].gsi = IRQ_MSI_EMU;
+        s->pirq[pirq].is_msix = is_msix;
+        s->pirq[pirq].vector = vector;
+        s->pirq[pirq].is_masked = is_masked;
+    }
+
+    /* Remove any (other) entries for this {device, vector} */
+    do_remove_pci_vector(s, dev, vector, pirq);
+}
+
+int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
+                                  uint64_t address, uint32_t data)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+    uint32_t pirq, port;
+    CPUState *cpu;
+
+    if (!s) {
+        return 1; /* Not a PIRQ */
+    }
+
+    assert(qemu_mutex_iothread_locked());
+
+    pirq = msi_pirq_target(address, data);
+    if (!pirq || pirq >= s->nr_pirqs) {
+        return 1; /* Not a PIRQ */
+    }
+
+    if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
+        return -ENOTSUP;
+    }
+
+    if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
+        return -EINVAL;
+    }
+
+    /* Remember that KVM tried to translate this. It might need to try again. */
+    s->pirq[pirq].is_translated = true;
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    port = s->pirq[pirq].port;
+    if (!valid_port(port)) {
+        return -EINVAL;
+    }
+
+    cpu = qemu_get_cpu(s->port_table[port].vcpu);
+    if (!cpu) {
+        return -EINVAL;
+    }
+
+    route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+    route->u.xen_evtchn.port = port;
+    route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
+    route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+    return 0; /* Handled */
+}
+
+bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+    uint32_t pirq, port;
+
+    if (!s) {
+        return false;
+    }
+
+    assert(qemu_mutex_iothread_locked());
+
+    pirq = msi_pirq_target(address, data);
+    if (!pirq || pirq >= s->nr_pirqs) {
+        return false;
+    }
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    port = s->pirq[pirq].port;
+    if (!valid_port(port)) {
+        return false;
+    }
+
+    set_port_pending(s, port);
+    return true;
+}
+
 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
@@ -1569,6 +1805,7 @@ int xen_physdev_map_pirq(struct physdev_map_pirq *map)
         return -ENOTSUP;
     }
 
+    QEMU_IOTHREAD_LOCK_GUARD();
     QEMU_LOCK_GUARD(&s->port_lock);
 
     if (map->domid != DOMID_SELF && map->domid != xen_domid) {
@@ -1628,9 +1865,11 @@ int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
         return -EINVAL;
     }
 
-    QEMU_LOCK_GUARD(&s->port_lock);
+    QEMU_IOTHREAD_LOCK_GUARD();
+    qemu_mutex_lock(&s->port_lock);
 
     if (!pirq_inuse(s, pirq)) {
+        qemu_mutex_unlock(&s->port_lock);
         return -ENOENT;
     }
 
@@ -1638,6 +1877,7 @@ int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
 
     /* We can only unmap GSI PIRQs */
     if (gsi < 0) {
+        qemu_mutex_unlock(&s->port_lock);
         return -EINVAL;
     }
 
@@ -1646,6 +1886,12 @@ int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
     pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
 
     trace_kvm_xen_unmap_pirq(pirq, gsi);
+    qemu_mutex_unlock(&s->port_lock);
+
+    if (gsi == IRQ_MSI_EMU) {
+
kvm_update_msi_routes_all(NULL, true, 0, 0); + } + return 0; } @@ -1659,6 +1905,7 @@ int xen_physdev_eoi_pirq(struct physdev_eoi *eoi) return -ENOTSUP; } + QEMU_IOTHREAD_LOCK_GUARD(); QEMU_LOCK_GUARD(&s->port_lock); if (!pirq_inuse(s, pirq)) { @@ -1690,6 +1937,7 @@ int xen_physdev_query_pirq(struct physdev_irq_status_query *query) return -ENOTSUP; } + QEMU_IOTHREAD_LOCK_GUARD(); QEMU_LOCK_GUARD(&s->port_lock); if (!pirq_inuse(s, pirq)) { diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h index 95400b7fbf..bfb67ac2bc 100644 --- a/hw/i386/kvm/xen_evtchn.h +++ b/hw/i386/kvm/xen_evtchn.h @@ -25,6 +25,14 @@ void xen_evtchn_set_callback_level(int level); int xen_evtchn_set_port(uint16_t port); bool xen_evtchn_set_gsi(int gsi, int level); +void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector, + uint64_t addr, uint32_t data, bool is_masked); +void xen_evtchn_remove_pci_device(PCIDevice *dev); +struct kvm_irq_routing_entry; +int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data); +bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data); + /* * These functions mirror the libxenevtchn library API, providing the QEMU diff --git a/hw/pci/msi.c b/hw/pci/msi.c index 1cadf150bc..041b0bdbec 100644 --- a/hw/pci/msi.c +++ b/hw/pci/msi.c @@ -24,6 +24,8 @@ #include "qemu/range.h" #include "qapi/error.h" +#include "hw/i386/kvm/xen_evtchn.h" + /* PCI_MSI_ADDRESS_LO */ #define PCI_MSI_ADDRESS_LO_MASK (~0x3) @@ -414,6 +416,15 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) fprintf(stderr, "\n"); #endif + if (xen_mode == XEN_EMULATE) { + for (vector = 0; vector < msi_nr_vectors(flags); vector++) { + MSIMessage msg = msi_prepare_message(dev, vector); + + xen_evtchn_snoop_msi(dev, false, vector, msg.address, msg.data, + msi_is_masked(dev, vector)); + } + } + if (!(flags & PCI_MSI_FLAGS_ENABLE)) { return; } diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 
9e70fcd6fa..ab8869d9d0 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -26,6 +26,8 @@ #include "qapi/error.h" #include "trace.h" +#include "hw/i386/kvm/xen_evtchn.h" + /* MSI enable bit and maskall bit are in byte 1 in FLAGS register */ #define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1) #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) @@ -124,6 +126,13 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) { bool is_masked = msix_is_masked(dev, vector); + if (xen_mode == XEN_EMULATE) { + MSIMessage msg = msix_prepare_message(dev, vector); + + xen_evtchn_snoop_msi(dev, true, vector, msg.address, msg.data, + is_masked); + } + if (is_masked == was_masked) { return; } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index bad8e63db3..10c980b9f5 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -49,6 +49,9 @@ #include "qemu/cutils.h" #include "pci-internal.h" +#include "hw/xen/xen.h" +#include "hw/i386/kvm/xen_evtchn.h" + //#define DEBUG_PCI #ifdef DEBUG_PCI # define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__) @@ -319,6 +322,17 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg) { MemTxAttrs attrs = {}; + /* + * Xen uses the high bits of the address to contain some of the bits + * of the PIRQ#. Therefore we can't just send the write cycle and + * trust that it's caught by the APIC at 0xfee00000 because the + * target of the write might be e.g. 0x1000fee46000 for PIRQ#4166. + * So we intercept the delivery here instead of in kvm_send_msi(). 
+ */ + if (xen_mode == XEN_EMULATE && + xen_evtchn_deliver_pirq_msi(msg.address, msg.data)) { + return; + } attrs.requester_id = pci_requester_id(dev); address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, attrs, NULL); @@ -988,6 +1002,9 @@ static void do_pci_unregister_device(PCIDevice *pci_dev) pci_get_bus(pci_dev)->devices[pci_dev->devfn] = NULL; pci_config_free(pci_dev); + if (xen_mode == XEN_EMULATE) { + xen_evtchn_remove_pci_device(pci_dev); + } if (memory_region_is_mapped(&pci_dev->bus_master_enable_region)) { memory_region_del_subregion(&pci_dev->bus_master_container_region, &pci_dev->bus_master_enable_region); diff --git a/include/hw/pci/msi.h b/include/hw/pci/msi.h index ee8ee469a6..abcfd13925 100644 --- a/include/hw/pci/msi.h +++ b/include/hw/pci/msi.h @@ -33,6 +33,7 @@ extern bool msi_nonbroken; void msi_set_message(PCIDevice *dev, MSIMessage msg); MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector); bool msi_enabled(const PCIDevice *dev); +void msi_set_enabled(PCIDevice *dev); int msi_init(struct PCIDevice *dev, uint8_t offset, unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask, Error **errp); diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index a73c49aabb..d390137f02 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -44,6 +44,7 @@ #include "qemu/error-report.h" #include "qemu/memalign.h" #include "hw/i386/x86.h" +#include "hw/i386/kvm/xen_evtchn.h" #include "hw/i386/pc.h" #include "hw/i386/apic.h" #include "hw/i386/apic_internal.h" @@ -5654,6 +5655,20 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, } } +#ifdef CONFIG_XEN_EMU + if (xen_mode == XEN_EMULATE) { + int handled = xen_evtchn_translate_pirq_msi(route, address, data); + + /* + * If it was a PIRQ and successfully routed (handled == 0) or it was + * an error (handled < 0), return. If it wasn't a PIRQ, keep going. 
+ */ + if (handled <= 0) { + return handled; + } + } +#endif + address = kvm_swizzle_msi_ext_dest_id(address); route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT; route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK; @@ -5673,8 +5688,8 @@ struct MSIRouteEntry { static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \ QLIST_HEAD_INITIALIZER(msi_route_list); -static void kvm_update_msi_routes_all(void *private, bool global, - uint32_t index, uint32_t mask) +void kvm_update_msi_routes_all(void *private, bool global, + uint32_t index, uint32_t mask) { int cnt = 0, vector; MSIRouteEntry *entry; diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 6a5c24e3dc..e24753abfe 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -51,6 +51,8 @@ bool kvm_hv_vpindex_settable(void); bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); +void kvm_update_msi_routes_all(void *private, bool global, + uint32_t index, uint32_t mask); bool kvm_enable_sgx_provisioning(KVMState *s); void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index 0e81e5b6b1..96a9082196 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -267,7 +267,8 @@ static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu, 1 << XENFEAT_auto_translated_physmap | 1 << XENFEAT_supervisor_mode_kernel | 1 << XENFEAT_hvm_callback_vector | - 1 << XENFEAT_hvm_safe_pvclock; + 1 << XENFEAT_hvm_safe_pvclock | + 1 << XENFEAT_hvm_pirqs; } err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));