diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index 6b0bdba65d..f39b751ff9 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -28,6 +28,8 @@ #include "hw/sysbus.h" #include "hw/xen/xen.h" #include "hw/i386/x86.h" +#include "hw/i386/pc.h" +#include "hw/pci/pci.h" #include "hw/irq.h" #include "xen_evtchn.h" @@ -101,6 +103,7 @@ struct XenEvtchnState { uint64_t callback_param; bool evtchn_in_kernel; + uint32_t callback_gsi; QEMUBH *gsi_bh; @@ -217,11 +220,41 @@ static void xen_evtchn_register_types(void) type_init(xen_evtchn_register_types) +static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + uint8_t pin = param & 3; + uint8_t devfn = (param >> 8) & 0xff; + uint16_t bus = (param >> 16) & 0xffff; + uint16_t domain = (param >> 32) & 0xffff; + PCIDevice *pdev; + PCIINTxRoute r; + + if (domain || !pcms) { + return 0; + } + + pdev = pci_find_device(pcms->bus, bus, devfn); + if (!pdev) { + return 0; + } + + r = pci_device_route_intx_to_irq(pdev, pin); + if (r.mode != PCI_INTX_ENABLED) { + return 0; + } + + /* + * Hm, can we be notified of INTX routing changes? Not without + * *owning* the device and being allowed to overwrite its own + * ->intx_routing_notifier, AFAICT. So let's not. + */ + return r.irq; +} + void xen_evtchn_set_callback_level(int level) { XenEvtchnState *s = xen_evtchn_singleton; - uint32_t param; - if (!s) { return; } @@ -260,18 +293,12 @@ void xen_evtchn_set_callback_level(int level) return; } - param = (uint32_t)s->callback_param; - - switch (s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) { - case HVM_PARAM_CALLBACK_TYPE_GSI: - if (param < IOAPIC_NUM_PINS) { - qemu_set_irq(s->gsis[param], level); - if (level) { - /* Ensure the vCPU polls for deassertion */ - kvm_xen_set_callback_asserted(); - } + if (s->callback_gsi && s->callback_gsi < IOAPIC_NUM_PINS) { + qemu_set_irq(s->gsis[s->callback_gsi], level); + if (level) { + /* Ensure the vCPU polls for deassertion */ + kvm_xen_set_callback_asserted(); } - break; } } @@ -283,15 +310,22 @@ int xen_evtchn_set_callback_param(uint64_t param) .u.vector = 0, }; bool in_kernel = false; + uint32_t gsi = 0; + int type = param >> CALLBACK_VIA_TYPE_SHIFT; int ret; if (!s) { return -ENOTSUP; } + /* + * We need the BQL because set_callback_pci_intx() may call into PCI code, + * and because we may need to manipulate the old and new GSI levels. + */ + assert(qemu_mutex_iothread_locked()); qemu_mutex_lock(&s->port_lock); - switch (param >> CALLBACK_VIA_TYPE_SHIFT) { + switch (type) { case HVM_PARAM_CALLBACK_TYPE_VECTOR: { xa.u.vector = (uint8_t)param, @@ -299,10 +333,17 @@ int xen_evtchn_set_callback_param(uint64_t param) if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) { in_kernel = true; } + gsi = 0; break; } + case HVM_PARAM_CALLBACK_TYPE_PCI_INTX: + gsi = set_callback_pci_intx(s, param); + ret = gsi ? 0 : -EINVAL; + break; + case HVM_PARAM_CALLBACK_TYPE_GSI: + gsi = (uint32_t)param; ret = 0; break; @@ -320,6 +361,17 @@ int xen_evtchn_set_callback_param(uint64_t param) } s->callback_param = param; s->evtchn_in_kernel = in_kernel; + + if (gsi != s->callback_gsi) { + struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0); + + xen_evtchn_set_callback_level(0); + s->callback_gsi = gsi; + + if (gsi && vi && vi->evtchn_upcall_pending) { + kvm_xen_inject_vcpu_callback_vector(0, type); + } + } } qemu_mutex_unlock(&s->port_lock); diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index b52617df54..9e22c9fa02 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -131,6 +131,38 @@ int kvm_xen_init(KVMState *s, uint32_t hypercall_msr) return ret; } + /* If called a second time, don't repeat the rest of the setup. */ + if (s->xen_caps) { + return 0; + } + + /* + * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info + * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared. + * + * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows + * such things to be polled at precisely the right time. We *could* do + * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at + * the moment the IRQ is acked, and see if it should be reasserted. + * + * But the in-kernel irqchip is deprecated, so we're unlikely to add + * that support in the kernel. Insist on using the split irqchip mode + * instead. + * + * This leaves us polling for the level going low in QEMU, which lacks + * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a + * spurious 'ack' to an INTX IRQ every time there's any MMIO access to + * the device (for which it has to unmap the device and trap access, for + * some period after an IRQ!!). In the Xen case, we do it on exit from + * KVM_RUN, if the flag is set to say that the GSI is currently asserted. + * Which is kind of icky, but less so than the VFIO one. I may fix them + * both later... + */ + if (!kvm_kernel_irqchip_split()) { + error_report("kvm: Xen support requires kernel-irqchip=split"); + return -EINVAL; + } + s->xen_caps = xen_caps; return 0; } @@ -684,7 +716,9 @@ static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu, switch (hp.index) { case HVM_PARAM_CALLBACK_IRQ: + qemu_mutex_lock_iothread(); err = xen_evtchn_set_callback_param(hp.value); + qemu_mutex_unlock_iothread(); xen_set_long_mode(exit->u.hcall.longmode); break; default: