diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c61934fbf22a..64a619d47d34 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -47,8 +47,9 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern unsigned long set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e); -extern int m2p_add_override(unsigned long mfn, struct page *page); -extern int m2p_remove_override(struct page *page); +extern int m2p_add_override(unsigned long mfn, struct page *page, + bool clear_pte); +extern int m2p_remove_override(struct page *page, bool clear_pte); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index aa8620989162..4fbda9a3f339 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void) #endif #if defined(CONFIG_XEN_DOM0) void __init xen_setup_pirqs(void); +int xen_find_device_domain_owner(struct pci_dev *dev); +int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); +int xen_unregister_device_domain_owner(struct pci_dev *dev); #else static inline void __init xen_setup_pirqs(void) { } +static inline int xen_find_device_domain_owner(struct pci_dev *dev) +{ + return -1; +} +static inline int xen_register_device_domain_owner(struct pci_dev *dev, + uint16_t domain) +{ + return -1; +} +static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) +{ + return -1; +} #endif #if defined(CONFIG_PCI_MSI) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e37b407a0ee8..8214724ce54d 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) } irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, (type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi"); + "msi-x" : "msi", + DOMID_SELF); if (irq < 0) goto error; dev_dbg(&dev->dev, @@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, (type == PCI_CAP_ID_MSIX) ? "pcifront-msi-x" : - "pcifront-msi"); + "pcifront-msi", + DOMID_SELF); if (irq < 0) goto free; i++; @@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) list_for_each_entry(msidesc, &dev->msi_list, list) { struct physdev_map_pirq map_irq; + domid_t domid; + + domid = ret = xen_find_device_domain_owner(dev); + /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED, + * hence check ret value for < 0. */ + if (ret < 0) + domid = DOMID_SELF; memset(&map_irq, 0, sizeof(map_irq)); - map_irq.domid = DOMID_SELF; + map_irq.domid = domid; map_irq.type = MAP_PIRQ_TYPE_MSI; map_irq.index = -1; map_irq.pirq = -1; @@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); if (ret) { - dev_warn(&dev->dev, "xen map irq failed %d\n", ret); + dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n", + ret, domid); goto out; } ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq, map_irq.index, (type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi"); + "msi-x" : "msi", + domid); if (ret < 0) goto out; } @@ -461,3 +472,78 @@ void __init xen_setup_pirqs(void) } } #endif + +#ifdef CONFIG_XEN_DOM0 +struct xen_device_domain_owner { + domid_t domain; + struct pci_dev *dev; + struct list_head list; +}; + +static DEFINE_SPINLOCK(dev_domain_list_spinlock); +static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list); + +static struct xen_device_domain_owner *find_device(struct pci_dev *dev) +{ + struct xen_device_domain_owner *owner; + + list_for_each_entry(owner, &dev_domain_list, list) { + if (owner->dev == dev) + return owner; + } + return NULL; +} + +int xen_find_device_domain_owner(struct pci_dev *dev) +{ + struct xen_device_domain_owner *owner; + int domain = -ENODEV; + + spin_lock(&dev_domain_list_spinlock); + owner = find_device(dev); + if (owner) + domain = owner->domain; + spin_unlock(&dev_domain_list_spinlock); + return domain; +} +EXPORT_SYMBOL_GPL(xen_find_device_domain_owner); + +int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain) +{ + struct xen_device_domain_owner *owner; + + owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL); + if (!owner) + return -ENODEV; + + spin_lock(&dev_domain_list_spinlock); + if (find_device(dev)) { + spin_unlock(&dev_domain_list_spinlock); + kfree(owner); + return -EEXIST; + } + owner->domain = domain; + owner->dev = dev; + list_add_tail(&owner->list, &dev_domain_list); + spin_unlock(&dev_domain_list_spinlock); + return 0; +} +EXPORT_SYMBOL_GPL(xen_register_device_domain_owner); + +int xen_unregister_device_domain_owner(struct pci_dev *dev) +{ + struct xen_device_domain_owner *owner; + + spin_lock(&dev_domain_list_spinlock); + owner = find_device(dev); + if (!owner) { + spin_unlock(&dev_domain_list_spinlock); + return -ENODEV; + } + list_del(&owner->list); + spin_unlock(&dev_domain_list_spinlock); + kfree(owner); + return 0; +} +EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner); +#endif diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 141eb0de8b06..c851397e657c 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -650,7 +650,7 @@ static unsigned long mfn_hash(unsigned long mfn) } /* Add an MFN override for a particular page */ -int m2p_add_override(unsigned long mfn, struct page *page) +int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) { unsigned long flags; unsigned long pfn; @@ -662,7 +662,6 @@ int m2p_add_override(unsigned long mfn, struct page *page) if (!PageHighMem(page)) { address = (unsigned long)__va(pfn << PAGE_SHIFT); ptep = lookup_address(address, &level); - if (WARN(ptep == NULL || level != PG_LEVEL_4K, "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; @@ -674,18 +673,17 @@ int m2p_add_override(unsigned long mfn, struct page *page) if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) return -ENOMEM; - if (!PageHighMem(page)) + if (clear_pte && !PageHighMem(page)) /* Just zap old mapping for now */ pte_clear(&init_mm, address, ptep); - spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); return 0; } - -int m2p_remove_override(struct page *page) +EXPORT_SYMBOL_GPL(m2p_add_override); +int m2p_remove_override(struct page *page, bool clear_pte) { unsigned long flags; unsigned long mfn; @@ -713,7 +711,7 @@ int m2p_remove_override(struct page *page) spin_unlock_irqrestore(&m2p_override_lock, flags); set_phys_to_machine(pfn, page->index); - if (!PageHighMem(page)) + if (clear_pte && !PageHighMem(page)) set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); /* No tlb flush necessary because the caller already @@ -721,6 +719,7 @@ int m2p_remove_override(struct page *page) return 0; } +EXPORT_SYMBOL_GPL(m2p_remove_override); struct page *m2p_find_override(unsigned long mfn) { diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 33167b43ac7e..35e02a10110b 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -101,6 +101,7 @@ struct irq_info unsigned short gsi; unsigned char vector; unsigned char flags; + uint16_t domid; } pirq; } u; }; @@ -184,6 +185,7 @@ static void xen_irq_info_pirq_init(unsigned irq, unsigned short pirq, unsigned short gsi, unsigned short vector, + uint16_t domid, unsigned char flags) { struct irq_info *info = info_for_irq(irq); @@ -193,6 +195,7 @@ static void xen_irq_info_pirq_init(unsigned irq, info->u.pirq.pirq = pirq; info->u.pirq.gsi = gsi; info->u.pirq.vector = vector; + info->u.pirq.domid = domid; info->u.pirq.flags = flags; } @@ -655,7 +658,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, goto out; } - xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, + xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF, shareable ? PIRQ_SHAREABLE : 0); out: @@ -680,7 +683,8 @@ int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) } int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, - int pirq, int vector, const char *name) + int pirq, int vector, const char *name, + domid_t domid) { int irq, ret; @@ -693,7 +697,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq, name); - xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, 0); + xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0); ret = irq_set_msi_desc(irq, msidesc); if (ret < 0) goto error_irq; @@ -722,9 +726,16 @@ int xen_destroy_irq(int irq) if (xen_initial_domain()) { unmap_irq.pirq = info->u.pirq.pirq; - unmap_irq.domid = DOMID_SELF; + unmap_irq.domid = info->u.pirq.domid; rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); - if (rc) { + /* If another domain quits without making the pci_disable_msix + * call, the Xen hypervisor takes care of freeing the PIRQs + * (free_domain_pirqs). + */ + if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF)) + printk(KERN_INFO "domain %d does not have %d anymore\n", + info->u.pirq.domid, info->u.pirq.pirq); + else if (rc) { printk(KERN_WARNING "unmap irq failed %d\n", rc); goto out; } @@ -759,6 +770,12 @@ out: return irq; } + +int xen_pirq_from_irq(unsigned irq) +{ + return pirq_from_irq(irq); +} +EXPORT_SYMBOL_GPL(xen_pirq_from_irq); int bind_evtchn_to_irq(unsigned int evtchn) { int irq; @@ -1502,6 +1519,18 @@ void xen_poll_irq(int irq) xen_poll_irq_timeout(irq, 0 /* no timeout */); } +/* Check whether the IRQ line is shared with other guests. */ +int xen_test_irq_shared(int irq) +{ + struct irq_info *info = info_for_irq(irq); + struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq }; + + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) + return 0; + return !(irq_status.flags & XENIRQSTAT_shared); +} +EXPORT_SYMBOL_GPL(xen_test_irq_shared); + void xen_irq_resume(void) { unsigned int cpu, evtchn; diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index a7ffdfe19fc9..f6832f46aea4 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -427,6 +427,17 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd, return 0; } +static void gntalloc_vma_open(struct vm_area_struct *vma) +{ + struct gntalloc_gref *gref = vma->vm_private_data; + if (!gref) + return; + + spin_lock(&gref_lock); + gref->users++; + spin_unlock(&gref_lock); +} + static void gntalloc_vma_close(struct vm_area_struct *vma) { struct gntalloc_gref *gref = vma->vm_private_data; @@ -441,6 +452,7 @@ static void gntalloc_vma_close(struct vm_area_struct *vma) } static struct vm_operations_struct gntalloc_vmops = { + .open = gntalloc_vma_open, .close = gntalloc_vma_close, }; @@ -471,8 +483,6 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_private_data = gref; vma->vm_flags |= VM_RESERVED; - vma->vm_flags |= VM_DONTCOPY; - vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; vma->vm_ops = &gntalloc_vmops; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index b0f9e8fb0052..f914b26cf0c2 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -330,17 +330,26 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages) /* ------------------------------------------------------------------ */ +static void gntdev_vma_open(struct vm_area_struct *vma) +{ + struct grant_map *map = vma->vm_private_data; + + pr_debug("gntdev_vma_open %p\n", vma); + atomic_inc(&map->users); +} + static void gntdev_vma_close(struct vm_area_struct *vma) { struct grant_map *map = vma->vm_private_data; - pr_debug("close %p\n", vma); + pr_debug("gntdev_vma_close %p\n", vma); map->vma = NULL; vma->vm_private_data = NULL; gntdev_put_map(map); } static struct vm_operations_struct gntdev_vmops = { + .open = gntdev_vma_open, .close = gntdev_vma_close, }; @@ -652,7 +661,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; + vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND; + + if (use_ptemod) + vma->vm_flags |= VM_DONTCOPY|VM_PFNMAP; vma->vm_private_data = map; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 3745a318defc..fd725cde6ad1 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -466,13 +466,30 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (map_ops[i].status) continue; - /* m2p override only supported for GNTMAP_contains_pte mappings */ - if (!(map_ops[i].flags & GNTMAP_contains_pte)) - continue; - pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + + if (map_ops[i].flags & GNTMAP_contains_pte) { + pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + (map_ops[i].host_addr & ~PAGE_MASK)); - mfn = pte_mfn(*pte); - ret = m2p_add_override(mfn, pages[i]); + mfn = pte_mfn(*pte); + } else { + /* If you really wanted to do this: + * mfn = PFN_DOWN(map_ops[i].dev_bus_addr); + * + * The reason we do not implement it is b/c on the + * unmap path (gnttab_unmap_refs) we have no means of + * checking whether the page is !GNTMAP_contains_pte. + * + * That is without some extra data-structure to carry + * the struct page, bool clear_pte, and list_head next + * tuples and deal with allocation/delallocation, etc. + * + * The users of this API set the GNTMAP_contains_pte + * flag so lets just return not supported until it + * becomes neccessary to implement. + */ + return -EOPNOTSUPP; + } + ret = m2p_add_override(mfn, pages[i], + map_ops[i].flags & GNTMAP_contains_pte); if (ret) return ret; } @@ -494,7 +511,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, return ret; for (i = 0; i < count; i++) { - ret = m2p_remove_override(pages[i]); + ret = m2p_remove_override(pages[i], true /* clear the PTE */); if (ret) return ret; } diff --git a/include/xen/events.h b/include/xen/events.h index f1b87ad48ac7..9af21e19545a 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -85,7 +85,8 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc); /* Bind an PSI pirq to an irq. */ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, - int pirq, int vector, const char *name); + int pirq, int vector, const char *name, + domid_t domid); #endif /* De-allocates the above mentioned physical interrupt. */ @@ -94,4 +95,10 @@ int xen_destroy_irq(int irq); /* Return irq from pirq */ int xen_irq_from_pirq(unsigned pirq); +/* Return the pirq allocated to the irq. */ +int xen_pirq_from_irq(unsigned irq); + +/* Determine whether to ignore this IRQ if it is passed to a guest. */ +int xen_test_irq_shared(int irq); + #endif /* _XEN_EVENTS_H */