From 8dcf94bcff5bf3b54380ae2a17b034fb3b9d58e5 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 19 Dec 2013 10:17:11 -0700 Subject: [PATCH 1/3] misc: Reserve minor for VFIO VFIO currently allocates its own dynamic chardev range, reserving the first minor for the control part of the interface (/dev/vfio/vfio) and the remainder for VFIO groups (/dev/vfio/$GROUP). This works, but it doesn't support auto loading. For instance when libvirt checks for VFIO support it looks for /dev/vfio/vfio, which currently doesn't exist unless the vfio module is loaded. By converting the control device to a misc driver and reserving a static minor, we can enable auto loading. Reserving the minor is a prerequisite to that conversion. Minor 196 is unused by anything currently in the kernel. Suggested-by: Paolo Bonzini Signed-off-by: Alex Williamson Acked-by: Greg Kroah-Hartman --- Documentation/devices.txt | 1 + include/linux/miscdevice.h | 1 + 2 files changed, 2 insertions(+) diff --git a/Documentation/devices.txt b/Documentation/devices.txt index 80b72419ffd8..10378cc48374 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -409,6 +409,7 @@ Your cooperation is appreciated. 
193 = /dev/d7s SPARC 7-segment display 194 = /dev/zkshim Zero-Knowledge network shim control 195 = /dev/elographics/e2201 Elographics touchscreen E271-2201 + 196 = /dev/vfio/vfio VFIO userspace driver interface 198 = /dev/sexec Signed executable interface 199 = /dev/scanners/cuecat :CueCat barcode scanner 200 = /dev/net/tun TAP/TUN network device diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index f7eaf2d60083..3737f7218f51 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -30,6 +30,7 @@ #define STORE_QUEUE_MINOR 155 #define I2O_MINOR 166 #define MICROCODE_MINOR 184 +#define VFIO_MINOR 196 #define TUN_MINOR 200 #define CUSE_MINOR 203 #define MWAVE_MINOR 219 /* ACP/Mwave Modem */ From d10999016f4164e9b80f1b3dece3842087cfa3bb Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 19 Dec 2013 10:17:13 -0700 Subject: [PATCH 2/3] vfio: Convert control interface to misc driver This change allows us to support module auto loading using devname support in userspace tools. With this, /dev/vfio/vfio will always be present and opening it will cause the vfio module to load. This should avoid needing to configure the system to statically load vfio in order to get libvirt to correctly detect support for it. 
Suggested-by: Paolo Bonzini Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 70 +++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 1eab4ace0671..21271d8df023 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -45,9 +46,7 @@ static struct vfio { struct idr group_idr; struct mutex group_lock; struct cdev group_cdev; - struct device *dev; - dev_t devt; - struct cdev cdev; + dev_t group_devt; wait_queue_head_t release_q; } vfio; @@ -142,8 +141,7 @@ EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); */ static int vfio_alloc_group_minor(struct vfio_group *group) { - /* index 0 is used by /dev/vfio/vfio */ - return idr_alloc(&vfio.group_idr, group, 1, MINORMASK + 1, GFP_KERNEL); + return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL); } static void vfio_free_group_minor(int minor) @@ -243,7 +241,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) } } - dev = device_create(vfio.class, NULL, MKDEV(MAJOR(vfio.devt), minor), + dev = device_create(vfio.class, NULL, + MKDEV(MAJOR(vfio.group_devt), minor), group, "%d", iommu_group_id(iommu_group)); if (IS_ERR(dev)) { vfio_free_group_minor(minor); @@ -268,7 +267,7 @@ static void vfio_group_release(struct kref *kref) WARN_ON(!list_empty(&group->device_list)); - device_destroy(vfio.class, MKDEV(MAJOR(vfio.devt), group->minor)); + device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor)); list_del(&group->vfio_next); vfio_free_group_minor(group->minor); vfio_group_unlock_and_free(group); @@ -1419,12 +1418,17 @@ EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id); */ static char *vfio_devnode(struct device *dev, umode_t *mode) { - if (mode && (MINOR(dev->devt) == 0)) - *mode = S_IRUGO | S_IWUGO; - return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); } +static struct 
miscdevice vfio_dev = { + .minor = VFIO_MINOR, + .name = "vfio", + .fops = &vfio_fops, + .nodename = "vfio/vfio", + .mode = S_IRUGO | S_IWUGO, +}; + static int __init vfio_init(void) { int ret; @@ -1436,6 +1440,13 @@ static int __init vfio_init(void) INIT_LIST_HEAD(&vfio.iommu_drivers_list); init_waitqueue_head(&vfio.release_q); + ret = misc_register(&vfio_dev); + if (ret) { + pr_err("vfio: misc device register failed\n"); + return ret; + } + + /* /dev/vfio/$GROUP */ vfio.class = class_create(THIS_MODULE, "vfio"); if (IS_ERR(vfio.class)) { ret = PTR_ERR(vfio.class); @@ -1444,27 +1455,14 @@ static int __init vfio_init(void) vfio.class->devnode = vfio_devnode; - ret = alloc_chrdev_region(&vfio.devt, 0, MINORMASK, "vfio"); + ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio"); if (ret) - goto err_base_chrdev; + goto err_alloc_chrdev; - cdev_init(&vfio.cdev, &vfio_fops); - ret = cdev_add(&vfio.cdev, vfio.devt, 1); - if (ret) - goto err_base_cdev; - - vfio.dev = device_create(vfio.class, NULL, vfio.devt, NULL, "vfio"); - if (IS_ERR(vfio.dev)) { - ret = PTR_ERR(vfio.dev); - goto err_base_dev; - } - - /* /dev/vfio/$GROUP */ cdev_init(&vfio.group_cdev, &vfio_group_fops); - ret = cdev_add(&vfio.group_cdev, - MKDEV(MAJOR(vfio.devt), 1), MINORMASK - 1); + ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK); if (ret) - goto err_groups_cdev; + goto err_cdev_add; pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); @@ -1478,16 +1476,13 @@ static int __init vfio_init(void) return 0; -err_groups_cdev: - device_destroy(vfio.class, vfio.devt); -err_base_dev: - cdev_del(&vfio.cdev); -err_base_cdev: - unregister_chrdev_region(vfio.devt, MINORMASK); -err_base_chrdev: +err_cdev_add: + unregister_chrdev_region(vfio.group_devt, MINORMASK); +err_alloc_chrdev: class_destroy(vfio.class); vfio.class = NULL; err_class: + misc_deregister(&vfio_dev); return ret; } @@ -1497,11 +1492,10 @@ static void __exit vfio_cleanup(void) idr_destroy(&vfio.group_idr); 
cdev_del(&vfio.group_cdev); - device_destroy(vfio.class, vfio.devt); - cdev_del(&vfio.cdev); - unregister_chrdev_region(vfio.devt, MINORMASK); + unregister_chrdev_region(vfio.group_devt, MINORMASK); class_destroy(vfio.class); vfio.class = NULL; + misc_deregister(&vfio_dev); } module_init(vfio_init); @@ -1511,3 +1505,5 @@ MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_ALIAS_MISCDEV(VFIO_MINOR); +MODULE_ALIAS("devname:vfio/vfio"); From 3be3a074cf5ba641529d8fdae0e05ca642f23e12 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 14 Jan 2014 16:12:55 -0700 Subject: [PATCH 3/3] vfio-pci: Don't use device_lock around AER interrupt setup device_lock is much too prone to lockups. For instance if we have a pending .remove then device_lock is already held. If userspace attempts to modify AER signaling after that point, a deadlock occurs. eventfd setup/teardown is already protected in vfio with the igate mutex. AER is not a high performance interrupt, so we can also use the same mutex to protect signaling versus setup races. 
Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 4 ++++ drivers/vfio/pci/vfio_pci_intrs.c | 17 ----------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6ab71b9fcf8d..3ffd27f42418 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -883,9 +883,13 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_DISCONNECT; } + mutex_lock(&vdev->igate); + if (vdev->err_trigger) eventfd_signal(vdev->err_trigger, 1); + mutex_unlock(&vdev->igate); + vfio_device_put(device); return PCI_ERS_RESULT_CAN_RECOVER; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 641bc87bdb96..210357691dc0 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -749,54 +749,37 @@ static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev, unsigned count, uint32_t flags, void *data) { int32_t fd = *(int32_t *)data; - struct pci_dev *pdev = vdev->pdev; if ((index != VFIO_PCI_ERR_IRQ_INDEX) || !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK)) return -EINVAL; - /* - * device_lock synchronizes setting and checking of - * err_trigger. The vfio_pci_aer_err_detected() is also - * called with device_lock held. 
- */ - /* DATA_NONE/DATA_BOOL enables loopback testing */ - if (flags & VFIO_IRQ_SET_DATA_NONE) { - device_lock(&pdev->dev); if (vdev->err_trigger) eventfd_signal(vdev->err_trigger, 1); - device_unlock(&pdev->dev); return 0; } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t trigger = *(uint8_t *)data; - device_lock(&pdev->dev); if (trigger && vdev->err_trigger) eventfd_signal(vdev->err_trigger, 1); - device_unlock(&pdev->dev); return 0; } /* Handle SET_DATA_EVENTFD */ - if (fd == -1) { - device_lock(&pdev->dev); if (vdev->err_trigger) eventfd_ctx_put(vdev->err_trigger); vdev->err_trigger = NULL; - device_unlock(&pdev->dev); return 0; } else if (fd >= 0) { struct eventfd_ctx *efdctx; efdctx = eventfd_ctx_fdget(fd); if (IS_ERR(efdctx)) return PTR_ERR(efdctx); - device_lock(&pdev->dev); if (vdev->err_trigger) eventfd_ctx_put(vdev->err_trigger); vdev->err_trigger = efdctx; - device_unlock(&pdev->dev); return 0; } else return -EINVAL;