diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index e9824c35c34f..003662aa8060 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -5,6 +5,7 @@ * Copyright IBM Corp. 2018 * * Author(s): Tony Krowiak + * Pierre Morel */ #include @@ -40,14 +41,45 @@ static struct ap_device_id ap_queue_ids[] = { MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids); +/** + * vfio_ap_queue_dev_probe: + * + * Allocate a vfio_ap_queue structure and associate it + * with the device as driver_data. + */ static int vfio_ap_queue_dev_probe(struct ap_device *apdev) { + struct vfio_ap_queue *q; + + q = kzalloc(sizeof(*q), GFP_KERNEL); + if (!q) + return -ENOMEM; + dev_set_drvdata(&apdev->device, q); + q->apqn = to_ap_queue(&apdev->device)->qid; + q->saved_isc = VFIO_AP_ISC_INVALID; return 0; } +/** + * vfio_ap_queue_dev_remove: + * + * Takes the matrix lock to avoid actions on this device while removing + * Free the associated vfio_ap_queue structure + */ static void vfio_ap_queue_dev_remove(struct ap_device *apdev) { - /* Nothing to do yet */ + struct vfio_ap_queue *q; + int apid, apqi; + + mutex_lock(&matrix_dev->lock); + q = dev_get_drvdata(&apdev->device); + dev_set_drvdata(&apdev->device, NULL); + apid = AP_QID_CARD(q->apqn); + apqi = AP_QID_QUEUE(q->apqn); + vfio_ap_mdev_reset_queue(apid, apqi, 1); + vfio_ap_irq_disable(q); + kfree(q); + mutex_unlock(&matrix_dev->lock); } static void vfio_ap_matrix_dev_release(struct device *dev) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index e8e87bf4c744..2c9fb1423a39 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -24,6 +24,296 @@ #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough" #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" +static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev); + +static int match_apqn(struct device *dev, void *data) +{ + struct vfio_ap_queue *q = dev_get_drvdata(dev); + + return (q->apqn == *(int *)(data)) ? 1 : 0; +} + +/** + * vfio_ap_get_queue: Retrieve a queue with a specific APQN from a list + * @matrix_mdev: the associated mediated matrix + * @apqn: The queue APQN + * + * Retrieve a queue with a specific APQN from the list of the + * devices of the vfio_ap_drv. + * Verify that the APID and the APQI are set in the matrix. + * + * Returns the pointer to the associated vfio_ap_queue + */ +static struct vfio_ap_queue *vfio_ap_get_queue( + struct ap_matrix_mdev *matrix_mdev, + int apqn) +{ + struct vfio_ap_queue *q; + struct device *dev; + + if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm)) + return NULL; + if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm)) + return NULL; + + dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL, + &apqn, match_apqn); + if (!dev) + return NULL; + q = dev_get_drvdata(dev); + q->matrix_mdev = matrix_mdev; + put_device(dev); + + return q; +} + +/** + * vfio_ap_wait_for_irqclear + * @apqn: The AP Queue number + * + * Checks the IRQ bit for the status of this APQN using ap_tapq. + * Returns if the ap_tapq function succeeded and the bit is clear. + * Returns if ap_tapq function failed with invalid, deconfigured or + * checkstopped AP. + * Otherwise retries up to 5 times after waiting 20ms. + * + */ +static void vfio_ap_wait_for_irqclear(int apqn) +{ + struct ap_queue_status status; + int retry = 5; + + do { + status = ap_tapq(apqn, NULL); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + case AP_RESPONSE_RESET_IN_PROGRESS: + if (!status.irq_enabled) + return; + /* Fall through */ + case AP_RESPONSE_BUSY: + msleep(20); + break; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + default: + WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__, + status.response_code, apqn); + return; + } + } while (--retry); + + WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n", + __func__, status.response_code, apqn); +} + +/** + * vfio_ap_free_aqic_resources + * @q: The vfio_ap_queue + * + * Unregisters the ISC in the GIB when the saved ISC not invalid. + * Unpin the guest's page holding the NIB when it exist. + * Reset the saved_pfn and saved_isc to invalid values. + * Clear the pointer to the matrix mediated device. + * + */ +static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) +{ + if (q->saved_isc != VFIO_AP_ISC_INVALID && q->matrix_mdev) + kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc); + if (q->saved_pfn && q->matrix_mdev) + vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), + &q->saved_pfn, 1); + q->saved_pfn = 0; + q->saved_isc = VFIO_AP_ISC_INVALID; + q->matrix_mdev = NULL; +} + +/** + * vfio_ap_irq_disable + * @q: The vfio_ap_queue + * + * Uses ap_aqic to disable the interruption and in case of success, reset + * in progress or IRQ disable command already proceeded: calls + * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear + * and calls vfio_ap_free_aqic_resources() to free the resources associated + * with the AP interrupt handling. + * + * In the case the AP is busy, or a reset is in progress, + * retries after 20ms, up to 5 times. + * + * Returns if ap_aqic function failed with invalid, deconfigured or + * checkstopped AP. + */ +struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) +{ + struct ap_qirq_ctrl aqic_gisa = {}; + struct ap_queue_status status; + int retries = 5; + + do { + status = ap_aqic(q->apqn, aqic_gisa, NULL); + switch (status.response_code) { + case AP_RESPONSE_OTHERWISE_CHANGED: + case AP_RESPONSE_NORMAL: + vfio_ap_wait_for_irqclear(q->apqn); + goto end_free; + case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_BUSY: + msleep(20); + break; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + case AP_RESPONSE_INVALID_ADDRESS: + default: + /* All cases in default means AP not operational */ + WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__, + status.response_code); + goto end_free; + } + } while (retries--); + + WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__, + status.response_code); +end_free: + vfio_ap_free_aqic_resources(q); + return status; +} + +/** + * vfio_ap_setirq: Enable Interruption for a APQN + * + * @dev: the device associated with the ap_queue + * @q: the vfio_ap_queue holding AQIC parameters + * + * Pin the NIB saved in *q + * Register the guest ISC to GIB interface and retrieve the + * host ISC to issue the host side PQAP/AQIC + * + * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the + * vfio_pin_pages failed. + * + * Otherwise return the ap_queue_status returned by the ap_aqic(), + * all retry handling will be done by the guest. + */ +static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, + int isc, + unsigned long nib) +{ + struct ap_qirq_ctrl aqic_gisa = {}; + struct ap_queue_status status = {}; + struct kvm_s390_gisa *gisa; + struct kvm *kvm; + unsigned long h_nib, g_pfn, h_pfn; + int ret; + + g_pfn = nib >> PAGE_SHIFT; + ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1, + IOMMU_READ | IOMMU_WRITE, &h_pfn); + switch (ret) { + case 1: + break; + default: + status.response_code = AP_RESPONSE_INVALID_ADDRESS; + return status; + } + + kvm = q->matrix_mdev->kvm; + gisa = kvm->arch.gisa_int.origin; + + h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK); + aqic_gisa.gisc = isc; + aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc); + aqic_gisa.ir = 1; + aqic_gisa.gisa = (uint64_t)gisa >> 4; + + status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + /* See if we did clear older IRQ configuration */ + vfio_ap_free_aqic_resources(q); + q->saved_pfn = g_pfn; + q->saved_isc = isc; + break; + case AP_RESPONSE_OTHERWISE_CHANGED: + /* We could not modify IRQ setings: clear new configuration */ + vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1); + kvm_s390_gisc_unregister(kvm, isc); + break; + default: + pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn, + status.response_code); + vfio_ap_irq_disable(q); + break; + } + + return status; +} + +/** + * handle_pqap: PQAP instruction callback + * + * @vcpu: The vcpu on which we received the PQAP instruction + * + * Get the general register contents to initialize internal variables. + * REG[0]: APQN + * REG[1]: IR and ISC + * REG[2]: NIB + * + * Response.status may be set to following Response Code: + * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available + * - AP_RESPONSE_DECONFIGURED: if the queue is not configured + * - AP_RESPONSE_NORMAL (0) : in case of successs + * Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC. + * We take the matrix_dev lock to ensure serialization on queues and + * mediated device access. + * + * Return 0 if we could handle the request inside KVM. + * otherwise, returns -EOPNOTSUPP to let QEMU handle the fault. + */ +static int handle_pqap(struct kvm_vcpu *vcpu) +{ + uint64_t status; + uint16_t apqn; + struct vfio_ap_queue *q; + struct ap_queue_status qstatus = { + .response_code = AP_RESPONSE_Q_NOT_AVAIL, }; + struct ap_matrix_mdev *matrix_mdev; + + /* If we do not use the AIV facility just go to userland */ + if (!(vcpu->arch.sie_block->eca & ECA_AIV)) + return -EOPNOTSUPP; + + apqn = vcpu->run->s.regs.gprs[0] & 0xffff; + mutex_lock(&matrix_dev->lock); + + if (!vcpu->kvm->arch.crypto.pqap_hook) + goto out_unlock; + matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook, + struct ap_matrix_mdev, pqap_hook); + + q = vfio_ap_get_queue(matrix_mdev, apqn); + if (!q) + goto out_unlock; + + status = vcpu->run->s.regs.gprs[1]; + + /* If IR bit(16) is set we enable the interrupt */ + if ((status >> (63 - 16)) & 0x01) + qstatus = vfio_ap_irq_enable(q, status & 0x07, + vcpu->run->s.regs.gprs[2]); + else + qstatus = vfio_ap_irq_disable(q); + +out_unlock: + memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus)); + vcpu->run->s.regs.gprs[1] >>= 32; + mutex_unlock(&matrix_dev->lock); + return 0; +} + static void vfio_ap_matrix_init(struct ap_config_info *info, struct ap_matrix *matrix) { @@ -45,8 +335,11 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev) return -ENOMEM; } + matrix_mdev->mdev = mdev; vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix); mdev_set_drvdata(mdev, matrix_mdev); + matrix_mdev->pqap_hook.hook = handle_pqap; + matrix_mdev->pqap_hook.owner = THIS_MODULE; mutex_lock(&matrix_dev->lock); list_add(&matrix_mdev->node, &matrix_dev->mdev_list); mutex_unlock(&matrix_dev->lock); @@ -62,6 +355,7 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev) return -EBUSY; mutex_lock(&matrix_dev->lock); + vfio_ap_mdev_reset_queues(mdev); list_del(&matrix_mdev->node); mutex_unlock(&matrix_dev->lock); @@ -754,6 +1048,8 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, } matrix_mdev->kvm = kvm; + kvm_get_kvm(kvm); + kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; mutex_unlock(&matrix_dev->lock); return 0; @@ -819,15 +1115,36 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, - unsigned int retry) +static void vfio_ap_irq_disable_apqn(int apqn) +{ + struct device *dev; + struct vfio_ap_queue *q; + + dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL, + &apqn, match_apqn); + if (dev) { + q = dev_get_drvdata(dev); + vfio_ap_irq_disable(q); + put_device(dev); + } +} + +int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, + unsigned int retry) { struct ap_queue_status status; + int retry2 = 2; + int apqn = AP_MKQID(apid, apqi); do { - status = ap_zapq(AP_MKQID(apid, apqi)); + status = ap_zapq(apqn); switch (status.response_code) { case AP_RESPONSE_NORMAL: + while (!status.queue_empty && retry2--) { + msleep(20); + status = ap_tapq(apqn, NULL); + } + WARN_ON_ONCE(retry <= 0); return 0; case AP_RESPONSE_RESET_IN_PROGRESS: case AP_RESPONSE_BUSY: @@ -861,6 +1178,7 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) */ if (ret) rc = ret; + vfio_ap_irq_disable_apqn(AP_MKQID(apid, apqi)); } } @@ -904,15 +1222,20 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev) { struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); - if (matrix_mdev->kvm) + mutex_lock(&matrix_dev->lock); + if (matrix_mdev->kvm) { kvm_arch_crypto_clear_masks(matrix_mdev->kvm); + matrix_mdev->kvm->arch.crypto.pqap_hook = NULL; + vfio_ap_mdev_reset_queues(mdev); + kvm_put_kvm(matrix_mdev->kvm); + matrix_mdev->kvm = NULL; + } + mutex_unlock(&matrix_dev->lock); - vfio_ap_mdev_reset_queues(mdev); vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &matrix_mdev->iommu_notifier); vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &matrix_mdev->group_notifier); - matrix_mdev->kvm = NULL; module_put(THIS_MODULE); } @@ -941,6 +1264,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev, { int ret; + mutex_lock(&matrix_dev->lock); switch (cmd) { case VFIO_DEVICE_GET_INFO: ret = vfio_ap_mdev_get_device_info(arg); @@ -952,6 +1276,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev, ret = -EOPNOTSUPP; break; } + mutex_unlock(&matrix_dev->lock); return ret; } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index 18dcc4d769cc..f46dde56b464 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -4,6 +4,7 @@ * * Author(s): Tony Krowiak * Halil Pasic + * Pierre Morel * * Copyright IBM Corp. 2018 */ @@ -89,5 +90,15 @@ struct ap_matrix_mdev { extern int vfio_ap_mdev_register(void); extern void vfio_ap_mdev_unregister(void); +int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, + unsigned int retry); +struct vfio_ap_queue { + struct ap_matrix_mdev *matrix_mdev; + unsigned long saved_pfn; + int apqn; +#define VFIO_AP_ISC_INVALID 0xff + unsigned char saved_isc; +}; +struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q); #endif /* _VFIO_AP_PRIVATE_H_ */