commit c2dc4c073f
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

vhost,vdpa: fixes

Pull vhost fixes from Michael Tsirkin:
 "Fixes all over the place. A new UAPI is borderline: it can also be
  considered a new feature, but it also seems to be the only way we
  could come up with to fix addressing for userspace - and it seems
  important to switch to it now, before userspace assumptions about the
  addressing ability of devices are set in stone"

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vdpasim: allow to assign a MAC address
  vdpasim: fix MAC address configuration
  vdpa: handle irq bypass register failure case
  vdpa_sim: Fix DMA mask
  Revert "vhost-vdpa: fix page pinning leakage in error path"
  vdpa/mlx5: Fix error return in map_direct_mr()
  vhost_vdpa: Return -EFAULT if copy_from_user() fails
  vdpa_sim: implement get_iova_range()
  vhost: vdpa: report iova range
  vdpa: introduce config op to get valid iova range
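The new UAPI referred to above is the VHOST_VDPA_GET_IOVA_RANGE ioctl introduced at the end of the series (see the uapi hunks below). As a rough usage sketch - not part of the patch; the device path and error handling are illustrative only - userspace would query the range once and constrain all later IOTLB mappings to it:

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/vhost.h>	/* VHOST_VDPA_GET_IOVA_RANGE; pulls in vhost_types.h */

	int main(void)
	{
		struct vhost_vdpa_iova_range range;
		int fd = open("/dev/vhost-vdpa-0", O_RDWR);	/* example device node */

		if (fd < 0)
			return 1;
		/* Kernels predating this UAPI fail the ioctl with ENOTTY. */
		if (ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, &range) < 0) {
			close(fd);
			return 1;
		}
		printf("valid iova range: [%llu, %llu]\n",
		       (unsigned long long)range.first,
		       (unsigned long long)range.last);
		close(fd);
		return 0;
	}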
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -239,7 +239,6 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
 	u64 paend;
 	struct scatterlist *sg;
 	struct device *dma = mvdev->mdev->device;
-	int ret;
 
 	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
 	     map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
@@ -277,8 +276,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
 done:
 	mr->log_size = log_entity_size;
 	mr->nsg = nsg;
-	ret = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
-	if (!ret)
+	err = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
+	if (!err)
 		goto err_map;
 
 	err = create_direct_mr(mvdev, mr);
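A note on the map_direct_mr() change: dma_map_sg_attrs() returns the number of mapped entries, with 0 meaning failure, rather than a negative errno, and this function reports errors through its err variable. The old code stored the result in a separate ret that the error path never consulted, so a mapping failure could return a stale value instead of an error; dropping ret and testing err directly keeps error reporting on the one variable the error labels actually return.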
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -38,6 +38,10 @@ static int batch_mapping = 1;
 module_param(batch_mapping, int, 0444);
 MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
 
+static char *macaddr;
+module_param(macaddr, charp, 0);
+MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
+
 struct vdpasim_virtqueue {
 	struct vringh vring;
 	struct vringh_kiov iov;
@@ -60,7 +64,8 @@ struct vdpasim_virtqueue {
 
 static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) |
 			      (1ULL << VIRTIO_F_VERSION_1)  |
-			      (1ULL << VIRTIO_F_ACCESS_PLATFORM);
+			      (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
+			      (1ULL << VIRTIO_NET_F_MAC);
 
 /* State of each vdpasim device */
 struct vdpasim {
@@ -361,7 +366,9 @@ static struct vdpasim *vdpasim_create(void)
 	spin_lock_init(&vdpasim->iommu_lock);
 
 	dev = &vdpasim->vdpa.dev;
-	dev->coherent_dma_mask = DMA_BIT_MASK(64);
-	dev->dma_mask = &dev->coherent_dma_mask;
+	dev->dma_mask = &dev->coherent_dma_mask;
+	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
+		goto err_iommu;
+
 	set_dma_ops(dev, &vdpasim_dma_ops);
 
 	vdpasim->iommu = vhost_iotlb_alloc(2048, 0);
@@ -372,7 +379,15 @@ static struct vdpasim *vdpasim_create(void)
 	if (!vdpasim->buffer)
 		goto err_iommu;
 
-	eth_random_addr(vdpasim->config.mac);
+	if (macaddr) {
+		mac_pton(macaddr, vdpasim->config.mac);
+		if (!is_valid_ether_addr(vdpasim->config.mac)) {
+			ret = -EADDRNOTAVAIL;
+			goto err_iommu;
+		}
+	} else {
+		eth_random_addr(vdpasim->config.mac);
+	}
 
 	vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu);
 	vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu);
@@ -574,6 +589,16 @@ static u32 vdpasim_get_generation(struct vdpa_device *vdpa)
 	return vdpasim->generation;
 }
 
+static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa)
+{
+	struct vdpa_iova_range range = {
+		.first = 0ULL,
+		.last = ULLONG_MAX,
+	};
+
+	return range;
+}
+
 static int vdpasim_set_map(struct vdpa_device *vdpa,
 			   struct vhost_iotlb *iotlb)
 {
@@ -657,6 +682,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = {
 	.get_config = vdpasim_get_config,
 	.set_config = vdpasim_set_config,
 	.get_generation = vdpasim_get_generation,
+	.get_iova_range = vdpasim_get_iova_range,
 	.dma_map = vdpasim_dma_map,
 	.dma_unmap = vdpasim_dma_unmap,
 	.free = vdpasim_free,
@@ -683,6 +709,7 @@ static const struct vdpa_config_ops vdpasim_net_batch_config_ops = {
 	.get_config = vdpasim_get_config,
 	.set_config = vdpasim_set_config,
 	.get_generation = vdpasim_get_generation,
+	.get_iova_range = vdpasim_get_iova_range,
 	.set_map = vdpasim_set_map,
 	.free = vdpasim_free,
 };
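Taken together, the vdpa_sim hunks cover three of the fixes in the list above: the simulator now offers VIRTIO_NET_F_MAC (guests only honor config.mac when that feature bit is offered), accepts an optional macaddr module parameter (for example, loading it with macaddr="02:11:22:33:44:55", an illustrative locally administered value, instead of getting a random address), and rejects an unparsable or invalid address with -EADDRNOTAVAIL at creation time. The DMA-mask change matters because dma_set_mask_and_coherent() dereferences dev->dma_mask, so the pointer is wired up first, and a failure now aborts device creation instead of being silently ignored.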
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -47,6 +47,7 @@ struct vhost_vdpa {
 	int minor;
 	struct eventfd_ctx *config_ctx;
 	int in_batch;
+	struct vdpa_iova_range range;
 };
 
 static DEFINE_IDA(vhost_vdpa_ida);
@@ -103,6 +104,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
 	vq->call_ctx.producer.token = vq->call_ctx.ctx;
 	vq->call_ctx.producer.irq = irq;
 	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
+	if (unlikely(ret))
+		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
+			 qid, vq->call_ctx.producer.token, ret);
 }
 
 static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
@@ -337,6 +341,16 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
 	return 0;
 }
 
+static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
+{
+	struct vhost_vdpa_iova_range range = {
+		.first = v->range.first,
+		.last = v->range.last,
+	};
+
+	return copy_to_user(argp, &range, sizeof(range));
+}
+
 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 				   void __user *argp)
 {
@@ -421,12 +435,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 	void __user *argp = (void __user *)arg;
 	u64 __user *featurep = argp;
 	u64 features;
-	long r;
+	long r = 0;
 
 	if (cmd == VHOST_SET_BACKEND_FEATURES) {
-		r = copy_from_user(&features, featurep, sizeof(features));
-		if (r)
-			return r;
+		if (copy_from_user(&features, featurep, sizeof(features)))
+			return -EFAULT;
 		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
 			return -EOPNOTSUPP;
 		vhost_set_backend_features(&v->vdev, features);
@@ -469,7 +482,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 		break;
 	case VHOST_GET_BACKEND_FEATURES:
 		features = VHOST_VDPA_BACKEND_FEATURES;
-		r = copy_to_user(featurep, &features, sizeof(features));
+		if (copy_to_user(featurep, &features, sizeof(features)))
+			r = -EFAULT;
+		break;
+	case VHOST_VDPA_GET_IOVA_RANGE:
+		r = vhost_vdpa_get_iova_range(v, argp);
 		break;
 	default:
 		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
@@ -588,19 +605,25 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 	struct vhost_dev *dev = &v->vdev;
 	struct vhost_iotlb *iotlb = dev->iotlb;
 	struct page **page_list;
-	struct vm_area_struct **vmas;
+	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
 	unsigned int gup_flags = FOLL_LONGTERM;
-	unsigned long map_pfn, last_pfn = 0;
-	unsigned long npages, lock_limit;
-	unsigned long i, nmap = 0;
+	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
+	unsigned long locked, lock_limit, pinned, i;
 	u64 iova = msg->iova;
-	long pinned;
 	int ret = 0;
 
+	if (msg->iova < v->range.first ||
+	    msg->iova + msg->size - 1 > v->range.last)
+		return -EINVAL;
+
 	if (vhost_iotlb_itree_first(iotlb, msg->iova,
 				    msg->iova + msg->size - 1))
 		return -EEXIST;
 
+	page_list = (struct page **) __get_free_page(GFP_KERNEL);
+	if (!page_list)
+		return -ENOMEM;
+
 	if (msg->perm & VHOST_ACCESS_WO)
 		gup_flags |= FOLL_WRITE;
 
@@ -608,86 +631,61 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 	if (!npages)
 		return -EINVAL;
 
-	page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
-	vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
-			      GFP_KERNEL);
-	if (!page_list || !vmas) {
-		ret = -ENOMEM;
-		goto free;
-	}
-
 	mmap_read_lock(dev->mm);
 
+	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
+
+	if (locked > lock_limit) {
 		ret = -ENOMEM;
-		goto unlock;
+		goto out;
 	}
 
-	pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
-				page_list, vmas);
-	if (npages != pinned) {
-		if (pinned < 0) {
-			ret = pinned;
-		} else {
-			unpin_user_pages(page_list, pinned);
-			ret = -ENOMEM;
-		}
-		goto unlock;
-	}
-
+	cur_base = msg->uaddr & PAGE_MASK;
 	iova &= PAGE_MASK;
-	map_pfn = page_to_pfn(page_list[0]);
-
-	/* One more iteration to avoid extra vdpa_map() call out of loop. */
-	for (i = 0; i <= npages; i++) {
-		unsigned long this_pfn;
-		u64 csize;
-
-		/* The last chunk may have no valid PFN next to it */
-		this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
-
-		if (last_pfn && (this_pfn == -1UL ||
-				 this_pfn != last_pfn + 1)) {
-			/* Pin a contiguous chunk of memory */
-			csize = last_pfn - map_pfn + 1;
-			ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
-					     map_pfn << PAGE_SHIFT,
-					     msg->perm);
-			if (ret) {
-				/*
-				 * Unpin the rest chunks of memory on the
-				 * flight with no corresponding vdpa_map()
-				 * calls having been made yet. On the other
-				 * hand, vdpa_unmap() in the failure path
-				 * is in charge of accounting the number of
-				 * pinned pages for its own.
-				 * This asymmetrical pattern of accounting
-				 * is for efficiency to pin all pages at
-				 * once, while there is no other callsite
-				 * of vdpa_map() than here above.
-				 */
-				unpin_user_pages(&page_list[nmap],
-						 npages - nmap);
-				goto out;
+
+	while (npages) {
+		pinned = min_t(unsigned long, npages, list_size);
+		ret = pin_user_pages(cur_base, pinned,
+				     gup_flags, page_list, NULL);
+		if (ret != pinned)
+			goto out;
+
+		if (!last_pfn)
+			map_pfn = page_to_pfn(page_list[0]);
+
+		for (i = 0; i < ret; i++) {
+			unsigned long this_pfn = page_to_pfn(page_list[i]);
+			u64 csize;
+
+			if (last_pfn && (this_pfn != last_pfn + 1)) {
+				/* Pin a contiguous chunk of memory */
+				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
+				if (vhost_vdpa_map(v, iova, csize,
+						   map_pfn << PAGE_SHIFT,
+						   msg->perm))
+					goto out;
+
+				map_pfn = this_pfn;
+				iova += csize;
 			}
-			atomic64_add(csize, &dev->mm->pinned_vm);
-			nmap += csize;
-			iova += csize << PAGE_SHIFT;
-			map_pfn = this_pfn;
+
+			last_pfn = this_pfn;
 		}
-		last_pfn = this_pfn;
+
+		cur_base += ret << PAGE_SHIFT;
+		npages -= ret;
 	}
 
-	WARN_ON(nmap != npages);
+	/* Pin the rest chunk */
+	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
+			     map_pfn << PAGE_SHIFT, msg->perm);
 out:
-	if (ret)
+	if (ret) {
 		vhost_vdpa_unmap(v, msg->iova, msg->size);
-unlock:
+		atomic64_sub(npages, &dev->mm->pinned_vm);
+	}
 	mmap_read_unlock(dev->mm);
-free:
-	kvfree(vmas);
-	kvfree(page_list);
+	free_page((unsigned long)page_list);
 	return ret;
 }
@@ -783,6 +781,27 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
 	v->domain = NULL;
 }
 
+static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
+{
+	struct vdpa_iova_range *range = &v->range;
+	struct iommu_domain_geometry geo;
+	struct vdpa_device *vdpa = v->vdpa;
+	const struct vdpa_config_ops *ops = vdpa->config;
+
+	if (ops->get_iova_range) {
+		*range = ops->get_iova_range(vdpa);
+	} else if (v->domain &&
+		   !iommu_domain_get_attr(v->domain,
+					  DOMAIN_ATTR_GEOMETRY, &geo) &&
+		   geo.force_aperture) {
+		range->first = geo.aperture_start;
+		range->last = geo.aperture_end;
+	} else {
+		range->first = 0;
+		range->last = ULLONG_MAX;
+	}
+}
+
 static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 {
 	struct vhost_vdpa *v;
@@ -823,6 +842,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 	if (r)
 		goto err_init_iotlb;
 
+	vhost_vdpa_set_iova_range(v);
+
 	filep->private_data = v;
 
 	return 0;
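Several distinct fixes meet in this file. irq_bypass_register_producer() failures are now reported with dev_info() but left non-fatal, since the virtqueue still works through the ordinary, non-bypass interrupt path. The ioctl handler no longer leaks raw copy_from_user()/copy_to_user() remainders to callers as positive return values; they become -EFAULT. The revert restores the original batched pinning loop, which walks the range one page-list at a time out of a single free page instead of allocating page and VMA arrays sized by the userspace-controlled mapping. Finally, vhost_vdpa_process_iotlb_update() now refuses with -EINVAL any IOTLB update that falls outside the [range.first, range.last] interval the device advertised, with vhost_vdpa_set_iova_range() preferring the driver's get_iova_range op, then a forced IOMMU aperture, then the full 64-bit space.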
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -52,6 +52,16 @@ struct vdpa_device {
 	int nvqs;
 };
 
+/**
+ * vDPA IOVA range - the IOVA range support by the device
+ * @first: start of the IOVA range
+ * @last: end of the IOVA range
+ */
+struct vdpa_iova_range {
+	u64 first;
+	u64 last;
+};
+
 /**
  * vDPA_config_ops - operations for configuring a vDPA device.
  * Note: vDPA device drivers are required to implement all of the
@@ -151,6 +161,10 @@ struct vdpa_device {
 * @get_generation:		Get device config generation (optional)
 *				@vdev: vdpa device
 *				Returns u32: device generation
+ * @get_iova_range:		Get supported iova range (optional)
+ *				@vdev: vdpa device
+ *				Returns the iova range supported by
+ *				the device.
 * @set_map:			Set device memory mapping (optional)
 *				Needed for device that using device
 *				specific DMA translation (on-chip IOMMU)
@@ -216,6 +230,7 @@ struct vdpa_config_ops {
 	void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
 			   const void *buf, unsigned int len);
 	u32 (*get_generation)(struct vdpa_device *vdev);
+	struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev);
 
 	/* DMA ops */
 	int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
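get_iova_range() is optional: when a parent driver does not implement it, vhost falls back to the IOMMU aperture or to the whole 64-bit space, as vhost_vdpa_set_iova_range() above shows, and vdpasim simply reports 0..ULLONG_MAX. A device whose translation hardware covers less than 64 bits would return its real aperture instead; a minimal sketch, with the driver name and the 40-bit limit invented purely for illustration:

	/* Hypothetical parent driver whose on-chip IOMMU handles 40-bit IOVAs */
	static struct vdpa_iova_range foo_get_iova_range(struct vdpa_device *vdev)
	{
		struct vdpa_iova_range range = {
			.first = 0,
			/* .last is inclusive: the highest mappable IOVA, not a size */
			.last = (1ULL << 40) - 1,
		};

		return range;
	}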
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -146,4 +146,8 @@
 
 /* Set event fd for config interrupt*/
 #define VHOST_VDPA_SET_CONFIG_CALL	_IOW(VHOST_VIRTIO, 0x77, int)
+
+/* Get the valid iova range */
+#define VHOST_VDPA_GET_IOVA_RANGE	_IOR(VHOST_VIRTIO, 0x78, \
+					     struct vhost_vdpa_iova_range)
 #endif
--- a/include/uapi/linux/vhost_types.h
+++ b/include/uapi/linux/vhost_types.h
@@ -138,6 +138,15 @@ struct vhost_vdpa_config {
 	__u8 buf[0];
 };
 
+/* vhost vdpa IOVA range
+ * @first: First address that can be mapped by vhost-vDPA
+ * @last: Last address that can be mapped by vhost-vDPA
+ */
+struct vhost_vdpa_iova_range {
+	__u64 first;
+	__u64 last;
+};
+
 /* Feature bits */
 /* Log all write descriptors. Can be changed while device is active. */
 #define VHOST_F_LOG_ALL 26
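Both the kernel-internal struct vdpa_iova_range and the UAPI struct vhost_vdpa_iova_range describe an inclusive [first, last] interval rather than a base/size pair, so the full 64-bit space (first = 0, last = ULLONG_MAX) stays representable without overflow. Userspace can mirror the acceptance check that vhost_vdpa_process_iotlb_update() now performs before issuing IOTLB updates; a small overflow-safe sketch, assuming range was filled in by VHOST_VDPA_GET_IOVA_RANGE as in the example near the top:

	#include <stdbool.h>
	#include <linux/vhost_types.h>	/* struct vhost_vdpa_iova_range */

	/* The mapping [iova, iova + size - 1] must lie inside [first, last]. */
	static bool iova_range_ok(const struct vhost_vdpa_iova_range *range,
				  __u64 iova, __u64 size)
	{
		return size && iova >= range->first && iova <= range->last &&
		       size - 1 <= range->last - iova;
	}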