util/vfio-helpers: Collect IOVA reserved regions

The IOVA allocator currently ignores host reserved regions.
As a result, some chosen IOVAs may collide with them,
resulting in VFIO MAP_DMA errors later on. This happens on ARM,
where the MSI reserved window [0x8000000, 0x8100000] is quickly
encountered. Since kernel 5.4, VFIO returns the usable IOVA
regions. So let's enumerate them, with a view to avoiding the
reserved regions later on.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20200929085550.30926-2-eric.auger@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Eric Auger 2020-09-29 10:55:49 +02:00 committed by Stefan Hajnoczi
parent ad1e691db2
commit 4487d420d0
1 changed files with 70 additions and 2 deletions

View File

@ -40,6 +40,11 @@ typedef struct {
uint64_t iova;
} IOVAMapping;
/*
 * One contiguous range of IO virtual addresses, described by its two
 * bounds.
 */
struct IOVARange {
/* Lowest address of the range. */
uint64_t start;
/*
 * Upper bound of the range.
 * NOTE(review): presumably inclusive (i.e. the last usable address);
 * confirm against the code that fills and consumes the ranges.
 */
uint64_t end;
};
struct QEMUVFIOState {
QemuMutex lock;
@ -49,6 +54,8 @@ struct QEMUVFIOState {
int device;
RAMBlockNotifier ram_notifier;
struct vfio_region_info config_region_info, bar_region_info[6];
struct IOVARange *usable_iova_ranges;
uint8_t nb_iova_ranges;
/* These fields are protected by @lock */
/* VFIO's IO virtual address space is managed by splitting into a few
@ -236,6 +243,35 @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
return ret == size ? 0 : -errno;
}
/*
 * Replace s->usable_iova_ranges with the usable IOVA ranges found in the
 * VFIO_IOMMU_GET_INFO reply held in @buf.
 *
 * @s:   state whose usable_iova_ranges / nb_iova_ranges are updated
 * @buf: a struct vfio_iommu_type1_info followed by its capability chain
 *
 * If the reply carries no VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE capability,
 * or that capability lists no range, @s is left untouched so whatever
 * ranges were installed beforehand stay in effect.
 */
static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
{
    struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
    struct vfio_info_cap_header *cap;
    struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
    uint32_t nr_iovas;
    uint32_t i;

    /*
     * Only follow cap_offset when the reply is flagged as carrying
     * capabilities. A zero offset cannot designate a capability either:
     * it would alias the info header itself.
     */
    if (!(info->flags & VFIO_IOMMU_INFO_CAPS) || !info->cap_offset) {
        return;
    }

    cap = (void *)buf + info->cap_offset;
    while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
        if (!cap->next) {
            /* End of chain: no IOVA range capability in this reply. */
            return;
        }
        /* 'next' is an offset from the start of the info buffer. */
        cap = (struct vfio_info_cap_header *)(buf + cap->next);
    }

    cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;

    nr_iovas = cap_iova_range->nr_iovas;
    if (!nr_iovas) {
        /* Nothing reported: keep the ranges already stored in @s. */
        return;
    }
    if (nr_iovas > UINT8_MAX) {
        /*
         * nb_iova_ranges is a uint8_t. Keep the first UINT8_MAX ranges
         * instead of letting the count wrap silently.
         */
        nr_iovas = UINT8_MAX;
    }

    /* Size the array for exactly what is about to be written into it. */
    s->usable_iova_ranges =
        g_realloc(s->usable_iova_ranges,
                  nr_iovas * sizeof(*s->usable_iova_ranges));
    s->nb_iova_ranges = nr_iovas;

    for (i = 0; i < nr_iovas; i++) {
        s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
        s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
    }
}
static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
Error **errp)
{
@ -243,10 +279,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
int i;
uint16_t pci_cmd;
struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
struct vfio_iommu_type1_info *iommu_info = NULL;
size_t iommu_info_size = sizeof(*iommu_info);
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
char *group_file = NULL;
s->usable_iova_ranges = NULL;
/* Create a new container */
s->container = open("/dev/vfio/vfio", O_RDWR);
@ -310,13 +349,35 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
goto fail;
}
iommu_info = g_malloc0(iommu_info_size);
iommu_info->argsz = iommu_info_size;
/* Get additional IOMMU info */
if (ioctl(s->container, VFIO_IOMMU_GET_INFO, &iommu_info)) {
if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
error_setg_errno(errp, errno, "Failed to get IOMMU info");
ret = -errno;
goto fail;
}
/*
* if the kernel does not report usable IOVA regions, choose
* the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
*/
s->nb_iova_ranges = 1;
s->usable_iova_ranges = g_new0(struct IOVARange, 1);
s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
if (iommu_info->argsz > iommu_info_size) {
iommu_info_size = iommu_info->argsz;
iommu_info = g_realloc(iommu_info, iommu_info_size);
if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
ret = -errno;
goto fail;
}
collect_usable_iova_ranges(s, iommu_info);
}
s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
if (s->device < 0) {
@ -365,8 +426,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
if (ret) {
goto fail;
}
g_free(iommu_info);
return 0;
fail:
g_free(s->usable_iova_ranges);
s->usable_iova_ranges = NULL;
s->nb_iova_ranges = 0;
g_free(iommu_info);
close(s->group);
fail_container:
close(s->container);
@ -716,6 +782,8 @@ void qemu_vfio_close(QEMUVFIOState *s)
qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
}
ram_block_notifier_remove(&s->ram_notifier);
g_free(s->usable_iova_ranges);
s->nb_iova_ranges = 0;
qemu_vfio_reset(s);
close(s->device);
close(s->group);