vfio queue:
* Introduce an IOMMU interface backend for VFIO devices * Convert IOMMU type1 and sPAPR IOMMU to respective backends * Introduce a new IOMMUFD backend for ARM, x86_64 and s390x platforms -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmWB34AACgkQUaNDx8/7 7KGOMxAAqXegvAneHqIlu4c8TzTuUR2rkYgev9RdfIHRDuY2XtaX14xlWn/rpTXZ qSgeta+iT8Cv4YV1POJeHWFDNs9E29p1w+R7nLcH1qTIIaZHtxwbVVQ3s7kAo1Vb 1S1G0/zIznzGVI50a0lj1gO2yQJnu/79nXpnICgA5REW0CscMssnvboQODlwq17V ZLNVM8CSAvKl6ppkmzRdfNXCfq6x7bf4MsvnuXsqda4TBbvyyTjAqdo/8sjKiGly gSDQqhgy6cvEXIF0UUHPJzFApf0YdXUDlL8hzH90hvRVu4W/t24dPmT7UkVIX9Ek TA7RVxv7iJlHtFDqfSTAJFr7nKO9Tm2V9N7xbD1OJUKrMoPZRT6+0R1hMKqsZ5z+ nG6khqHGzuo/aI9n70YxYIPXt+vs/EHI4WUtslGLUTL0xv8lUzk6cxyIJupFRmDS ix6GM9TXOV8RyOveL2knHVymlFnAR6dekkMB+6ljUTuzDwG0oco4vno8z9bi7Vct j36bM56U3lhY+w+Ljoy0gPwgrw/FROnGG3mp1mwp1KRHqtEDnUQu8CaLbJOBsBGE JJDP6AKAYMczdmYVkd4CvE0WaeSxtOUxW5H5NCPjtaFQt0qEcght2lA2K15g521q jeojoJ/QK5949jnNCqm1Z66/YQVL79lPyL0E+mxEohwu+yTORk4= =U0x5 -----END PGP SIGNATURE----- Merge tag 'pull-vfio-20231219' of https://github.com/legoater/qemu into staging vfio queue: * Introduce an IOMMU interface backend for VFIO devices * Convert IOMMU type1 and sPAPR IOMMU to respective backends * Introduce a new IOMMUFD backend for ARM, x86_64 and s390x platforms # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmWB34AACgkQUaNDx8/7 # 7KGOMxAAqXegvAneHqIlu4c8TzTuUR2rkYgev9RdfIHRDuY2XtaX14xlWn/rpTXZ # qSgeta+iT8Cv4YV1POJeHWFDNs9E29p1w+R7nLcH1qTIIaZHtxwbVVQ3s7kAo1Vb # 1S1G0/zIznzGVI50a0lj1gO2yQJnu/79nXpnICgA5REW0CscMssnvboQODlwq17V # ZLNVM8CSAvKl6ppkmzRdfNXCfq6x7bf4MsvnuXsqda4TBbvyyTjAqdo/8sjKiGly # gSDQqhgy6cvEXIF0UUHPJzFApf0YdXUDlL8hzH90hvRVu4W/t24dPmT7UkVIX9Ek # TA7RVxv7iJlHtFDqfSTAJFr7nKO9Tm2V9N7xbD1OJUKrMoPZRT6+0R1hMKqsZ5z+ # nG6khqHGzuo/aI9n70YxYIPXt+vs/EHI4WUtslGLUTL0xv8lUzk6cxyIJupFRmDS # ix6GM9TXOV8RyOveL2knHVymlFnAR6dekkMB+6ljUTuzDwG0oco4vno8z9bi7Vct # 
j36bM56U3lhY+w+Ljoy0gPwgrw/FROnGG3mp1mwp1KRHqtEDnUQu8CaLbJOBsBGE # JJDP6AKAYMczdmYVkd4CvE0WaeSxtOUxW5H5NCPjtaFQt0qEcght2lA2K15g521q # jeojoJ/QK5949jnNCqm1Z66/YQVL79lPyL0E+mxEohwu+yTORk4= # =U0x5 # -----END PGP SIGNATURE----- # gpg: Signature made Tue 19 Dec 2023 13:22:56 EST # gpg: using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1 # gpg: Good signature from "Cédric Le Goater <clg@redhat.com>" [unknown] # gpg: aka "Cédric Le Goater <clg@kaod.org>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: A0F6 6548 F048 95EB FE6B 0B60 51A3 43C7 CFFB ECA1 * tag 'pull-vfio-20231219' of https://github.com/legoater/qemu: (47 commits) hw/ppc/Kconfig: Imply VFIO_PCI docs/devel: Add VFIO iommufd backend documentation vfio: Introduce a helper function to initialize VFIODevice vfio/ccw: Move VFIODevice initializations in vfio_ccw_instance_init vfio/ap: Move VFIODevice initializations in vfio_ap_instance_init vfio/platform: Move VFIODevice initializations in vfio_platform_instance_init vfio/pci: Move VFIODevice initializations in vfio_instance_init hw/i386: Activate IOMMUFD for q35 machines kconfig: Activate IOMMUFD for s390x machines hw/arm: Activate IOMMUFD for virt machines vfio: Make VFIOContainerBase poiner parameter const in VFIOIOMMUOps callbacks vfio/ccw: Make vfio cdev pre-openable by passing a file handle vfio/ccw: Allow the selection of a given iommu backend vfio/ap: Make vfio cdev pre-openable by passing a file handle vfio/ap: Allow the selection of a given iommu backend vfio/platform: Make vfio cdev pre-openable by passing a file handle vfio/platform: Allow the selection of a given iommu backend vfio/pci: Make vfio cdev pre-openable by passing a file handle vfio/pci: Allow the selection of a given iommu backend vfio/iommufd: Enable pci hot reset through iommufd cdev interface ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
commit
dd7d3e3540
11
MAINTAINERS
11
MAINTAINERS
@ -2167,6 +2167,17 @@ F: hw/vfio/ap.c
|
||||
F: docs/system/s390x/vfio-ap.rst
|
||||
L: qemu-s390x@nongnu.org
|
||||
|
||||
iommufd
|
||||
M: Yi Liu <yi.l.liu@intel.com>
|
||||
M: Eric Auger <eric.auger@redhat.com>
|
||||
M: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||
S: Supported
|
||||
F: backends/iommufd.c
|
||||
F: include/sysemu/iommufd.h
|
||||
F: include/qemu/chardev_open.h
|
||||
F: util/chardev_open.c
|
||||
F: docs/devel/vfio-iommufd.rst
|
||||
|
||||
vhost
|
||||
M: Michael S. Tsirkin <mst@redhat.com>
|
||||
S: Supported
|
||||
|
@ -1 +1,5 @@
|
||||
source tpm/Kconfig
|
||||
|
||||
config IOMMUFD
|
||||
bool
|
||||
depends on VFIO
|
||||
|
245
backends/iommufd.c
Normal file
245
backends/iommufd.c
Normal file
@ -0,0 +1,245 @@
|
||||
/*
|
||||
* iommufd container backend
|
||||
*
|
||||
* Copyright (C) 2023 Intel Corporation.
|
||||
* Copyright Red Hat, Inc. 2023
|
||||
*
|
||||
* Authors: Yi Liu <yi.l.liu@intel.com>
|
||||
* Eric Auger <eric.auger@redhat.com>
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "sysemu/iommufd.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qapi/qmp/qerror.h"
|
||||
#include "qemu/module.h"
|
||||
#include "qom/object_interfaces.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "monitor/monitor.h"
|
||||
#include "trace.h"
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/iommufd.h>
|
||||
|
||||
static void iommufd_backend_init(Object *obj)
|
||||
{
|
||||
IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
|
||||
be->fd = -1;
|
||||
be->users = 0;
|
||||
be->owned = true;
|
||||
qemu_mutex_init(&be->lock);
|
||||
}
|
||||
|
||||
static void iommufd_backend_finalize(Object *obj)
|
||||
{
|
||||
IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
|
||||
if (be->owned) {
|
||||
close(be->fd);
|
||||
be->fd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
|
||||
{
|
||||
IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||
int fd = -1;
|
||||
|
||||
fd = monitor_fd_param(monitor_cur(), str, errp);
|
||||
if (fd == -1) {
|
||||
error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||
return;
|
||||
}
|
||||
qemu_mutex_lock(&be->lock);
|
||||
be->fd = fd;
|
||||
be->owned = false;
|
||||
qemu_mutex_unlock(&be->lock);
|
||||
trace_iommu_backend_set_fd(be->fd);
|
||||
}
|
||||
|
||||
static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
|
||||
{
|
||||
IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
|
||||
|
||||
return !be->users;
|
||||
}
|
||||
|
||||
static void iommufd_backend_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
|
||||
|
||||
ucc->can_be_deleted = iommufd_backend_can_be_deleted;
|
||||
|
||||
object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
|
||||
}
|
||||
|
||||
int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||
{
|
||||
int fd, ret = 0;
|
||||
|
||||
qemu_mutex_lock(&be->lock);
|
||||
if (be->users == UINT32_MAX) {
|
||||
error_setg(errp, "too many connections");
|
||||
ret = -E2BIG;
|
||||
goto out;
|
||||
}
|
||||
if (be->owned && !be->users) {
|
||||
fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||
if (fd < 0) {
|
||||
error_setg_errno(errp, errno, "/dev/iommu opening failed");
|
||||
ret = fd;
|
||||
goto out;
|
||||
}
|
||||
be->fd = fd;
|
||||
}
|
||||
be->users++;
|
||||
out:
|
||||
trace_iommufd_backend_connect(be->fd, be->owned,
|
||||
be->users, ret);
|
||||
qemu_mutex_unlock(&be->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||
{
|
||||
qemu_mutex_lock(&be->lock);
|
||||
if (!be->users) {
|
||||
goto out;
|
||||
}
|
||||
be->users--;
|
||||
if (!be->users && be->owned) {
|
||||
close(be->fd);
|
||||
be->fd = -1;
|
||||
}
|
||||
out:
|
||||
trace_iommufd_backend_disconnect(be->fd, be->users);
|
||||
qemu_mutex_unlock(&be->lock);
|
||||
}
|
||||
|
||||
int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
Error **errp)
|
||||
{
|
||||
int ret, fd = be->fd;
|
||||
struct iommu_ioas_alloc alloc_data = {
|
||||
.size = sizeof(alloc_data),
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data);
|
||||
if (ret) {
|
||||
error_setg_errno(errp, errno, "Failed to allocate ioas");
|
||||
return ret;
|
||||
}
|
||||
|
||||
*ioas_id = alloc_data.out_ioas_id;
|
||||
trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
|
||||
{
|
||||
int ret, fd = be->fd;
|
||||
struct iommu_destroy des = {
|
||||
.size = sizeof(des),
|
||||
.id = id,
|
||||
};
|
||||
|
||||
ret = ioctl(fd, IOMMU_DESTROY, &des);
|
||||
trace_iommufd_backend_free_id(fd, id, ret);
|
||||
if (ret) {
|
||||
error_report("Failed to free id: %u %m", id);
|
||||
}
|
||||
}
|
||||
|
||||
int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
|
||||
ram_addr_t size, void *vaddr, bool readonly)
|
||||
{
|
||||
int ret, fd = be->fd;
|
||||
struct iommu_ioas_map map = {
|
||||
.size = sizeof(map),
|
||||
.flags = IOMMU_IOAS_MAP_READABLE |
|
||||
IOMMU_IOAS_MAP_FIXED_IOVA,
|
||||
.ioas_id = ioas_id,
|
||||
.__reserved = 0,
|
||||
.user_va = (uintptr_t)vaddr,
|
||||
.iova = iova,
|
||||
.length = size,
|
||||
};
|
||||
|
||||
if (!readonly) {
|
||||
map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
|
||||
}
|
||||
|
||||
ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
|
||||
trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
|
||||
vaddr, readonly, ret);
|
||||
if (ret) {
|
||||
ret = -errno;
|
||||
|
||||
/* TODO: Not support mapping hardware PCI BAR region for now. */
|
||||
if (errno == EFAULT) {
|
||||
warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
|
||||
} else {
|
||||
error_report("IOMMU_IOAS_MAP failed: %m");
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
|
||||
hwaddr iova, ram_addr_t size)
|
||||
{
|
||||
int ret, fd = be->fd;
|
||||
struct iommu_ioas_unmap unmap = {
|
||||
.size = sizeof(unmap),
|
||||
.ioas_id = ioas_id,
|
||||
.iova = iova,
|
||||
.length = size,
|
||||
};
|
||||
|
||||
ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
|
||||
/*
|
||||
* IOMMUFD takes mapping as some kind of object, unmapping
|
||||
* nonexistent mapping is treated as deleting a nonexistent
|
||||
* object and return ENOENT. This is different from legacy
|
||||
* backend which allows it. vIOMMU may trigger a lot of
|
||||
* redundant unmapping, to avoid flush the log, treat them
|
||||
* as succeess for IOMMUFD just like legacy backend.
|
||||
*/
|
||||
if (ret && errno == ENOENT) {
|
||||
trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
|
||||
ret = 0;
|
||||
} else {
|
||||
trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
ret = -errno;
|
||||
error_report("IOMMU_IOAS_UNMAP failed: %m");
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const TypeInfo iommufd_backend_info = {
|
||||
.name = TYPE_IOMMUFD_BACKEND,
|
||||
.parent = TYPE_OBJECT,
|
||||
.instance_size = sizeof(IOMMUFDBackend),
|
||||
.instance_init = iommufd_backend_init,
|
||||
.instance_finalize = iommufd_backend_finalize,
|
||||
.class_size = sizeof(IOMMUFDBackendClass),
|
||||
.class_init = iommufd_backend_class_init,
|
||||
.interfaces = (InterfaceInfo[]) {
|
||||
{ TYPE_USER_CREATABLE },
|
||||
{ }
|
||||
}
|
||||
};
|
||||
|
||||
static void register_types(void)
|
||||
{
|
||||
type_register_static(&iommufd_backend_info);
|
||||
}
|
||||
|
||||
type_init(register_types);
|
@ -20,6 +20,7 @@ if have_vhost_user
|
||||
system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
|
||||
endif
|
||||
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
|
||||
system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c'))
|
||||
if have_vhost_user_crypto
|
||||
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
|
||||
endif
|
||||
|
@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void)
|
||||
dbus_vmstate_post_load(int version_id) "version_id: %d"
|
||||
dbus_vmstate_loading(const char *id) "id: %s"
|
||||
dbus_vmstate_saving(const char *id) "id: %s"
|
||||
|
||||
# iommufd.c
|
||||
iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)"
|
||||
iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
|
||||
iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
|
||||
iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
|
||||
iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
|
||||
iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
|
||||
iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)"
|
||||
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
|
||||
|
@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them.
|
||||
s390-dasd-ipl
|
||||
tracing
|
||||
vfio-migration
|
||||
vfio-iommufd
|
||||
writing-monitor-commands
|
||||
virtio-backends
|
||||
|
166
docs/devel/vfio-iommufd.rst
Normal file
166
docs/devel/vfio-iommufd.rst
Normal file
@ -0,0 +1,166 @@
|
||||
===============================
|
||||
IOMMUFD BACKEND usage with VFIO
|
||||
===============================
|
||||
|
||||
(Same meaning for backend/container/BE)
|
||||
|
||||
With the introduction of iommufd, the Linux kernel provides a generic
|
||||
interface for user space drivers to propagate their DMA mappings to kernel
|
||||
for assigned devices. While the legacy kernel interface is group-centric,
|
||||
the new iommufd interface is device-centric, relying on device fd and iommufd.
|
||||
|
||||
To support both interfaces in the QEMU VFIO device, introduce a base container
|
||||
to abstract the common part of VFIO legacy and iommufd container. So that the
|
||||
generic VFIO code can use either container.
|
||||
|
||||
The base container implements generic functions such as memory_listener and
|
||||
address space management whereas the derived container implements callbacks
|
||||
specific to either legacy or iommufd. Each container has its own way to setup
|
||||
secure context and dma management interface. The below diagram shows how it
|
||||
looks like with both containers.
|
||||
|
||||
::
|
||||
|
||||
VFIO AddressSpace/Memory
|
||||
+-------+ +----------+ +-----+ +-----+
|
||||
| pci | | platform | | ap | | ccw |
|
||||
+---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+
|
||||
| | | | | AddressSpace |
|
||||
| | | | +------------+---------+
|
||||
+---V-----------V-----------V--------V----+ /
|
||||
| VFIOAddressSpace | <------------+
|
||||
| | | MemoryListener
|
||||
| VFIOContainerBase list |
|
||||
+-------+----------------------------+----+
|
||||
| |
|
||||
| |
|
||||
+-------V------+ +--------V----------+
|
||||
| iommufd | | vfio legacy |
|
||||
| container | | container |
|
||||
+-------+------+ +--------+----------+
|
||||
| |
|
||||
| /dev/iommu | /dev/vfio/vfio
|
||||
| /dev/vfio/devices/vfioX | /dev/vfio/$group_id
|
||||
Userspace | |
|
||||
============+============================+===========================
|
||||
Kernel | device fd |
|
||||
+---------------+ | group/container fd
|
||||
| (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU)
|
||||
| ATTACH_IOAS) | | device fd
|
||||
| | |
|
||||
| +-------V------------V-----------------+
|
||||
iommufd | | vfio |
|
||||
(map/unmap | +---------+--------------------+-------+
|
||||
ioas_copy) | | | map/unmap
|
||||
| | |
|
||||
+------V------+ +-----V------+ +------V--------+
|
||||
| iommfd core | | device | | vfio iommu |
|
||||
+-------------+ +------------+ +---------------+
|
||||
|
||||
* Secure Context setup
|
||||
|
||||
- iommufd BE: uses device fd and iommufd to setup secure context
|
||||
(bind_iommufd, attach_ioas)
|
||||
- vfio legacy BE: uses group fd and container fd to setup secure context
|
||||
(set_container, set_iommu)
|
||||
|
||||
* Device access
|
||||
|
||||
- iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX``
|
||||
- vfio legacy BE: device fd is retrieved from group fd ioctl
|
||||
|
||||
* DMA Mapping flow
|
||||
|
||||
1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener
|
||||
2. VFIO populates DMA map/unmap via the container BEs
|
||||
* iommufd BE: uses iommufd
|
||||
* vfio legacy BE: uses container fd
|
||||
|
||||
Example configuration
|
||||
=====================
|
||||
|
||||
Step 1: configure the host device
|
||||
---------------------------------
|
||||
|
||||
It's exactly same as the VFIO device with legacy VFIO container.
|
||||
|
||||
Step 2: configure QEMU
|
||||
----------------------
|
||||
|
||||
Interactions with the ``/dev/iommu`` are abstracted by a new iommufd
|
||||
object (compiled in with the ``CONFIG_IOMMUFD`` option).
|
||||
|
||||
Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must
|
||||
be linked with an iommufd object. It gets a new optional property
|
||||
named iommufd which allows to pass an iommufd object. Take ``vfio-pci``
|
||||
device for example:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
-object iommufd,id=iommufd0
|
||||
-device vfio-pci,host=0000:02:00.0,iommufd=iommufd0
|
||||
|
||||
Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a
|
||||
management layer. In such a case the fd is passed, the fd supports a
|
||||
string naming the fd or a number, for example:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
-object iommufd,id=iommufd0,fd=22
|
||||
-device vfio-pci,iommufd=iommufd0,fd=23
|
||||
|
||||
If the ``fd`` property is not passed, the fd is opened by QEMU.
|
||||
|
||||
If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd
|
||||
is not used and the user gets the behavior based on the legacy VFIO
|
||||
container:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
-device vfio-pci,host=0000:02:00.0
|
||||
|
||||
Supported platform
|
||||
==================
|
||||
|
||||
Supports x86, ARM and s390x currently.
|
||||
|
||||
Caveats
|
||||
=======
|
||||
|
||||
Dirty page sync
|
||||
---------------
|
||||
|
||||
Dirty page sync with iommufd backend is unsupported yet, live migration is
|
||||
disabled by default. But it can be force enabled like below, low efficient
|
||||
though.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
-object iommufd,id=iommufd0
|
||||
-device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on
|
||||
|
||||
P2P DMA
|
||||
-------
|
||||
|
||||
PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI
|
||||
BAR region yet. Below warning shows for assigned PCI device, it's not a bug.
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR?
|
||||
qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address)
|
||||
|
||||
FD passing with mdev
|
||||
--------------------
|
||||
|
||||
``vfio-pci`` device checks sysfsdev property to decide if backend is a mdev.
|
||||
If FD passing is used, there is no way to know that and the mdev is treated
|
||||
like a real PCI device. There is an error as below if user wants to enable
|
||||
RAM discarding for mdev.
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices
|
||||
|
||||
``vfio-ap`` and ``vfio-ccw`` devices don't have same issue as their backend
|
||||
devices are always mdev and RAM discarding is force enabled.
|
@ -8,6 +8,7 @@ config ARM_VIRT
|
||||
imply TPM_TIS_SYSBUS
|
||||
imply TPM_TIS_I2C
|
||||
imply NVDIMM
|
||||
imply IOMMUFD
|
||||
select ARM_GIC
|
||||
select ACPI
|
||||
select ARM_SMMUV3
|
||||
|
@ -95,6 +95,7 @@ config Q35
|
||||
imply E1000E_PCI_EXPRESS
|
||||
imply VMPORT
|
||||
imply VMMOUSE
|
||||
imply IOMMUFD
|
||||
select PC_PCI
|
||||
select PC_ACPI
|
||||
select PCI_EXPRESS_Q35
|
||||
|
@ -3,11 +3,11 @@ config PSERIES
|
||||
imply PCI_DEVICES
|
||||
imply TEST_DEVICES
|
||||
imply VIRTIO_VGA
|
||||
imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c
|
||||
select NVDIMM
|
||||
select DIMM
|
||||
select PCI
|
||||
select SPAPR_VSCSI
|
||||
select VFIO if LINUX # needed by spapr_pci_vfio.c
|
||||
select XICS
|
||||
select XIVE
|
||||
select MSI_NONBROKEN
|
||||
|
@ -26,10 +26,12 @@
|
||||
#include "hw/pci/pci_device.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
|
||||
|
||||
/*
|
||||
* Interfaces for IBM EEH (Enhanced Error Handling)
|
||||
*/
|
||||
#ifdef CONFIG_VFIO_PCI
|
||||
static bool vfio_eeh_container_ok(VFIOContainer *container)
|
||||
{
|
||||
/*
|
||||
@ -84,27 +86,27 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
|
||||
static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
|
||||
{
|
||||
VFIOAddressSpace *space = vfio_get_address_space(as);
|
||||
VFIOContainer *container = NULL;
|
||||
VFIOContainerBase *bcontainer = NULL;
|
||||
|
||||
if (QLIST_EMPTY(&space->containers)) {
|
||||
/* No containers to act on */
|
||||
goto out;
|
||||
}
|
||||
|
||||
container = QLIST_FIRST(&space->containers);
|
||||
bcontainer = QLIST_FIRST(&space->containers);
|
||||
|
||||
if (QLIST_NEXT(container, next)) {
|
||||
if (QLIST_NEXT(bcontainer, next)) {
|
||||
/*
|
||||
* We don't yet have logic to synchronize EEH state across
|
||||
* multiple containers
|
||||
*/
|
||||
container = NULL;
|
||||
bcontainer = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
vfio_put_address_space(space);
|
||||
return container;
|
||||
return container_of(bcontainer, VFIOContainer, bcontainer);
|
||||
}
|
||||
|
||||
static bool vfio_eeh_as_ok(AddressSpace *as)
|
||||
@ -314,3 +316,37 @@ int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
|
||||
|
||||
return RTAS_OUT_SUCCESS;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
bool spapr_phb_eeh_available(SpaprPhbState *sphb)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void spapr_phb_vfio_reset(DeviceState *qdev)
|
||||
{
|
||||
}
|
||||
|
||||
int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
|
||||
unsigned int addr, int option)
|
||||
{
|
||||
return RTAS_OUT_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
|
||||
{
|
||||
return RTAS_OUT_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
|
||||
{
|
||||
return RTAS_OUT_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
|
||||
{
|
||||
return RTAS_OUT_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_VFIO_PCI */
|
||||
|
@ -6,6 +6,7 @@ config S390_CCW_VIRTIO
|
||||
imply VFIO_CCW
|
||||
imply WDT_DIAG288
|
||||
imply PCIE_DEVICES
|
||||
imply IOMMUFD
|
||||
select PCI_EXPRESS
|
||||
select S390_FLIC
|
||||
select S390_FLIC_KVM if KVM
|
||||
|
47
hw/vfio/ap.c
47
hw/vfio/ap.c
@ -11,10 +11,12 @@
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||
#include <linux/vfio.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include "qapi/error.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#include "sysemu/iommufd.h"
|
||||
#include "hw/s390x/ap-device.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/event_notifier.h"
|
||||
@ -158,18 +160,9 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
|
||||
VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev);
|
||||
VFIODevice *vbasedev = &vapdev->vdev;
|
||||
|
||||
vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
|
||||
vbasedev->ops = &vfio_ap_ops;
|
||||
vbasedev->type = VFIO_DEVICE_TYPE_AP;
|
||||
vbasedev->dev = dev;
|
||||
|
||||
/*
|
||||
* vfio-ap devices operate in a way compatible with discarding of
|
||||
* memory in RAM blocks, as no pages are pinned in the host.
|
||||
* This needs to be set before vfio_get_device() for vfio common to
|
||||
* handle ram_block_discard_disable().
|
||||
*/
|
||||
vapdev->vdev.ram_block_discard_allowed = true;
|
||||
if (vfio_device_get_name(vbasedev, errp) < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
ret = vfio_attach_device(vbasedev->name, vbasedev,
|
||||
&address_space_memory, errp);
|
||||
@ -204,6 +197,10 @@ static void vfio_ap_unrealize(DeviceState *dev)
|
||||
|
||||
static Property vfio_ap_properties[] = {
|
||||
DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev),
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
DEFINE_PROP_LINK("iommufd", VFIOAPDevice, vdev.iommufd,
|
||||
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
|
||||
#endif
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
@ -224,11 +221,36 @@ static const VMStateDescription vfio_ap_vmstate = {
|
||||
.unmigratable = 1,
|
||||
};
|
||||
|
||||
static void vfio_ap_instance_init(Object *obj)
|
||||
{
|
||||
VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj);
|
||||
VFIODevice *vbasedev = &vapdev->vdev;
|
||||
|
||||
/*
|
||||
* vfio-ap devices operate in a way compatible with discarding of
|
||||
* memory in RAM blocks, as no pages are pinned in the host.
|
||||
* This needs to be set before vfio_get_device() for vfio common to
|
||||
* handle ram_block_discard_disable().
|
||||
*/
|
||||
vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops,
|
||||
DEVICE(vapdev), true);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
static void vfio_ap_set_fd(Object *obj, const char *str, Error **errp)
|
||||
{
|
||||
vfio_device_set_fd(&VFIO_AP_DEVICE(obj)->vdev, str, errp);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void vfio_ap_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
DeviceClass *dc = DEVICE_CLASS(klass);
|
||||
|
||||
device_class_set_props(dc, vfio_ap_properties);
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
object_class_property_add_str(klass, "fd", NULL, vfio_ap_set_fd);
|
||||
#endif
|
||||
dc->vmsd = &vfio_ap_vmstate;
|
||||
dc->desc = "VFIO-based AP device assignment";
|
||||
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
|
||||
@ -243,6 +265,7 @@ static const TypeInfo vfio_ap_info = {
|
||||
.name = TYPE_VFIO_AP_DEVICE,
|
||||
.parent = TYPE_AP_DEVICE,
|
||||
.instance_size = sizeof(VFIOAPDevice),
|
||||
.instance_init = vfio_ap_instance_init,
|
||||
.class_init = vfio_ap_class_init,
|
||||
};
|
||||
|
||||
|
@ -15,12 +15,14 @@
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||
#include <linux/vfio.h>
|
||||
#include <linux/vfio_ccw.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "qapi/error.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#include "sysemu/iommufd.h"
|
||||
#include "hw/s390x/s390-ccw.h"
|
||||
#include "hw/s390x/vfio-ccw.h"
|
||||
#include "hw/qdev-properties.h"
|
||||
@ -588,22 +590,9 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
vbasedev->ops = &vfio_ccw_ops;
|
||||
vbasedev->type = VFIO_DEVICE_TYPE_CCW;
|
||||
vbasedev->name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid,
|
||||
vcdev->cdev.hostid.ssid,
|
||||
vcdev->cdev.hostid.devid);
|
||||
vbasedev->dev = dev;
|
||||
|
||||
/*
|
||||
* All vfio-ccw devices are believed to operate in a way compatible with
|
||||
* discarding of memory in RAM blocks, ie. pages pinned in the host are
|
||||
* in the current working set of the guest driver and therefore never
|
||||
* overlap e.g., with pages available to the guest balloon driver. This
|
||||
* needs to be set before vfio_get_device() for vfio common to handle
|
||||
* ram_block_discard_disable().
|
||||
*/
|
||||
vbasedev->ram_block_discard_allowed = true;
|
||||
if (vfio_device_get_name(vbasedev, errp) < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
ret = vfio_attach_device(cdev->mdevid, vbasedev,
|
||||
&address_space_memory, errp);
|
||||
@ -677,6 +666,10 @@ static void vfio_ccw_unrealize(DeviceState *dev)
|
||||
static Property vfio_ccw_properties[] = {
|
||||
DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev),
|
||||
DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false),
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
DEFINE_PROP_LINK("iommufd", VFIOCCWDevice, vdev.iommufd,
|
||||
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
|
||||
#endif
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
@ -685,12 +678,39 @@ static const VMStateDescription vfio_ccw_vmstate = {
|
||||
.unmigratable = 1,
|
||||
};
|
||||
|
||||
static void vfio_ccw_instance_init(Object *obj)
|
||||
{
|
||||
VFIOCCWDevice *vcdev = VFIO_CCW(obj);
|
||||
VFIODevice *vbasedev = &vcdev->vdev;
|
||||
|
||||
/*
|
||||
* All vfio-ccw devices are believed to operate in a way compatible with
|
||||
* discarding of memory in RAM blocks, ie. pages pinned in the host are
|
||||
* in the current working set of the guest driver and therefore never
|
||||
* overlap e.g., with pages available to the guest balloon driver. This
|
||||
* needs to be set before vfio_get_device() for vfio common to handle
|
||||
* ram_block_discard_disable().
|
||||
*/
|
||||
vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_CCW, &vfio_ccw_ops,
|
||||
DEVICE(vcdev), true);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
static void vfio_ccw_set_fd(Object *obj, const char *str, Error **errp)
|
||||
{
|
||||
vfio_device_set_fd(&VFIO_CCW(obj)->vdev, str, errp);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void vfio_ccw_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
DeviceClass *dc = DEVICE_CLASS(klass);
|
||||
S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass);
|
||||
|
||||
device_class_set_props(dc, vfio_ccw_properties);
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
object_class_property_add_str(klass, "fd", NULL, vfio_ccw_set_fd);
|
||||
#endif
|
||||
dc->vmsd = &vfio_ccw_vmstate;
|
||||
dc->desc = "VFIO-based subchannel assignment";
|
||||
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
|
||||
@ -708,6 +728,7 @@ static const TypeInfo vfio_ccw_info = {
|
||||
.name = TYPE_VFIO_CCW,
|
||||
.parent = TYPE_S390_CCW,
|
||||
.instance_size = sizeof(VFIOCCWDevice),
|
||||
.instance_init = vfio_ccw_instance_init,
|
||||
.class_init = vfio_ccw_class_init,
|
||||
};
|
||||
|
||||
|
261
hw/vfio/common.c
261
hw/vfio/common.c
@ -19,6 +19,7 @@
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||
#include <sys/ioctl.h>
|
||||
#ifdef CONFIG_KVM
|
||||
#include <linux/kvm.h>
|
||||
@ -145,7 +146,7 @@ void vfio_unblock_multiple_devices_migration(void)
|
||||
|
||||
bool vfio_viommu_preset(VFIODevice *vbasedev)
|
||||
{
|
||||
return vbasedev->container->space->as != &address_space_memory;
|
||||
return vbasedev->bcontainer->space->as != &address_space_memory;
|
||||
}
|
||||
|
||||
static void vfio_set_migration_error(int err)
|
||||
@ -177,7 +178,7 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
|
||||
migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P;
|
||||
}
|
||||
|
||||
static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
|
||||
static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer)
|
||||
{
|
||||
VFIODevice *vbasedev;
|
||||
MigrationState *ms = migrate_get_current();
|
||||
@ -187,7 +188,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
|
||||
return false;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||
VFIOMigration *migration = vbasedev->migration;
|
||||
|
||||
if (!migration) {
|
||||
@ -203,11 +204,11 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
|
||||
bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer)
|
||||
{
|
||||
VFIODevice *vbasedev;
|
||||
|
||||
QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||
if (!vbasedev->dirty_pages_supported) {
|
||||
return false;
|
||||
}
|
||||
@ -220,7 +221,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
|
||||
* Check if all VFIO devices are running and migration is active, which is
|
||||
* essentially equivalent to the migration being in pre-copy phase.
|
||||
*/
|
||||
bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
|
||||
bool
|
||||
vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer)
|
||||
{
|
||||
VFIODevice *vbasedev;
|
||||
|
||||
@ -228,7 +230,7 @@ bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
|
||||
return false;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||
VFIOMigration *migration = vbasedev->migration;
|
||||
|
||||
if (!migration) {
|
||||
@ -292,7 +294,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
{
|
||||
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
|
||||
VFIOContainer *container = giommu->container;
|
||||
VFIOContainerBase *bcontainer = giommu->bcontainer;
|
||||
hwaddr iova = iotlb->iova + giommu->iommu_offset;
|
||||
void *vaddr;
|
||||
int ret;
|
||||
@ -322,21 +324,22 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
* of vaddr will always be there, even if the memory object is
|
||||
* destroyed and its backing memory munmap-ed.
|
||||
*/
|
||||
ret = vfio_dma_map(container, iova,
|
||||
iotlb->addr_mask + 1, vaddr,
|
||||
read_only);
|
||||
ret = vfio_container_dma_map(bcontainer, iova,
|
||||
iotlb->addr_mask + 1, vaddr,
|
||||
read_only);
|
||||
if (ret) {
|
||||
error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
|
||||
error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx", %p) = %d (%s)",
|
||||
container, iova,
|
||||
bcontainer, iova,
|
||||
iotlb->addr_mask + 1, vaddr, ret, strerror(-ret));
|
||||
}
|
||||
} else {
|
||||
ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb);
|
||||
ret = vfio_container_dma_unmap(bcontainer, iova,
|
||||
iotlb->addr_mask + 1, iotlb);
|
||||
if (ret) {
|
||||
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||
error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||
container, iova,
|
||||
bcontainer, iova,
|
||||
iotlb->addr_mask + 1, ret, strerror(-ret));
|
||||
vfio_set_migration_error(ret);
|
||||
}
|
||||
@ -350,14 +353,15 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
|
||||
{
|
||||
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
|
||||
listener);
|
||||
VFIOContainerBase *bcontainer = vrdl->bcontainer;
|
||||
const hwaddr size = int128_get64(section->size);
|
||||
const hwaddr iova = section->offset_within_address_space;
|
||||
int ret;
|
||||
|
||||
/* Unmap with a single call. */
|
||||
ret = vfio_dma_unmap(vrdl->container, iova, size , NULL);
|
||||
ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL);
|
||||
if (ret) {
|
||||
error_report("%s: vfio_dma_unmap() failed: %s", __func__,
|
||||
error_report("%s: vfio_container_dma_unmap() failed: %s", __func__,
|
||||
strerror(-ret));
|
||||
}
|
||||
}
|
||||
@ -367,6 +371,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||
{
|
||||
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
|
||||
listener);
|
||||
VFIOContainerBase *bcontainer = vrdl->bcontainer;
|
||||
const hwaddr end = section->offset_within_region +
|
||||
int128_get64(section->size);
|
||||
hwaddr start, next, iova;
|
||||
@ -385,8 +390,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||
section->offset_within_address_space;
|
||||
vaddr = memory_region_get_ram_ptr(section->mr) + start;
|
||||
|
||||
ret = vfio_dma_map(vrdl->container, iova, next - start,
|
||||
vaddr, section->readonly);
|
||||
ret = vfio_container_dma_map(bcontainer, iova, next - start,
|
||||
vaddr, section->readonly);
|
||||
if (ret) {
|
||||
/* Rollback */
|
||||
vfio_ram_discard_notify_discard(rdl, section);
|
||||
@ -396,7 +401,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||
static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
|
||||
@ -409,7 +414,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||
g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE));
|
||||
|
||||
vrdl = g_new0(VFIORamDiscardListener, 1);
|
||||
vrdl->container = container;
|
||||
vrdl->bcontainer = bcontainer;
|
||||
vrdl->mr = section->mr;
|
||||
vrdl->offset_within_address_space = section->offset_within_address_space;
|
||||
vrdl->size = int128_get64(section->size);
|
||||
@ -417,14 +422,14 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||
section->mr);
|
||||
|
||||
g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity));
|
||||
g_assert(container->pgsizes &&
|
||||
vrdl->granularity >= 1ULL << ctz64(container->pgsizes));
|
||||
g_assert(bcontainer->pgsizes &&
|
||||
vrdl->granularity >= 1ULL << ctz64(bcontainer->pgsizes));
|
||||
|
||||
ram_discard_listener_init(&vrdl->listener,
|
||||
vfio_ram_discard_notify_populate,
|
||||
vfio_ram_discard_notify_discard, true);
|
||||
ram_discard_manager_register_listener(rdm, &vrdl->listener, section);
|
||||
QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next);
|
||||
QLIST_INSERT_HEAD(&bcontainer->vrdl_list, vrdl, next);
|
||||
|
||||
/*
|
||||
* Sanity-check if we have a theoretically problematic setup where we could
|
||||
@ -439,7 +444,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||
* number of sections in the address space we could have over time,
|
||||
* also consuming DMA mappings.
|
||||
*/
|
||||
if (container->dma_max_mappings) {
|
||||
if (bcontainer->dma_max_mappings) {
|
||||
unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512;
|
||||
|
||||
#ifdef CONFIG_KVM
|
||||
@ -448,7 +453,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||
}
|
||||
#endif
|
||||
|
||||
QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
|
||||
QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
|
||||
hwaddr start, end;
|
||||
|
||||
start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space,
|
||||
@ -460,23 +465,23 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||
}
|
||||
|
||||
if (vrdl_mappings + max_memslots - vrdl_count >
|
||||
container->dma_max_mappings) {
|
||||
bcontainer->dma_max_mappings) {
|
||||
warn_report("%s: possibly running out of DMA mappings. E.g., try"
|
||||
" increasing the 'block-size' of virtio-mem devies."
|
||||
" Maximum possible DMA mappings: %d, Maximum possible"
|
||||
" memslots: %d", __func__, container->dma_max_mappings,
|
||||
" memslots: %d", __func__, bcontainer->dma_max_mappings,
|
||||
max_memslots);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
|
||||
static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
|
||||
VFIORamDiscardListener *vrdl = NULL;
|
||||
|
||||
QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
|
||||
QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
|
||||
if (vrdl->mr == section->mr &&
|
||||
vrdl->offset_within_address_space ==
|
||||
section->offset_within_address_space) {
|
||||
@ -538,7 +543,7 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool vfio_get_section_iova_range(VFIOContainer *container,
|
||||
static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section,
|
||||
hwaddr *out_iova, hwaddr *out_end,
|
||||
Int128 *out_llend)
|
||||
@ -566,7 +571,8 @@ static bool vfio_get_section_iova_range(VFIOContainer *container,
|
||||
static void vfio_listener_region_add(MemoryListener *listener,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||
listener);
|
||||
hwaddr iova, end;
|
||||
Int128 llend, llsize;
|
||||
void *vaddr;
|
||||
@ -577,7 +583,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||
return;
|
||||
}
|
||||
|
||||
if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) {
|
||||
if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end,
|
||||
&llend)) {
|
||||
if (memory_region_is_ram_device(section->mr)) {
|
||||
trace_vfio_listener_region_add_no_dma_map(
|
||||
memory_region_name(section->mr),
|
||||
@ -588,7 +595,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||
return;
|
||||
}
|
||||
|
||||
if (vfio_container_add_section_window(container, section, &err)) {
|
||||
if (vfio_container_add_section_window(bcontainer, section, &err)) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
@ -610,7 +617,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||
giommu->iommu_mr = iommu_mr;
|
||||
giommu->iommu_offset = section->offset_within_address_space -
|
||||
section->offset_within_region;
|
||||
giommu->container = container;
|
||||
giommu->bcontainer = bcontainer;
|
||||
llend = int128_add(int128_make64(section->offset_within_region),
|
||||
section->size);
|
||||
llend = int128_sub(llend, int128_one());
|
||||
@ -623,16 +630,17 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||
iommu_idx);
|
||||
|
||||
ret = memory_region_iommu_set_page_size_mask(giommu->iommu_mr,
|
||||
container->pgsizes,
|
||||
bcontainer->pgsizes,
|
||||
&err);
|
||||
if (ret) {
|
||||
g_free(giommu);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (container->iova_ranges) {
|
||||
if (bcontainer->iova_ranges) {
|
||||
ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
|
||||
container->iova_ranges, &err);
|
||||
bcontainer->iova_ranges,
|
||||
&err);
|
||||
if (ret) {
|
||||
g_free(giommu);
|
||||
goto fail;
|
||||
@ -645,7 +653,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||
g_free(giommu);
|
||||
goto fail;
|
||||
}
|
||||
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
|
||||
QLIST_INSERT_HEAD(&bcontainer->giommu_list, giommu, giommu_next);
|
||||
memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
|
||||
|
||||
return;
|
||||
@ -659,7 +667,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||
* about changes.
|
||||
*/
|
||||
if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||
vfio_register_ram_discard_listener(container, section);
|
||||
vfio_register_ram_discard_listener(bcontainer, section);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -672,7 +680,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||
llsize = int128_sub(llend, int128_make64(iova));
|
||||
|
||||
if (memory_region_is_ram_device(section->mr)) {
|
||||
hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1;
|
||||
hwaddr pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1;
|
||||
|
||||
if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
|
||||
trace_vfio_listener_region_add_no_dma_map(
|
||||
@ -684,12 +692,12 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||
}
|
||||
}
|
||||
|
||||
ret = vfio_dma_map(container, iova, int128_get64(llsize),
|
||||
vaddr, section->readonly);
|
||||
ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
|
||||
vaddr, section->readonly);
|
||||
if (ret) {
|
||||
error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
|
||||
error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx", %p) = %d (%s)",
|
||||
container, iova, int128_get64(llsize), vaddr, ret,
|
||||
bcontainer, iova, int128_get64(llsize), vaddr, ret,
|
||||
strerror(-ret));
|
||||
if (memory_region_is_ram_device(section->mr)) {
|
||||
/* Allow unexpected mappings not to be fatal for RAM devices */
|
||||
@ -711,9 +719,9 @@ fail:
|
||||
* can gracefully fail. Runtime, there's not much we can do other
|
||||
* than throw a hardware error.
|
||||
*/
|
||||
if (!container->initialized) {
|
||||
if (!container->error) {
|
||||
error_propagate_prepend(&container->error, err,
|
||||
if (!bcontainer->initialized) {
|
||||
if (!bcontainer->error) {
|
||||
error_propagate_prepend(&bcontainer->error, err,
|
||||
"Region %s: ",
|
||||
memory_region_name(section->mr));
|
||||
} else {
|
||||
@ -728,7 +736,8 @@ fail:
|
||||
static void vfio_listener_region_del(MemoryListener *listener,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||
listener);
|
||||
hwaddr iova, end;
|
||||
Int128 llend, llsize;
|
||||
int ret;
|
||||
@ -741,7 +750,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||
if (memory_region_is_iommu(section->mr)) {
|
||||
VFIOGuestIOMMU *giommu;
|
||||
|
||||
QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
|
||||
QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
|
||||
if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
|
||||
giommu->n.start == section->offset_within_region) {
|
||||
memory_region_unregister_iommu_notifier(section->mr,
|
||||
@ -761,7 +770,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||
*/
|
||||
}
|
||||
|
||||
if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) {
|
||||
if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end,
|
||||
&llend)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -772,10 +782,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||
if (memory_region_is_ram_device(section->mr)) {
|
||||
hwaddr pgmask;
|
||||
|
||||
pgmask = (1ULL << ctz64(container->pgsizes)) - 1;
|
||||
pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1;
|
||||
try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
|
||||
} else if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||
vfio_unregister_ram_discard_listener(container, section);
|
||||
vfio_unregister_ram_discard_listener(bcontainer, section);
|
||||
/* Unregistering will trigger an unmap. */
|
||||
try_unmap = false;
|
||||
}
|
||||
@ -784,27 +794,29 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||
if (int128_eq(llsize, int128_2_64())) {
|
||||
/* The unmap ioctl doesn't accept a full 64-bit span. */
|
||||
llsize = int128_rshift(llsize, 1);
|
||||
ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
|
||||
ret = vfio_container_dma_unmap(bcontainer, iova,
|
||||
int128_get64(llsize), NULL);
|
||||
if (ret) {
|
||||
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||
error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||
container, iova, int128_get64(llsize), ret,
|
||||
bcontainer, iova, int128_get64(llsize), ret,
|
||||
strerror(-ret));
|
||||
}
|
||||
iova += int128_get64(llsize);
|
||||
}
|
||||
ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
|
||||
ret = vfio_container_dma_unmap(bcontainer, iova,
|
||||
int128_get64(llsize), NULL);
|
||||
if (ret) {
|
||||
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||
error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||
container, iova, int128_get64(llsize), ret,
|
||||
bcontainer, iova, int128_get64(llsize), ret,
|
||||
strerror(-ret));
|
||||
}
|
||||
}
|
||||
|
||||
memory_region_unref(section->mr);
|
||||
|
||||
vfio_container_del_section_window(container, section);
|
||||
vfio_container_del_section_window(bcontainer, section);
|
||||
}
|
||||
|
||||
typedef struct VFIODirtyRanges {
|
||||
@ -817,13 +829,13 @@ typedef struct VFIODirtyRanges {
|
||||
} VFIODirtyRanges;
|
||||
|
||||
typedef struct VFIODirtyRangesListener {
|
||||
VFIOContainer *container;
|
||||
VFIOContainerBase *bcontainer;
|
||||
VFIODirtyRanges ranges;
|
||||
MemoryListener listener;
|
||||
} VFIODirtyRangesListener;
|
||||
|
||||
static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
|
||||
VFIOContainer *container)
|
||||
VFIOContainerBase *bcontainer)
|
||||
{
|
||||
VFIOPCIDevice *pcidev;
|
||||
VFIODevice *vbasedev;
|
||||
@ -831,7 +843,7 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
|
||||
|
||||
owner = memory_region_owner(section->mr);
|
||||
|
||||
QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||
if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
|
||||
continue;
|
||||
}
|
||||
@ -854,7 +866,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener,
|
||||
hwaddr iova, end, *min, *max;
|
||||
|
||||
if (!vfio_listener_valid_section(section, "tracking_update") ||
|
||||
!vfio_get_section_iova_range(dirty->container, section,
|
||||
!vfio_get_section_iova_range(dirty->bcontainer, section,
|
||||
&iova, &end, NULL)) {
|
||||
return;
|
||||
}
|
||||
@ -878,7 +890,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener,
|
||||
* The alternative would be an IOVATree but that has a much bigger runtime
|
||||
* overhead and unnecessary complexity.
|
||||
*/
|
||||
if (vfio_section_is_vfio_pci(section, dirty->container) &&
|
||||
if (vfio_section_is_vfio_pci(section, dirty->bcontainer) &&
|
||||
iova >= UINT32_MAX) {
|
||||
min = &range->minpci64;
|
||||
max = &range->maxpci64;
|
||||
@ -902,7 +914,7 @@ static const MemoryListener vfio_dirty_tracking_listener = {
|
||||
.region_add = vfio_dirty_tracking_update,
|
||||
};
|
||||
|
||||
static void vfio_dirty_tracking_init(VFIOContainer *container,
|
||||
static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer,
|
||||
VFIODirtyRanges *ranges)
|
||||
{
|
||||
VFIODirtyRangesListener dirty;
|
||||
@ -912,10 +924,10 @@ static void vfio_dirty_tracking_init(VFIOContainer *container,
|
||||
dirty.ranges.min64 = UINT64_MAX;
|
||||
dirty.ranges.minpci64 = UINT64_MAX;
|
||||
dirty.listener = vfio_dirty_tracking_listener;
|
||||
dirty.container = container;
|
||||
dirty.bcontainer = bcontainer;
|
||||
|
||||
memory_listener_register(&dirty.listener,
|
||||
container->space->as);
|
||||
bcontainer->space->as);
|
||||
|
||||
*ranges = dirty.ranges;
|
||||
|
||||
@ -927,7 +939,7 @@ static void vfio_dirty_tracking_init(VFIOContainer *container,
|
||||
memory_listener_unregister(&dirty.listener);
|
||||
}
|
||||
|
||||
static void vfio_devices_dma_logging_stop(VFIOContainer *container)
|
||||
static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
|
||||
{
|
||||
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
|
||||
sizeof(uint64_t))] = {};
|
||||
@ -938,7 +950,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container)
|
||||
feature->flags = VFIO_DEVICE_FEATURE_SET |
|
||||
VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP;
|
||||
|
||||
QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||
if (!vbasedev->dirty_tracking) {
|
||||
continue;
|
||||
}
|
||||
@ -952,7 +964,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container)
|
||||
}
|
||||
|
||||
static struct vfio_device_feature *
|
||||
vfio_device_feature_dma_logging_start_create(VFIOContainer *container,
|
||||
vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer,
|
||||
VFIODirtyRanges *tracking)
|
||||
{
|
||||
struct vfio_device_feature *feature;
|
||||
@ -1025,21 +1037,21 @@ static void vfio_device_feature_dma_logging_start_destroy(
|
||||
g_free(feature);
|
||||
}
|
||||
|
||||
static int vfio_devices_dma_logging_start(VFIOContainer *container)
|
||||
static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer)
|
||||
{
|
||||
struct vfio_device_feature *feature;
|
||||
VFIODirtyRanges ranges;
|
||||
VFIODevice *vbasedev;
|
||||
int ret = 0;
|
||||
|
||||
vfio_dirty_tracking_init(container, &ranges);
|
||||
feature = vfio_device_feature_dma_logging_start_create(container,
|
||||
vfio_dirty_tracking_init(bcontainer, &ranges);
|
||||
feature = vfio_device_feature_dma_logging_start_create(bcontainer,
|
||||
&ranges);
|
||||
if (!feature) {
|
||||
return -errno;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||
if (vbasedev->dirty_tracking) {
|
||||
continue;
|
||||
}
|
||||
@ -1056,7 +1068,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container)
|
||||
|
||||
out:
|
||||
if (ret) {
|
||||
vfio_devices_dma_logging_stop(container);
|
||||
vfio_devices_dma_logging_stop(bcontainer);
|
||||
}
|
||||
|
||||
vfio_device_feature_dma_logging_start_destroy(feature);
|
||||
@ -1066,13 +1078,14 @@ out:
|
||||
|
||||
static void vfio_listener_log_global_start(MemoryListener *listener)
|
||||
{
|
||||
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||
listener);
|
||||
int ret;
|
||||
|
||||
if (vfio_devices_all_device_dirty_tracking(container)) {
|
||||
ret = vfio_devices_dma_logging_start(container);
|
||||
if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
|
||||
ret = vfio_devices_dma_logging_start(bcontainer);
|
||||
} else {
|
||||
ret = vfio_set_dirty_page_tracking(container, true);
|
||||
ret = vfio_container_set_dirty_page_tracking(bcontainer, true);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
@ -1084,13 +1097,14 @@ static void vfio_listener_log_global_start(MemoryListener *listener)
|
||||
|
||||
static void vfio_listener_log_global_stop(MemoryListener *listener)
|
||||
{
|
||||
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||
listener);
|
||||
int ret = 0;
|
||||
|
||||
if (vfio_devices_all_device_dirty_tracking(container)) {
|
||||
vfio_devices_dma_logging_stop(container);
|
||||
if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
|
||||
vfio_devices_dma_logging_stop(bcontainer);
|
||||
} else {
|
||||
ret = vfio_set_dirty_page_tracking(container, false);
|
||||
ret = vfio_container_set_dirty_page_tracking(bcontainer, false);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
@ -1126,14 +1140,14 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
|
||||
int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||
VFIOBitmap *vbmap, hwaddr iova,
|
||||
hwaddr size)
|
||||
{
|
||||
VFIODevice *vbasedev;
|
||||
int ret;
|
||||
|
||||
QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||
ret = vfio_device_dma_logging_report(vbasedev, iova, size,
|
||||
vbmap->bitmap);
|
||||
if (ret) {
|
||||
@ -1149,16 +1163,16 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||
int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
|
||||
uint64_t size, ram_addr_t ram_addr)
|
||||
{
|
||||
bool all_device_dirty_tracking =
|
||||
vfio_devices_all_device_dirty_tracking(container);
|
||||
vfio_devices_all_device_dirty_tracking(bcontainer);
|
||||
uint64_t dirty_pages;
|
||||
VFIOBitmap vbmap;
|
||||
int ret;
|
||||
|
||||
if (!container->dirty_pages_supported && !all_device_dirty_tracking) {
|
||||
if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
|
||||
cpu_physical_memory_set_dirty_range(ram_addr, size,
|
||||
tcg_enabled() ? DIRTY_CLIENTS_ALL :
|
||||
DIRTY_CLIENTS_NOCODE);
|
||||
@ -1171,9 +1185,9 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||
}
|
||||
|
||||
if (all_device_dirty_tracking) {
|
||||
ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size);
|
||||
ret = vfio_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size);
|
||||
} else {
|
||||
ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
|
||||
ret = vfio_container_query_dirty_bitmap(bcontainer, &vbmap, iova, size);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
@ -1183,8 +1197,7 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||
dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
|
||||
vbmap.pages);
|
||||
|
||||
trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size,
|
||||
ram_addr, dirty_pages);
|
||||
trace_vfio_get_dirty_bitmap(iova, size, vbmap.size, ram_addr, dirty_pages);
|
||||
out:
|
||||
g_free(vbmap.bitmap);
|
||||
|
||||
@ -1201,7 +1214,7 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
vfio_giommu_dirty_notifier *gdn = container_of(n,
|
||||
vfio_giommu_dirty_notifier, n);
|
||||
VFIOGuestIOMMU *giommu = gdn->giommu;
|
||||
VFIOContainer *container = giommu->container;
|
||||
VFIOContainerBase *bcontainer = giommu->bcontainer;
|
||||
hwaddr iova = iotlb->iova + giommu->iommu_offset;
|
||||
ram_addr_t translated_addr;
|
||||
int ret = -EINVAL;
|
||||
@ -1216,12 +1229,12 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
|
||||
rcu_read_lock();
|
||||
if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
|
||||
ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
|
||||
ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
|
||||
translated_addr);
|
||||
if (ret) {
|
||||
error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||
container, iova, iotlb->addr_mask + 1, ret,
|
||||
bcontainer, iova, iotlb->addr_mask + 1, ret,
|
||||
strerror(-ret));
|
||||
}
|
||||
}
|
||||
@ -1246,16 +1259,17 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section,
|
||||
* Sync the whole mapped region (spanning multiple individual mappings)
|
||||
* in one go.
|
||||
*/
|
||||
return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr);
|
||||
return vfio_get_dirty_bitmap(vrdl->bcontainer, iova, size, ram_addr);
|
||||
}
|
||||
|
||||
static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
|
||||
MemoryRegionSection *section)
|
||||
static int
|
||||
vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
|
||||
VFIORamDiscardListener *vrdl = NULL;
|
||||
|
||||
QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
|
||||
QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
|
||||
if (vrdl->mr == section->mr &&
|
||||
vrdl->offset_within_address_space ==
|
||||
section->offset_within_address_space) {
|
||||
@ -1276,7 +1290,7 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
|
||||
&vrdl);
|
||||
}
|
||||
|
||||
static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||
static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
ram_addr_t ram_addr;
|
||||
@ -1284,7 +1298,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||
if (memory_region_is_iommu(section->mr)) {
|
||||
VFIOGuestIOMMU *giommu;
|
||||
|
||||
QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
|
||||
QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
|
||||
if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
|
||||
giommu->n.start == section->offset_within_region) {
|
||||
Int128 llend;
|
||||
@ -1308,13 +1322,13 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||
}
|
||||
return 0;
|
||||
} else if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||
return vfio_sync_ram_discard_listener_dirty_bitmap(container, section);
|
||||
return vfio_sync_ram_discard_listener_dirty_bitmap(bcontainer, section);
|
||||
}
|
||||
|
||||
ram_addr = memory_region_get_ram_addr(section->mr) +
|
||||
section->offset_within_region;
|
||||
|
||||
return vfio_get_dirty_bitmap(container,
|
||||
return vfio_get_dirty_bitmap(bcontainer,
|
||||
REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
|
||||
int128_get64(section->size), ram_addr);
|
||||
}
|
||||
@ -1322,15 +1336,16 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||
static void vfio_listener_log_sync(MemoryListener *listener,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||
listener);
|
||||
int ret;
|
||||
|
||||
if (vfio_listener_skipped_section(section)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (vfio_devices_all_dirty_tracking(container)) {
|
||||
ret = vfio_sync_dirty_bitmap(container, section);
|
||||
if (vfio_devices_all_dirty_tracking(bcontainer)) {
|
||||
ret = vfio_sync_dirty_bitmap(bcontainer, section);
|
||||
if (ret) {
|
||||
error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret,
|
||||
strerror(-ret));
|
||||
@ -1449,10 +1464,13 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
|
||||
|
||||
void vfio_put_address_space(VFIOAddressSpace *space)
|
||||
{
|
||||
if (QLIST_EMPTY(&space->containers)) {
|
||||
QLIST_REMOVE(space, list);
|
||||
g_free(space);
|
||||
if (!QLIST_EMPTY(&space->containers)) {
|
||||
return;
|
||||
}
|
||||
|
||||
QLIST_REMOVE(space, list);
|
||||
g_free(space);
|
||||
|
||||
if (QLIST_EMPTY(&vfio_address_spaces)) {
|
||||
qemu_unregister_reset(vfio_reset_handler, NULL);
|
||||
}
|
||||
@ -1481,3 +1499,24 @@ retry:
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||
AddressSpace *as, Error **errp)
|
||||
{
|
||||
const VFIOIOMMUOps *ops = &vfio_legacy_ops;
|
||||
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
if (vbasedev->iommufd) {
|
||||
ops = &vfio_iommufd_ops;
|
||||
}
|
||||
#endif
|
||||
return ops->attach_device(name, vbasedev, as, errp);
|
||||
}
|
||||
|
||||
void vfio_detach_device(VFIODevice *vbasedev)
|
||||
{
|
||||
if (!vbasedev->bcontainer) {
|
||||
return;
|
||||
}
|
||||
vbasedev->bcontainer->ops->detach_device(vbasedev);
|
||||
}
|
||||
|
101
hw/vfio/container-base.c
Normal file
101
hw/vfio/container-base.c
Normal file
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* VFIO BASE CONTAINER
|
||||
*
|
||||
* Copyright (C) 2023 Intel Corporation.
|
||||
* Copyright Red Hat, Inc. 2023
|
||||
*
|
||||
* Authors: Yi Liu <yi.l.liu@intel.com>
|
||||
* Eric Auger <eric.auger@redhat.com>
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "hw/vfio/vfio-container-base.h"
|
||||
|
||||
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
void *vaddr, bool readonly)
|
||||
{
|
||||
g_assert(bcontainer->ops->dma_map);
|
||||
return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly);
|
||||
}
|
||||
|
||||
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
IOMMUTLBEntry *iotlb)
|
||||
{
|
||||
g_assert(bcontainer->ops->dma_unmap);
|
||||
return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb);
|
||||
}
|
||||
|
||||
int vfio_container_add_section_window(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section,
|
||||
Error **errp)
|
||||
{
|
||||
if (!bcontainer->ops->add_window) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return bcontainer->ops->add_window(bcontainer, section, errp);
|
||||
}
|
||||
|
||||
void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
if (!bcontainer->ops->del_window) {
|
||||
return;
|
||||
}
|
||||
|
||||
return bcontainer->ops->del_window(bcontainer, section);
|
||||
}
|
||||
|
||||
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||
bool start)
|
||||
{
|
||||
if (!bcontainer->dirty_pages_supported) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
g_assert(bcontainer->ops->set_dirty_page_tracking);
|
||||
return bcontainer->ops->set_dirty_page_tracking(bcontainer, start);
|
||||
}
|
||||
|
||||
int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||
VFIOBitmap *vbmap,
|
||||
hwaddr iova, hwaddr size)
|
||||
{
|
||||
g_assert(bcontainer->ops->query_dirty_bitmap);
|
||||
return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size);
|
||||
}
|
||||
|
||||
void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space,
|
||||
const VFIOIOMMUOps *ops)
|
||||
{
|
||||
bcontainer->ops = ops;
|
||||
bcontainer->space = space;
|
||||
bcontainer->error = NULL;
|
||||
bcontainer->dirty_pages_supported = false;
|
||||
bcontainer->dma_max_mappings = 0;
|
||||
bcontainer->iova_ranges = NULL;
|
||||
QLIST_INIT(&bcontainer->giommu_list);
|
||||
QLIST_INIT(&bcontainer->vrdl_list);
|
||||
}
|
||||
|
||||
void vfio_container_destroy(VFIOContainerBase *bcontainer)
|
||||
{
|
||||
VFIOGuestIOMMU *giommu, *tmp;
|
||||
|
||||
QLIST_REMOVE(bcontainer, next);
|
||||
|
||||
QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
|
||||
memory_region_unregister_iommu_notifier(
|
||||
MEMORY_REGION(giommu->iommu_mr), &giommu->n);
|
||||
QLIST_REMOVE(giommu, giommu_next);
|
||||
g_free(giommu);
|
||||
}
|
||||
|
||||
g_list_free_full(bcontainer->iova_ranges, g_free);
|
||||
}
|
@ -33,6 +33,7 @@
|
||||
#include "trace.h"
|
||||
#include "qapi/error.h"
|
||||
#include "migration/migration.h"
|
||||
#include "pci.h"
|
||||
|
||||
VFIOGroupList vfio_group_list =
|
||||
QLIST_HEAD_INITIALIZER(vfio_group_list);
|
||||
@ -60,10 +61,11 @@ static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
|
||||
}
|
||||
}
|
||||
|
||||
static int vfio_dma_unmap_bitmap(VFIOContainer *container,
|
||||
static int vfio_dma_unmap_bitmap(const VFIOContainer *container,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
IOMMUTLBEntry *iotlb)
|
||||
{
|
||||
const VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||
struct vfio_iommu_type1_dma_unmap *unmap;
|
||||
struct vfio_bitmap *bitmap;
|
||||
VFIOBitmap vbmap;
|
||||
@ -91,7 +93,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
|
||||
bitmap->size = vbmap.size;
|
||||
bitmap->data = (__u64 *)vbmap.bitmap;
|
||||
|
||||
if (vbmap.size > container->max_dirty_bitmap_size) {
|
||||
if (vbmap.size > bcontainer->max_dirty_bitmap_size) {
|
||||
error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size);
|
||||
ret = -E2BIG;
|
||||
goto unmap_exit;
|
||||
@ -115,9 +117,12 @@ unmap_exit:
|
||||
/*
|
||||
* DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
|
||||
*/
|
||||
int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
|
||||
ram_addr_t size, IOMMUTLBEntry *iotlb)
|
||||
static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
IOMMUTLBEntry *iotlb)
|
||||
{
|
||||
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||
bcontainer);
|
||||
struct vfio_iommu_type1_dma_unmap unmap = {
|
||||
.argsz = sizeof(unmap),
|
||||
.flags = 0,
|
||||
@ -127,9 +132,9 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
|
||||
bool need_dirty_sync = false;
|
||||
int ret;
|
||||
|
||||
if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
|
||||
if (!vfio_devices_all_device_dirty_tracking(container) &&
|
||||
container->dirty_pages_supported) {
|
||||
if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) {
|
||||
if (!vfio_devices_all_device_dirty_tracking(bcontainer) &&
|
||||
bcontainer->dirty_pages_supported) {
|
||||
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
|
||||
}
|
||||
|
||||
@ -151,8 +156,8 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
|
||||
*/
|
||||
if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
|
||||
container->iommu_type == VFIO_TYPE1v2_IOMMU) {
|
||||
trace_vfio_dma_unmap_overflow_workaround();
|
||||
unmap.size -= 1ULL << ctz64(container->pgsizes);
|
||||
trace_vfio_legacy_dma_unmap_overflow_workaround();
|
||||
unmap.size -= 1ULL << ctz64(bcontainer->pgsizes);
|
||||
continue;
|
||||
}
|
||||
error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
|
||||
@ -160,7 +165,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
|
||||
}
|
||||
|
||||
if (need_dirty_sync) {
|
||||
ret = vfio_get_dirty_bitmap(container, iova, size,
|
||||
ret = vfio_get_dirty_bitmap(bcontainer, iova, size,
|
||||
iotlb->translated_addr);
|
||||
if (ret) {
|
||||
return ret;
|
||||
@ -170,9 +175,11 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vfio_dma_map(VFIOContainer *container, hwaddr iova,
|
||||
ram_addr_t size, void *vaddr, bool readonly)
|
||||
static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
|
||||
ram_addr_t size, void *vaddr, bool readonly)
|
||||
{
|
||||
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||
bcontainer);
|
||||
struct vfio_iommu_type1_dma_map map = {
|
||||
.argsz = sizeof(map),
|
||||
.flags = VFIO_DMA_MAP_FLAG_READ,
|
||||
@ -191,7 +198,8 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova,
|
||||
* the VGA ROM space.
|
||||
*/
|
||||
if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
|
||||
(errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
|
||||
(errno == EBUSY &&
|
||||
vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 &&
|
||||
ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
|
||||
return 0;
|
||||
}
|
||||
@ -200,17 +208,17 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova,
|
||||
return -errno;
|
||||
}
|
||||
|
||||
int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
|
||||
static int
|
||||
vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
|
||||
bool start)
|
||||
{
|
||||
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||
bcontainer);
|
||||
int ret;
|
||||
struct vfio_iommu_type1_dirty_bitmap dirty = {
|
||||
.argsz = sizeof(dirty),
|
||||
};
|
||||
|
||||
if (!container->dirty_pages_supported) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (start) {
|
||||
dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
|
||||
} else {
|
||||
@ -227,9 +235,12 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
|
||||
hwaddr iova, hwaddr size)
|
||||
static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||
VFIOBitmap *vbmap,
|
||||
hwaddr iova, hwaddr size)
|
||||
{
|
||||
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||
bcontainer);
|
||||
struct vfio_iommu_type1_dirty_bitmap *dbitmap;
|
||||
struct vfio_iommu_type1_dirty_bitmap_get *range;
|
||||
int ret;
|
||||
@ -296,7 +307,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
|
||||
}
|
||||
|
||||
static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
|
||||
VFIOContainer *container)
|
||||
VFIOContainerBase *bcontainer)
|
||||
{
|
||||
struct vfio_info_cap_header *hdr;
|
||||
struct vfio_iommu_type1_info_cap_iova_range *cap;
|
||||
@ -314,8 +325,8 @@ static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
|
||||
|
||||
range_set_bounds(range, cap->iova_ranges[i].start,
|
||||
cap->iova_ranges[i].end);
|
||||
container->iova_ranges =
|
||||
range_list_insert(container->iova_ranges, range);
|
||||
bcontainer->iova_ranges =
|
||||
range_list_insert(bcontainer->iova_ranges, range);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -442,6 +453,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
|
||||
{
|
||||
struct vfio_info_cap_header *hdr;
|
||||
struct vfio_iommu_type1_info_cap_migration *cap_mig;
|
||||
VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||
|
||||
hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
|
||||
if (!hdr) {
|
||||
@ -456,22 +468,17 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
|
||||
* qemu_real_host_page_size to mark those dirty.
|
||||
*/
|
||||
if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
|
||||
container->dirty_pages_supported = true;
|
||||
container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
|
||||
container->dirty_pgsizes = cap_mig->pgsize_bitmap;
|
||||
bcontainer->dirty_pages_supported = true;
|
||||
bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
|
||||
bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap;
|
||||
}
|
||||
}
|
||||
|
||||
static void vfio_free_container(VFIOContainer *container)
|
||||
{
|
||||
g_list_free_full(container->iova_ranges, g_free);
|
||||
g_free(container);
|
||||
}
|
||||
|
||||
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
Error **errp)
|
||||
{
|
||||
VFIOContainer *container;
|
||||
VFIOContainerBase *bcontainer;
|
||||
int ret, fd;
|
||||
VFIOAddressSpace *space;
|
||||
|
||||
@ -508,7 +515,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
* details once we know which type of IOMMU we are using.
|
||||
*/
|
||||
|
||||
QLIST_FOREACH(container, &space->containers, next) {
|
||||
QLIST_FOREACH(bcontainer, &space->containers, next) {
|
||||
container = container_of(bcontainer, VFIOContainer, bcontainer);
|
||||
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
|
||||
ret = vfio_ram_block_discard_disable(container, true);
|
||||
if (ret) {
|
||||
@ -544,14 +552,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
}
|
||||
|
||||
container = g_malloc0(sizeof(*container));
|
||||
container->space = space;
|
||||
container->fd = fd;
|
||||
container->error = NULL;
|
||||
container->dirty_pages_supported = false;
|
||||
container->dma_max_mappings = 0;
|
||||
container->iova_ranges = NULL;
|
||||
QLIST_INIT(&container->giommu_list);
|
||||
QLIST_INIT(&container->vrdl_list);
|
||||
bcontainer = &container->bcontainer;
|
||||
vfio_container_init(bcontainer, space, &vfio_legacy_ops);
|
||||
|
||||
ret = vfio_init_container(container, group->fd, errp);
|
||||
if (ret) {
|
||||
@ -577,16 +580,16 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
}
|
||||
|
||||
if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
|
||||
container->pgsizes = info->iova_pgsizes;
|
||||
bcontainer->pgsizes = info->iova_pgsizes;
|
||||
} else {
|
||||
container->pgsizes = qemu_real_host_page_size();
|
||||
bcontainer->pgsizes = qemu_real_host_page_size();
|
||||
}
|
||||
|
||||
if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
|
||||
container->dma_max_mappings = 65535;
|
||||
if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) {
|
||||
bcontainer->dma_max_mappings = 65535;
|
||||
}
|
||||
|
||||
vfio_get_info_iova_range(info, container);
|
||||
vfio_get_info_iova_range(info, bcontainer);
|
||||
|
||||
vfio_get_iommu_info_migration(container, info);
|
||||
g_free(info);
|
||||
@ -606,30 +609,29 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
vfio_kvm_device_add_group(group);
|
||||
|
||||
QLIST_INIT(&container->group_list);
|
||||
QLIST_INSERT_HEAD(&space->containers, container, next);
|
||||
QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
|
||||
|
||||
group->container = container;
|
||||
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
|
||||
|
||||
container->listener = vfio_memory_listener;
|
||||
bcontainer->listener = vfio_memory_listener;
|
||||
memory_listener_register(&bcontainer->listener, bcontainer->space->as);
|
||||
|
||||
memory_listener_register(&container->listener, container->space->as);
|
||||
|
||||
if (container->error) {
|
||||
if (bcontainer->error) {
|
||||
ret = -1;
|
||||
error_propagate_prepend(errp, container->error,
|
||||
error_propagate_prepend(errp, bcontainer->error,
|
||||
"memory listener initialization failed: ");
|
||||
goto listener_release_exit;
|
||||
}
|
||||
|
||||
container->initialized = true;
|
||||
bcontainer->initialized = true;
|
||||
|
||||
return 0;
|
||||
listener_release_exit:
|
||||
QLIST_REMOVE(group, container_next);
|
||||
QLIST_REMOVE(container, next);
|
||||
QLIST_REMOVE(bcontainer, next);
|
||||
vfio_kvm_device_del_group(group);
|
||||
memory_listener_unregister(&container->listener);
|
||||
memory_listener_unregister(&bcontainer->listener);
|
||||
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
|
||||
container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
|
||||
vfio_spapr_container_deinit(container);
|
||||
@ -639,7 +641,7 @@ enable_discards_exit:
|
||||
vfio_ram_block_discard_disable(container, false);
|
||||
|
||||
free_container_exit:
|
||||
vfio_free_container(container);
|
||||
g_free(container);
|
||||
|
||||
close_fd_exit:
|
||||
close(fd);
|
||||
@ -653,6 +655,7 @@ put_space_exit:
|
||||
static void vfio_disconnect_container(VFIOGroup *group)
|
||||
{
|
||||
VFIOContainer *container = group->container;
|
||||
VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||
|
||||
QLIST_REMOVE(group, container_next);
|
||||
group->container = NULL;
|
||||
@ -663,7 +666,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
|
||||
* group.
|
||||
*/
|
||||
if (QLIST_EMPTY(&container->group_list)) {
|
||||
memory_listener_unregister(&container->listener);
|
||||
memory_listener_unregister(&bcontainer->listener);
|
||||
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
|
||||
container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
|
||||
vfio_spapr_container_deinit(container);
|
||||
@ -676,21 +679,13 @@ static void vfio_disconnect_container(VFIOGroup *group)
|
||||
}
|
||||
|
||||
if (QLIST_EMPTY(&container->group_list)) {
|
||||
VFIOAddressSpace *space = container->space;
|
||||
VFIOGuestIOMMU *giommu, *tmp;
|
||||
VFIOAddressSpace *space = bcontainer->space;
|
||||
|
||||
QLIST_REMOVE(container, next);
|
||||
|
||||
QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
|
||||
memory_region_unregister_iommu_notifier(
|
||||
MEMORY_REGION(giommu->iommu_mr), &giommu->n);
|
||||
QLIST_REMOVE(giommu, giommu_next);
|
||||
g_free(giommu);
|
||||
}
|
||||
vfio_container_destroy(bcontainer);
|
||||
|
||||
trace_vfio_disconnect_container(container->fd);
|
||||
close(container->fd);
|
||||
vfio_free_container(container);
|
||||
g_free(container);
|
||||
|
||||
vfio_put_address_space(space);
|
||||
}
|
||||
@ -705,7 +700,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
|
||||
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
if (group->groupid == groupid) {
|
||||
/* Found it. Now is it already in the right context? */
|
||||
if (group->container->space->as == as) {
|
||||
if (group->container->bcontainer.space->as == as) {
|
||||
return group;
|
||||
} else {
|
||||
error_setg(errp, "group %d used in multiple address spaces",
|
||||
@ -877,13 +872,13 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
|
||||
* @name and @vbasedev->name are likely to be different depending
|
||||
* on the type of the device, hence the need for passing @name
|
||||
*/
|
||||
int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||
AddressSpace *as, Error **errp)
|
||||
static int vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
|
||||
AddressSpace *as, Error **errp)
|
||||
{
|
||||
int groupid = vfio_device_groupid(vbasedev, errp);
|
||||
VFIODevice *vbasedev_iter;
|
||||
VFIOGroup *group;
|
||||
VFIOContainer *container;
|
||||
VFIOContainerBase *bcontainer;
|
||||
int ret;
|
||||
|
||||
if (groupid < 0) {
|
||||
@ -910,26 +905,200 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
container = group->container;
|
||||
vbasedev->container = container;
|
||||
QLIST_INSERT_HEAD(&container->device_list, vbasedev, container_next);
|
||||
bcontainer = &group->container->bcontainer;
|
||||
vbasedev->bcontainer = bcontainer;
|
||||
QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
|
||||
QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void vfio_detach_device(VFIODevice *vbasedev)
|
||||
static void vfio_legacy_detach_device(VFIODevice *vbasedev)
|
||||
{
|
||||
VFIOGroup *group = vbasedev->group;
|
||||
|
||||
if (!vbasedev->container) {
|
||||
return;
|
||||
}
|
||||
|
||||
QLIST_REMOVE(vbasedev, global_next);
|
||||
QLIST_REMOVE(vbasedev, container_next);
|
||||
vbasedev->container = NULL;
|
||||
vbasedev->bcontainer = NULL;
|
||||
trace_vfio_detach_device(vbasedev->name, group->groupid);
|
||||
vfio_put_base_device(vbasedev);
|
||||
vfio_put_group(group);
|
||||
}
|
||||
|
||||
static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single)
|
||||
{
|
||||
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
|
||||
VFIOGroup *group;
|
||||
struct vfio_pci_hot_reset_info *info = NULL;
|
||||
struct vfio_pci_dependent_device *devices;
|
||||
struct vfio_pci_hot_reset *reset;
|
||||
int32_t *fds;
|
||||
int ret, i, count;
|
||||
bool multi = false;
|
||||
|
||||
trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
|
||||
|
||||
if (!single) {
|
||||
vfio_pci_pre_reset(vdev);
|
||||
}
|
||||
vdev->vbasedev.needs_reset = false;
|
||||
|
||||
ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
|
||||
|
||||
if (ret) {
|
||||
goto out_single;
|
||||
}
|
||||
devices = &info->devices[0];
|
||||
|
||||
trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
|
||||
|
||||
/* Verify that we have all the groups required */
|
||||
for (i = 0; i < info->count; i++) {
|
||||
PCIHostDeviceAddress host;
|
||||
VFIOPCIDevice *tmp;
|
||||
VFIODevice *vbasedev_iter;
|
||||
|
||||
host.domain = devices[i].segment;
|
||||
host.bus = devices[i].bus;
|
||||
host.slot = PCI_SLOT(devices[i].devfn);
|
||||
host.function = PCI_FUNC(devices[i].devfn);
|
||||
|
||||
trace_vfio_pci_hot_reset_dep_devices(host.domain,
|
||||
host.bus, host.slot, host.function, devices[i].group_id);
|
||||
|
||||
if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
if (group->groupid == devices[i].group_id) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!group) {
|
||||
if (!vdev->has_pm_reset) {
|
||||
error_report("vfio: Cannot reset device %s, "
|
||||
"depends on group %d which is not owned.",
|
||||
vdev->vbasedev.name, devices[i].group_id);
|
||||
}
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Prep dependent devices for reset and clear our marker. */
|
||||
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
|
||||
if (!vbasedev_iter->dev->realized ||
|
||||
vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
|
||||
continue;
|
||||
}
|
||||
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
|
||||
if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
|
||||
if (single) {
|
||||
ret = -EINVAL;
|
||||
goto out_single;
|
||||
}
|
||||
vfio_pci_pre_reset(tmp);
|
||||
tmp->vbasedev.needs_reset = false;
|
||||
multi = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!single && !multi) {
|
||||
ret = -EINVAL;
|
||||
goto out_single;
|
||||
}
|
||||
|
||||
/* Determine how many group fds need to be passed */
|
||||
count = 0;
|
||||
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
for (i = 0; i < info->count; i++) {
|
||||
if (group->groupid == devices[i].group_id) {
|
||||
count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
|
||||
reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
|
||||
fds = &reset->group_fds[0];
|
||||
|
||||
/* Fill in group fds */
|
||||
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
for (i = 0; i < info->count; i++) {
|
||||
if (group->groupid == devices[i].group_id) {
|
||||
fds[reset->count++] = group->fd;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Bus reset! */
|
||||
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
|
||||
g_free(reset);
|
||||
if (ret) {
|
||||
ret = -errno;
|
||||
}
|
||||
|
||||
trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
|
||||
ret ? strerror(errno) : "Success");
|
||||
|
||||
out:
|
||||
/* Re-enable INTx on affected devices */
|
||||
for (i = 0; i < info->count; i++) {
|
||||
PCIHostDeviceAddress host;
|
||||
VFIOPCIDevice *tmp;
|
||||
VFIODevice *vbasedev_iter;
|
||||
|
||||
host.domain = devices[i].segment;
|
||||
host.bus = devices[i].bus;
|
||||
host.slot = PCI_SLOT(devices[i].devfn);
|
||||
host.function = PCI_FUNC(devices[i].devfn);
|
||||
|
||||
if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
if (group->groupid == devices[i].group_id) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!group) {
|
||||
break;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
|
||||
if (!vbasedev_iter->dev->realized ||
|
||||
vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
|
||||
continue;
|
||||
}
|
||||
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
|
||||
if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
|
||||
vfio_pci_post_reset(tmp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
out_single:
|
||||
if (!single) {
|
||||
vfio_pci_post_reset(vdev);
|
||||
}
|
||||
g_free(info);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
const VFIOIOMMUOps vfio_legacy_ops = {
|
||||
.dma_map = vfio_legacy_dma_map,
|
||||
.dma_unmap = vfio_legacy_dma_unmap,
|
||||
.attach_device = vfio_legacy_attach_device,
|
||||
.detach_device = vfio_legacy_detach_device,
|
||||
.set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking,
|
||||
.query_dirty_bitmap = vfio_legacy_query_dirty_bitmap,
|
||||
.pci_hot_reset = vfio_legacy_pci_hot_reset,
|
||||
};
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "trace.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "monitor/monitor.h"
|
||||
|
||||
/*
|
||||
* Common VFIO interrupt disable
|
||||
@ -609,3 +610,56 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
|
||||
{
|
||||
struct stat st;
|
||||
|
||||
if (vbasedev->fd < 0) {
|
||||
if (stat(vbasedev->sysfsdev, &st) < 0) {
|
||||
error_setg_errno(errp, errno, "no such host device");
|
||||
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
|
||||
return -errno;
|
||||
}
|
||||
/* User may specify a name, e.g: VFIO platform device */
|
||||
if (!vbasedev->name) {
|
||||
vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
|
||||
}
|
||||
} else {
|
||||
if (!vbasedev->iommufd) {
|
||||
error_setg(errp, "Use FD passing only with iommufd backend");
|
||||
return -EINVAL;
|
||||
}
|
||||
/*
|
||||
* Give a name with fd so any function printing out vbasedev->name
|
||||
* will not break.
|
||||
*/
|
||||
if (!vbasedev->name) {
|
||||
vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
|
||||
{
|
||||
int fd = monitor_fd_param(monitor_cur(), str, errp);
|
||||
|
||||
if (fd < 0) {
|
||||
error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||
return;
|
||||
}
|
||||
vbasedev->fd = fd;
|
||||
}
|
||||
|
||||
void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
|
||||
DeviceState *dev, bool ram_discard)
|
||||
{
|
||||
vbasedev->type = type;
|
||||
vbasedev->ops = ops;
|
||||
vbasedev->dev = dev;
|
||||
vbasedev->fd = -1;
|
||||
|
||||
vbasedev->ram_block_discard_allowed = ram_discard;
|
||||
}
|
||||
|
630
hw/vfio/iommufd.c
Normal file
630
hw/vfio/iommufd.c
Normal file
@ -0,0 +1,630 @@
|
||||
/*
|
||||
* iommufd container backend
|
||||
*
|
||||
* Copyright (C) 2023 Intel Corporation.
|
||||
* Copyright Red Hat, Inc. 2023
|
||||
*
|
||||
* Authors: Yi Liu <yi.l.liu@intel.com>
|
||||
* Eric Auger <eric.auger@redhat.com>
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/vfio.h>
|
||||
#include <linux/iommufd.h>
|
||||
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "trace.h"
|
||||
#include "qapi/error.h"
|
||||
#include "sysemu/iommufd.h"
|
||||
#include "hw/qdev-core.h"
|
||||
#include "sysemu/reset.h"
|
||||
#include "qemu/cutils.h"
|
||||
#include "qemu/chardev_open.h"
|
||||
#include "pci.h"
|
||||
|
||||
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
|
||||
ram_addr_t size, void *vaddr, bool readonly)
|
||||
{
|
||||
const VFIOIOMMUFDContainer *container =
|
||||
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
|
||||
|
||||
return iommufd_backend_map_dma(container->be,
|
||||
container->ioas_id,
|
||||
iova, size, vaddr, readonly);
|
||||
}
|
||||
|
||||
static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
IOMMUTLBEntry *iotlb)
|
||||
{
|
||||
const VFIOIOMMUFDContainer *container =
|
||||
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
|
||||
|
||||
/* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
|
||||
return iommufd_backend_unmap_dma(container->be,
|
||||
container->ioas_id, iova, size);
|
||||
}
|
||||
|
||||
static int iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp)
|
||||
{
|
||||
return vfio_kvm_device_add_fd(vbasedev->fd, errp);
|
||||
}
|
||||
|
||||
static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev)
|
||||
{
|
||||
Error *err = NULL;
|
||||
|
||||
if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) {
|
||||
error_report_err(err);
|
||||
}
|
||||
}
|
||||
|
||||
static int iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
|
||||
{
|
||||
IOMMUFDBackend *iommufd = vbasedev->iommufd;
|
||||
struct vfio_device_bind_iommufd bind = {
|
||||
.argsz = sizeof(bind),
|
||||
.flags = 0,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = iommufd_backend_connect(iommufd, errp);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add device to kvm-vfio to be prepared for the tracking
|
||||
* in KVM. Especially for some emulated devices, it requires
|
||||
* to have kvm information in the device open.
|
||||
*/
|
||||
ret = iommufd_cdev_kvm_device_add(vbasedev, errp);
|
||||
if (ret) {
|
||||
goto err_kvm_device_add;
|
||||
}
|
||||
|
||||
/* Bind device to iommufd */
|
||||
bind.iommufd = iommufd->fd;
|
||||
ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind);
|
||||
if (ret) {
|
||||
error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d",
|
||||
vbasedev->fd, bind.iommufd);
|
||||
goto err_bind;
|
||||
}
|
||||
|
||||
vbasedev->devid = bind.out_devid;
|
||||
trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
|
||||
vbasedev->fd, vbasedev->devid);
|
||||
return ret;
|
||||
err_bind:
|
||||
iommufd_cdev_kvm_device_del(vbasedev);
|
||||
err_kvm_device_add:
|
||||
iommufd_backend_disconnect(iommufd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
|
||||
{
|
||||
/* Unbind is automatically conducted when device fd is closed */
|
||||
iommufd_cdev_kvm_device_del(vbasedev);
|
||||
iommufd_backend_disconnect(vbasedev->iommufd);
|
||||
}
|
||||
|
||||
static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
|
||||
{
|
||||
long int ret = -ENOTTY;
|
||||
char *path, *vfio_dev_path = NULL, *vfio_path = NULL;
|
||||
DIR *dir = NULL;
|
||||
struct dirent *dent;
|
||||
gchar *contents;
|
||||
struct stat st;
|
||||
gsize length;
|
||||
int major, minor;
|
||||
dev_t vfio_devt;
|
||||
|
||||
path = g_strdup_printf("%s/vfio-dev", sysfs_path);
|
||||
if (stat(path, &st) < 0) {
|
||||
error_setg_errno(errp, errno, "no such host device");
|
||||
goto out_free_path;
|
||||
}
|
||||
|
||||
dir = opendir(path);
|
||||
if (!dir) {
|
||||
error_setg_errno(errp, errno, "couldn't open directory %s", path);
|
||||
goto out_free_path;
|
||||
}
|
||||
|
||||
while ((dent = readdir(dir))) {
|
||||
if (!strncmp(dent->d_name, "vfio", 4)) {
|
||||
vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!vfio_dev_path) {
|
||||
error_setg(errp, "failed to find vfio-dev/vfioX/dev");
|
||||
goto out_close_dir;
|
||||
}
|
||||
|
||||
if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) {
|
||||
error_setg(errp, "failed to load \"%s\"", vfio_dev_path);
|
||||
goto out_free_dev_path;
|
||||
}
|
||||
|
||||
if (sscanf(contents, "%d:%d", &major, &minor) != 2) {
|
||||
error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path);
|
||||
goto out_free_dev_path;
|
||||
}
|
||||
g_free(contents);
|
||||
vfio_devt = makedev(major, minor);
|
||||
|
||||
vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name);
|
||||
ret = open_cdev(vfio_path, vfio_devt);
|
||||
if (ret < 0) {
|
||||
error_setg(errp, "Failed to open %s", vfio_path);
|
||||
}
|
||||
|
||||
trace_iommufd_cdev_getfd(vfio_path, ret);
|
||||
g_free(vfio_path);
|
||||
|
||||
out_free_dev_path:
|
||||
g_free(vfio_dev_path);
|
||||
out_close_dir:
|
||||
closedir(dir);
|
||||
out_free_path:
|
||||
if (*errp) {
|
||||
error_prepend(errp, VFIO_MSG_PREFIX, path);
|
||||
}
|
||||
g_free(path);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
|
||||
Error **errp)
|
||||
{
|
||||
int ret, iommufd = vbasedev->iommufd->fd;
|
||||
struct vfio_device_attach_iommufd_pt attach_data = {
|
||||
.argsz = sizeof(attach_data),
|
||||
.flags = 0,
|
||||
.pt_id = id,
|
||||
};
|
||||
|
||||
/* Attach device to an IOAS or hwpt within iommufd */
|
||||
ret = ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data);
|
||||
if (ret) {
|
||||
error_setg_errno(errp, errno,
|
||||
"[iommufd=%d] error attach %s (%d) to id=%d",
|
||||
iommufd, vbasedev->name, vbasedev->fd, id);
|
||||
} else {
|
||||
trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
|
||||
vbasedev->fd, id);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Detach @vbasedev from whatever IOAS/hwpt it is currently attached to,
 * via VFIO_DEVICE_DETACH_IOMMUFD_PT.
 *
 * Returns 0 on success, -1 on failure with @errp set.
 */
static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
{
    struct vfio_device_detach_iommufd_pt detach_data = {
        .argsz = sizeof(detach_data),
        .flags = 0,
    };

    if (ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data)) {
        error_setg_errno(errp, errno, "detach %s failed", vbasedev->name);
        return -1;
    }

    trace_iommufd_cdev_detach_ioas_hwpt(vbasedev->iommufd->fd, vbasedev->name);
    return 0;
}
|
||||
|
||||
/*
 * Attach @vbasedev to @container: thin wrapper that attaches the device
 * to the container's IOAS id within iommufd.
 *
 * Returns 0 on success, non-zero on failure with @errp set.
 */
static int iommufd_cdev_attach_container(VFIODevice *vbasedev,
                                         VFIOIOMMUFDContainer *container,
                                         Error **errp)
{
    return iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}
|
||||
|
||||
static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
|
||||
VFIOIOMMUFDContainer *container)
|
||||
{
|
||||
Error *err = NULL;
|
||||
|
||||
if (iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
|
||||
error_report_err(err);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Tear down @container: unregister its memory listener, release the base
 * container state, free the IOAS within iommufd and free the structure.
 * A no-op while any device is still attached to the container.
 */
static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;

    /* Only the last detaching device actually destroys the container. */
    if (QLIST_EMPTY(&bcontainer->device_list)) {
        memory_listener_unregister(&bcontainer->listener);
        vfio_container_destroy(bcontainer);
        iommufd_backend_free_id(container->be, container->ioas_id);
        g_free(container);
    }
}
|
||||
|
||||
/*
 * Toggle the "uncoordinated RAM discard disabled" state for this backend.
 * Called with true while a device is attached (discards would break DMA
 * mappings pinned through iommufd) and with false on detach or when the
 * device explicitly allows RAM block discards.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int iommufd_cdev_ram_block_discard_disable(bool state)
{
    /*
     * We support coordinated discarding of RAM via the RamDiscardManager.
     */
    return ram_block_uncoordinated_discard_disable(state);
}
|
||||
|
||||
/*
 * Query the usable IOVA ranges of @ioas_id from iommufd and record them
 * (plus the IOVA alignment as the supported page sizes) on the base
 * container.
 *
 * Uses the usual two-call pattern: the first IOMMU_IOAS_IOVA_RANGES call
 * is expected to fail with EMSGSIZE and report the number of ranges, the
 * buffer is then grown and the call repeated.
 *
 * Returns 0 on success, negative errno on failure with @errp set.
 */
static int iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container,
                                            uint32_t ioas_id, Error **errp)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;
    struct iommu_ioas_iova_ranges *info;
    struct iommu_iova_range *iova_ranges;
    int ret, sz, fd = container->be->fd;

    info = g_malloc0(sizeof(*info));
    info->size = sizeof(*info);
    info->ioas_id = ioas_id;

    /* Sizing call: EMSGSIZE with num_iovas filled in is the expected path */
    ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info);
    if (ret && errno != EMSGSIZE) {
        goto error;
    }

    sz = info->num_iovas * sizeof(struct iommu_iova_range);
    info = g_realloc(info, sizeof(*info) + sz);
    /* The range array lives directly after the header */
    info->allowed_iovas = (uintptr_t)(info + 1);

    ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info);
    if (ret) {
        goto error;
    }

    iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas;

    for (int i = 0; i < info->num_iovas; i++) {
        Range *range = g_new(Range, 1);

        range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last);
        bcontainer->iova_ranges =
            range_list_insert(bcontainer->iova_ranges, range);
    }
    bcontainer->pgsizes = info->out_iova_alignment;

    g_free(info);
    return 0;

error:
    /*
     * Snapshot errno before calling into other code: g_free() is not
     * guaranteed to preserve it, and error_setg_errno() below must report
     * the ioctl failure, not a clobbered value.
     */
    ret = -errno;
    g_free(info);
    error_setg_errno(errp, -ret, "Cannot get IOVA ranges");
    return ret;
}
|
||||
|
||||
/*
 * iommufd_cdev_attach: VFIOIOMMUOps attach_device callback for the
 * iommufd backend.
 *
 * Opens (or adopts) the device cdev fd, binds the device to iommufd, and
 * attaches it to a container in @as - reusing an existing compatible
 * container in the address space when possible, otherwise allocating a
 * fresh IOAS-backed container and registering its memory listener.
 *
 * On success, fills in @vbasedev's device info (irqs/regions/flags) and
 * links it onto the container and global device lists; returns 0.
 * On failure, unwinds in strict reverse order of construction via the
 * goto ladder at the bottom and returns a negative value with @errp set.
 */
static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
                               AddressSpace *as, Error **errp)
{
    VFIOContainerBase *bcontainer;
    VFIOIOMMUFDContainer *container;
    VFIOAddressSpace *space;
    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
    int ret, devfd;
    uint32_t ioas_id;
    Error *err = NULL;

    /* A pre-opened fd (cdev passed in by management layer) takes priority */
    if (vbasedev->fd < 0) {
        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
        if (devfd < 0) {
            return devfd;
        }
        vbasedev->fd = devfd;
    } else {
        devfd = vbasedev->fd;
    }

    ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
    if (ret) {
        goto err_connect_bind;
    }

    space = vfio_get_address_space(as);

    /* try to attach to an existing container in this space */
    QLIST_FOREACH(bcontainer, &space->containers, next) {
        container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
        /* Only containers of this backend sharing the same iommufd qualify */
        if (bcontainer->ops != &vfio_iommufd_ops ||
            vbasedev->iommufd != container->be) {
            continue;
        }
        if (iommufd_cdev_attach_container(vbasedev, container, &err)) {
            /* Attach failure here is not fatal; fall through and try the
             * next container, or allocate a dedicated one below. */
            const char *msg = error_get_pretty(err);

            trace_iommufd_cdev_fail_attach_existing_container(msg);
            error_free(err);
            err = NULL;
        } else {
            ret = iommufd_cdev_ram_block_discard_disable(true);
            if (ret) {
                error_setg(errp,
                              "Cannot set discarding of RAM broken (%d)", ret);
                goto err_discard_disable;
            }
            goto found_container;
        }
    }

    /* Need to allocate a new dedicated container */
    ret = iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp);
    if (ret < 0) {
        goto err_alloc_ioas;
    }

    trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);

    container = g_malloc0(sizeof(*container));
    container->be = vbasedev->iommufd;
    container->ioas_id = ioas_id;

    bcontainer = &container->bcontainer;
    vfio_container_init(bcontainer, space, &vfio_iommufd_ops);
    QLIST_INSERT_HEAD(&space->containers, bcontainer, next);

    ret = iommufd_cdev_attach_container(vbasedev, container, errp);
    if (ret) {
        goto err_attach_container;
    }

    ret = iommufd_cdev_ram_block_discard_disable(true);
    if (ret) {
        goto err_discard_disable;
    }

    /* IOVA range discovery failure is non-fatal: warn and use defaults */
    ret = iommufd_cdev_get_info_iova_range(container, ioas_id, &err);
    if (ret) {
        error_append_hint(&err,
                   "Fallback to default 64bit IOVA range and 4K page size\n");
        warn_report_err(err);
        err = NULL;
        bcontainer->pgsizes = qemu_real_host_page_size();
    }

    bcontainer->listener = vfio_memory_listener;
    memory_listener_register(&bcontainer->listener, bcontainer->space->as);

    /* The listener may record a deferred error while replaying regions */
    if (bcontainer->error) {
        ret = -1;
        error_propagate_prepend(errp, bcontainer->error,
                                "memory listener initialization failed: ");
        goto err_listener_register;
    }

    bcontainer->initialized = true;

found_container:
    ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info);
    if (ret) {
        error_setg_errno(errp, errno, "error getting device info");
        goto err_listener_register;
    }

    /*
     * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level
     * for discarding incompatibility check as well?
     */
    if (vbasedev->ram_block_discard_allowed) {
        iommufd_cdev_ram_block_discard_disable(false);
    }

    /* No VFIO group with the cdev interface */
    vbasedev->group = 0;
    vbasedev->num_irqs = dev_info.num_irqs;
    vbasedev->num_regions = dev_info.num_regions;
    vbasedev->flags = dev_info.flags;
    vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
    vbasedev->bcontainer = bcontainer;
    QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
    QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);

    trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
                                   vbasedev->num_regions, vbasedev->flags);
    return 0;

err_listener_register:
    iommufd_cdev_ram_block_discard_disable(false);
err_discard_disable:
    iommufd_cdev_detach_container(vbasedev, container);
err_attach_container:
    /* No-op when other devices still reference the container */
    iommufd_cdev_container_destroy(container);
err_alloc_ioas:
    vfio_put_address_space(space);
    iommufd_cdev_unbind_and_disconnect(vbasedev);
err_connect_bind:
    close(vbasedev->fd);
    return ret;
}
|
||||
|
||||
/*
 * iommufd_cdev_detach: VFIOIOMMUOps detach_device callback for the
 * iommufd backend. Exact mirror of iommufd_cdev_attach(): unlinks the
 * device, re-enables RAM discards if they were disabled for it, detaches
 * from and (when last) destroys the container, then unbinds from iommufd
 * and closes the device fd. Teardown order is significant.
 */
static void iommufd_cdev_detach(VFIODevice *vbasedev)
{
    VFIOContainerBase *bcontainer = vbasedev->bcontainer;
    VFIOAddressSpace *space = bcontainer->space;
    VFIOIOMMUFDContainer *container = container_of(bcontainer,
                                                   VFIOIOMMUFDContainer,
                                                   bcontainer);
    QLIST_REMOVE(vbasedev, global_next);
    QLIST_REMOVE(vbasedev, container_next);
    vbasedev->bcontainer = NULL;

    /* Balances the discard_disable(true) done at attach time */
    if (!vbasedev->ram_block_discard_allowed) {
        iommufd_cdev_ram_block_discard_disable(false);
    }

    iommufd_cdev_detach_container(vbasedev, container);
    /* Destroys the container only if this was the last attached device */
    iommufd_cdev_container_destroy(container);
    vfio_put_address_space(space);

    iommufd_cdev_unbind_and_disconnect(vbasedev);
    close(vbasedev->fd);
}
|
||||
|
||||
/*
 * Look up the iommufd-backed VFIO device whose iommufd-assigned device id
 * is @devid. Returns NULL when no such device is known.
 */
static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
{
    VFIODevice *cur;

    QLIST_FOREACH(cur, &vfio_device_list, global_next) {
        /* Devices of other backends have no iommufd devid to compare */
        if (cur->bcontainer->ops == &vfio_iommufd_ops &&
            cur->devid == devid) {
            return cur;
        }
    }
    return NULL;
}
|
||||
|
||||
static VFIOPCIDevice *
|
||||
iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev,
|
||||
VFIODevice *reset_dev)
|
||||
{
|
||||
VFIODevice *vbasedev_tmp;
|
||||
|
||||
if (dep_dev->devid == reset_dev->devid ||
|
||||
dep_dev->devid == VFIO_PCI_DEVID_OWNED) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid);
|
||||
if (!vbasedev_tmp || !vbasedev_tmp->dev->realized ||
|
||||
vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev);
|
||||
}
|
||||
|
||||
/*
 * iommufd_cdev_pci_hot_reset: VFIOIOMMUOps pci_hot_reset callback for the
 * iommufd backend.
 *
 * Performs a PCI bus/hot reset of @vbasedev. With the cdev interface the
 * kernel tracks device ownership by devid, so unlike the legacy group
 * path no group fds are passed: a zero-length vfio_pci_hot_reset is
 * issued once all dependent devices have been verified as owned and
 * prepped. @single requests that the reset affect only this device;
 * -EINVAL is returned if other affected devices exist in that case.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single)
{
    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
    struct vfio_pci_hot_reset_info *info = NULL;
    struct vfio_pci_dependent_device *devices;
    struct vfio_pci_hot_reset *reset;
    int ret, i;
    bool multi = false;

    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");

    if (!single) {
        vfio_pci_pre_reset(vdev);
    }
    vdev->vbasedev.needs_reset = false;

    ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);

    if (ret) {
        goto out_single;
    }

    /* The cdev interface always reports devids */
    assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);

    devices = &info->devices[0];

    /* Without blanket ownership, any NOT_OWNED dependent blocks the reset */
    if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
        if (!vdev->has_pm_reset) {
            for (i = 0; i < info->count; i++) {
                if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
                    error_report("vfio: Cannot reset device %s, "
                                 "depends on device %04x:%02x:%02x.%x "
                                 "which is not owned.",
                                 vdev->vbasedev.name, devices[i].segment,
                                 devices[i].bus, PCI_SLOT(devices[i].devfn),
                                 PCI_FUNC(devices[i].devfn));
                }
            }
        }
        ret = -EPERM;
        goto out_single;
    }

    trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);

    /* Prep every other affected, realized PCI device for the reset */
    for (i = 0; i < info->count; i++) {
        VFIOPCIDevice *tmp;

        trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment,
                                                     devices[i].bus,
                                                     PCI_SLOT(devices[i].devfn),
                                                     PCI_FUNC(devices[i].devfn),
                                                     devices[i].devid);

        /*
         * If a VFIO cdev device is resettable, all the dependent devices
         * are either bound to same iommufd or within same iommu_groups as
         * one of the iommufd bound devices.
         */
        assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);

        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev);
        if (!tmp) {
            continue;
        }

        /* A second affected device contradicts a single-device reset */
        if (single) {
            ret = -EINVAL;
            goto out_single;
        }
        vfio_pci_pre_reset(tmp);
        tmp->vbasedev.needs_reset = false;
        multi = true;
    }

    if (!single && !multi) {
        ret = -EINVAL;
        goto out_single;
    }

    /* Use zero length array for hot reset with iommufd backend */
    reset = g_malloc0(sizeof(*reset));
    reset->argsz = sizeof(*reset);

    /* Bus reset! */
    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
    g_free(reset);
    if (ret) {
        ret = -errno;
    }

    trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
                                    ret ? strerror(errno) : "Success");

    /* Re-enable INTx on affected devices */
    for (i = 0; i < info->count; i++) {
        VFIOPCIDevice *tmp;

        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev);
        if (!tmp) {
            continue;
        }
        vfio_pci_post_reset(tmp);
    }
out_single:
    if (!single) {
        vfio_pci_post_reset(vdev);
    }
    g_free(info);

    return ret;
}
|
||||
|
||||
/*
 * VFIOIOMMUOps implementation for the iommufd (cdev) backend; selected
 * per container and matched against in iommufd_cdev_attach() /
 * iommufd_cdev_pci_find_by_devid() to identify iommufd-backed containers.
 */
const VFIOIOMMUOps vfio_iommufd_ops = {
    .dma_map = iommufd_cdev_map,
    .dma_unmap = iommufd_cdev_unmap,
    .attach_device = iommufd_cdev_attach,
    .detach_device = iommufd_cdev_detach,
    .pci_hot_reset = iommufd_cdev_pci_hot_reset,
};
|
@ -2,10 +2,14 @@ vfio_ss = ss.source_set()
|
||||
vfio_ss.add(files(
|
||||
'helpers.c',
|
||||
'common.c',
|
||||
'container-base.c',
|
||||
'container.c',
|
||||
'spapr.c',
|
||||
'migration.c',
|
||||
))
|
||||
vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files(
|
||||
'iommufd.c',
|
||||
))
|
||||
vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
|
||||
'display.c',
|
||||
'pci-quirks.c',
|
||||
|
218
hw/vfio/pci.c
218
hw/vfio/pci.c
@ -19,6 +19,7 @@
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||
#include <linux/vfio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
@ -42,6 +43,7 @@
|
||||
#include "qapi/error.h"
|
||||
#include "migration/blocker.h"
|
||||
#include "migration/qemu-file.h"
|
||||
#include "sysemu/iommufd.h"
|
||||
|
||||
#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
|
||||
|
||||
@ -2374,7 +2376,7 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
|
||||
void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
|
||||
{
|
||||
PCIDevice *pdev = &vdev->pdev;
|
||||
uint16_t cmd;
|
||||
@ -2411,7 +2413,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
|
||||
vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
|
||||
}
|
||||
|
||||
static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
|
||||
void vfio_pci_post_reset(VFIOPCIDevice *vdev)
|
||||
{
|
||||
Error *err = NULL;
|
||||
int nr;
|
||||
@ -2435,7 +2437,7 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
|
||||
vfio_quirk_reset(vdev);
|
||||
}
|
||||
|
||||
static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
|
||||
bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
|
||||
{
|
||||
char tmp[13];
|
||||
|
||||
@ -2445,22 +2447,13 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
|
||||
return (strcmp(tmp, name) == 0);
|
||||
}
|
||||
|
||||
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
|
||||
int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
|
||||
struct vfio_pci_hot_reset_info **info_p)
|
||||
{
|
||||
VFIOGroup *group;
|
||||
struct vfio_pci_hot_reset_info *info;
|
||||
struct vfio_pci_dependent_device *devices;
|
||||
struct vfio_pci_hot_reset *reset;
|
||||
int32_t *fds;
|
||||
int ret, i, count;
|
||||
bool multi = false;
|
||||
int ret, count;
|
||||
|
||||
trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
|
||||
|
||||
if (!single) {
|
||||
vfio_pci_pre_reset(vdev);
|
||||
}
|
||||
vdev->vbasedev.needs_reset = false;
|
||||
assert(info_p && !*info_p);
|
||||
|
||||
info = g_malloc0(sizeof(*info));
|
||||
info->argsz = sizeof(*info);
|
||||
@ -2468,163 +2461,36 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
|
||||
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
|
||||
if (ret && errno != ENOSPC) {
|
||||
ret = -errno;
|
||||
g_free(info);
|
||||
if (!vdev->has_pm_reset) {
|
||||
error_report("vfio: Cannot reset device %s, "
|
||||
"no available reset mechanism.", vdev->vbasedev.name);
|
||||
}
|
||||
goto out_single;
|
||||
return ret;
|
||||
}
|
||||
|
||||
count = info->count;
|
||||
info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
|
||||
info->argsz = sizeof(*info) + (count * sizeof(*devices));
|
||||
devices = &info->devices[0];
|
||||
info = g_realloc(info, sizeof(*info) + (count * sizeof(info->devices[0])));
|
||||
info->argsz = sizeof(*info) + (count * sizeof(info->devices[0]));
|
||||
|
||||
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
|
||||
if (ret) {
|
||||
ret = -errno;
|
||||
g_free(info);
|
||||
error_report("vfio: hot reset info failed: %m");
|
||||
goto out_single;
|
||||
return ret;
|
||||
}
|
||||
|
||||
trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
|
||||
*info_p = info;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Verify that we have all the groups required */
|
||||
for (i = 0; i < info->count; i++) {
|
||||
PCIHostDeviceAddress host;
|
||||
VFIOPCIDevice *tmp;
|
||||
VFIODevice *vbasedev_iter;
|
||||
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
|
||||
{
|
||||
VFIODevice *vbasedev = &vdev->vbasedev;
|
||||
const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops;
|
||||
|
||||
host.domain = devices[i].segment;
|
||||
host.bus = devices[i].bus;
|
||||
host.slot = PCI_SLOT(devices[i].devfn);
|
||||
host.function = PCI_FUNC(devices[i].devfn);
|
||||
|
||||
trace_vfio_pci_hot_reset_dep_devices(host.domain,
|
||||
host.bus, host.slot, host.function, devices[i].group_id);
|
||||
|
||||
if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
if (group->groupid == devices[i].group_id) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!group) {
|
||||
if (!vdev->has_pm_reset) {
|
||||
error_report("vfio: Cannot reset device %s, "
|
||||
"depends on group %d which is not owned.",
|
||||
vdev->vbasedev.name, devices[i].group_id);
|
||||
}
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Prep dependent devices for reset and clear our marker. */
|
||||
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
|
||||
if (!vbasedev_iter->dev->realized ||
|
||||
vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
|
||||
continue;
|
||||
}
|
||||
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
|
||||
if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
|
||||
if (single) {
|
||||
ret = -EINVAL;
|
||||
goto out_single;
|
||||
}
|
||||
vfio_pci_pre_reset(tmp);
|
||||
tmp->vbasedev.needs_reset = false;
|
||||
multi = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!single && !multi) {
|
||||
ret = -EINVAL;
|
||||
goto out_single;
|
||||
}
|
||||
|
||||
/* Determine how many group fds need to be passed */
|
||||
count = 0;
|
||||
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
for (i = 0; i < info->count; i++) {
|
||||
if (group->groupid == devices[i].group_id) {
|
||||
count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
|
||||
reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
|
||||
fds = &reset->group_fds[0];
|
||||
|
||||
/* Fill in group fds */
|
||||
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
for (i = 0; i < info->count; i++) {
|
||||
if (group->groupid == devices[i].group_id) {
|
||||
fds[reset->count++] = group->fd;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Bus reset! */
|
||||
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
|
||||
g_free(reset);
|
||||
|
||||
trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
|
||||
ret ? strerror(errno) : "Success");
|
||||
|
||||
out:
|
||||
/* Re-enable INTx on affected devices */
|
||||
for (i = 0; i < info->count; i++) {
|
||||
PCIHostDeviceAddress host;
|
||||
VFIOPCIDevice *tmp;
|
||||
VFIODevice *vbasedev_iter;
|
||||
|
||||
host.domain = devices[i].segment;
|
||||
host.bus = devices[i].bus;
|
||||
host.slot = PCI_SLOT(devices[i].devfn);
|
||||
host.function = PCI_FUNC(devices[i].devfn);
|
||||
|
||||
if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
if (group->groupid == devices[i].group_id) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!group) {
|
||||
break;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
|
||||
if (!vbasedev_iter->dev->realized ||
|
||||
vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
|
||||
continue;
|
||||
}
|
||||
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
|
||||
if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
|
||||
vfio_pci_post_reset(tmp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
out_single:
|
||||
if (!single) {
|
||||
vfio_pci_post_reset(vdev);
|
||||
}
|
||||
g_free(info);
|
||||
|
||||
return ret;
|
||||
return ops->pci_hot_reset(vbasedev, single);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3078,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||
VFIODevice *vbasedev = &vdev->vbasedev;
|
||||
char *tmp, *subsys;
|
||||
Error *err = NULL;
|
||||
struct stat st;
|
||||
int i, ret;
|
||||
bool is_mdev;
|
||||
char uuid[UUID_STR_LEN];
|
||||
char *name;
|
||||
|
||||
if (!vbasedev->sysfsdev) {
|
||||
if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
|
||||
if (!(~vdev->host.domain || ~vdev->host.bus ||
|
||||
~vdev->host.slot || ~vdev->host.function)) {
|
||||
error_setg(errp, "No provided host device");
|
||||
error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
"or -device vfio-pci,fd=DEVICE_FD "
|
||||
#endif
|
||||
"or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
|
||||
return;
|
||||
}
|
||||
@ -3098,17 +2966,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||
vdev->host.slot, vdev->host.function);
|
||||
}
|
||||
|
||||
if (stat(vbasedev->sysfsdev, &st) < 0) {
|
||||
error_setg_errno(errp, errno, "no such host device");
|
||||
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
|
||||
if (vfio_device_get_name(vbasedev, errp) < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
|
||||
vbasedev->ops = &vfio_pci_ops;
|
||||
vbasedev->type = VFIO_DEVICE_TYPE_PCI;
|
||||
vbasedev->dev = DEVICE(vdev);
|
||||
|
||||
/*
|
||||
* Mediated devices *might* operate compatibly with discarding of RAM, but
|
||||
* we cannot know for certain, it depends on whether the mdev vendor driver
|
||||
@ -3456,6 +3317,7 @@ static void vfio_instance_init(Object *obj)
|
||||
{
|
||||
PCIDevice *pci_dev = PCI_DEVICE(obj);
|
||||
VFIOPCIDevice *vdev = VFIO_PCI(obj);
|
||||
VFIODevice *vbasedev = &vdev->vbasedev;
|
||||
|
||||
device_add_bootindex_property(obj, &vdev->bootindex,
|
||||
"bootindex", NULL,
|
||||
@ -3465,6 +3327,9 @@ static void vfio_instance_init(Object *obj)
|
||||
vdev->host.slot = ~0U;
|
||||
vdev->host.function = ~0U;
|
||||
|
||||
vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops,
|
||||
DEVICE(vdev), false);
|
||||
|
||||
vdev->nv_gpudirect_clique = 0xFF;
|
||||
|
||||
/* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
|
||||
@ -3517,14 +3382,20 @@ static Property vfio_pci_dev_properties[] = {
|
||||
qdev_prop_nv_gpudirect_clique, uint8_t),
|
||||
DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
|
||||
OFF_AUTOPCIBAR_OFF),
|
||||
/*
|
||||
* TODO - support passed fds... is this necessary?
|
||||
* DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
|
||||
* DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
|
||||
*/
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
|
||||
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
|
||||
#endif
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
|
||||
{
|
||||
vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
DeviceClass *dc = DEVICE_CLASS(klass);
|
||||
@ -3532,6 +3403,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
|
||||
|
||||
dc->reset = vfio_pci_reset;
|
||||
device_class_set_props(dc, vfio_pci_dev_properties);
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
|
||||
#endif
|
||||
dc->desc = "VFIO-based PCI device assignment";
|
||||
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
|
||||
pdc->realize = vfio_realize;
|
||||
|
@ -218,6 +218,12 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
|
||||
|
||||
extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
|
||||
|
||||
void vfio_pci_pre_reset(VFIOPCIDevice *vdev);
|
||||
void vfio_pci_post_reset(VFIOPCIDevice *vdev);
|
||||
bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name);
|
||||
int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
|
||||
struct vfio_pci_hot_reset_info **info_p);
|
||||
|
||||
int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
|
||||
|
||||
int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
|
||||
|
@ -15,11 +15,13 @@
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||
#include "qapi/error.h"
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/vfio.h>
|
||||
|
||||
#include "hw/vfio/vfio-platform.h"
|
||||
#include "sysemu/iommufd.h"
|
||||
#include "migration/vmstate.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/lockable.h"
|
||||
@ -529,14 +531,13 @@ static VFIODeviceOps vfio_platform_ops = {
|
||||
*/
|
||||
static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
|
||||
{
|
||||
struct stat st;
|
||||
int ret;
|
||||
|
||||
/* @sysfsdev takes precedence over @host */
|
||||
if (vbasedev->sysfsdev) {
|
||||
/* @fd takes precedence over @sysfsdev which takes precedence over @host */
|
||||
if (vbasedev->fd < 0 && vbasedev->sysfsdev) {
|
||||
g_free(vbasedev->name);
|
||||
vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
|
||||
} else {
|
||||
} else if (vbasedev->fd < 0) {
|
||||
if (!vbasedev->name || strchr(vbasedev->name, '/')) {
|
||||
error_setg(errp, "wrong host device name");
|
||||
return -EINVAL;
|
||||
@ -546,10 +547,9 @@ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
|
||||
vbasedev->name);
|
||||
}
|
||||
|
||||
if (stat(vbasedev->sysfsdev, &st) < 0) {
|
||||
error_setg_errno(errp, errno,
|
||||
"failed to get the sysfs host device file status");
|
||||
return -errno;
|
||||
ret = vfio_device_get_name(vbasedev, errp);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = vfio_attach_device(vbasedev->name, vbasedev,
|
||||
@ -581,10 +581,6 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
|
||||
VFIODevice *vbasedev = &vdev->vbasedev;
|
||||
int i, ret;
|
||||
|
||||
vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
|
||||
vbasedev->dev = dev;
|
||||
vbasedev->ops = &vfio_platform_ops;
|
||||
|
||||
qemu_mutex_init(&vdev->intp_mutex);
|
||||
|
||||
trace_vfio_platform_realize(vbasedev->sysfsdev ?
|
||||
@ -649,9 +645,29 @@ static Property vfio_platform_dev_properties[] = {
|
||||
DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
|
||||
mmap_timeout, 1100),
|
||||
DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd,
|
||||
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
|
||||
#endif
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
static void vfio_platform_instance_init(Object *obj)
|
||||
{
|
||||
VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);
|
||||
VFIODevice *vbasedev = &vdev->vbasedev;
|
||||
|
||||
vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops,
|
||||
DEVICE(vdev), false);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp)
|
||||
{
|
||||
vfio_device_set_fd(&VFIO_PLATFORM_DEVICE(obj)->vbasedev, str, errp);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void vfio_platform_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
DeviceClass *dc = DEVICE_CLASS(klass);
|
||||
@ -659,6 +675,9 @@ static void vfio_platform_class_init(ObjectClass *klass, void *data)
|
||||
|
||||
dc->realize = vfio_platform_realize;
|
||||
device_class_set_props(dc, vfio_platform_dev_properties);
|
||||
#ifdef CONFIG_IOMMUFD
|
||||
object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd);
|
||||
#endif
|
||||
dc->vmsd = &vfio_platform_vmstate;
|
||||
dc->desc = "VFIO-based platform device assignment";
|
||||
sbc->connect_irq_notifier = vfio_start_irqfd_injection;
|
||||
@ -671,6 +690,7 @@ static const TypeInfo vfio_platform_dev_info = {
|
||||
.name = TYPE_VFIO_PLATFORM,
|
||||
.parent = TYPE_SYS_BUS_DEVICE,
|
||||
.instance_size = sizeof(VFIOPlatformDevice),
|
||||
.instance_init = vfio_platform_instance_init,
|
||||
.class_init = vfio_platform_class_init,
|
||||
.class_size = sizeof(VFIOPlatformDeviceClass),
|
||||
};
|
||||
|
108
hw/vfio/spapr.c
108
hw/vfio/spapr.c
@ -24,6 +24,12 @@
|
||||
#include "qapi/error.h"
|
||||
#include "trace.h"
|
||||
|
||||
typedef struct VFIOSpaprContainer {
|
||||
VFIOContainer container;
|
||||
MemoryListener prereg_listener;
|
||||
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||
} VFIOSpaprContainer;
|
||||
|
||||
static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section)
|
||||
{
|
||||
if (memory_region_is_iommu(section->mr)) {
|
||||
@ -44,8 +50,10 @@ static void *vfio_prereg_gpa_to_vaddr(MemoryRegionSection *section, hwaddr gpa)
|
||||
static void vfio_prereg_listener_region_add(MemoryListener *listener,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
VFIOContainer *container = container_of(listener, VFIOContainer,
|
||||
prereg_listener);
|
||||
VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer,
|
||||
prereg_listener);
|
||||
VFIOContainer *container = &scontainer->container;
|
||||
VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||
const hwaddr gpa = section->offset_within_address_space;
|
||||
hwaddr end;
|
||||
int ret;
|
||||
@ -88,9 +96,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener,
|
||||
* can gracefully fail. Runtime, there's not much we can do other
|
||||
* than throw a hardware error.
|
||||
*/
|
||||
if (!container->initialized) {
|
||||
if (!container->error) {
|
||||
error_setg_errno(&container->error, -ret,
|
||||
if (!bcontainer->initialized) {
|
||||
if (!bcontainer->error) {
|
||||
error_setg_errno(&bcontainer->error, -ret,
|
||||
"Memory registering failed");
|
||||
}
|
||||
} else {
|
||||
@ -102,8 +110,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener,
|
||||
static void vfio_prereg_listener_region_del(MemoryListener *listener,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
VFIOContainer *container = container_of(listener, VFIOContainer,
|
||||
prereg_listener);
|
||||
VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer,
|
||||
prereg_listener);
|
||||
VFIOContainer *container = &scontainer->container;
|
||||
const hwaddr gpa = section->offset_within_address_space;
|
||||
hwaddr end;
|
||||
int ret;
|
||||
@ -146,12 +155,12 @@ static const MemoryListener vfio_prereg_listener = {
|
||||
.region_del = vfio_prereg_listener_region_del,
|
||||
};
|
||||
|
||||
static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova,
|
||||
static void vfio_host_win_add(VFIOSpaprContainer *scontainer, hwaddr min_iova,
|
||||
hwaddr max_iova, uint64_t iova_pgsizes)
|
||||
{
|
||||
VFIOHostDMAWindow *hostwin;
|
||||
|
||||
QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
|
||||
QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) {
|
||||
if (ranges_overlap(hostwin->min_iova,
|
||||
hostwin->max_iova - hostwin->min_iova + 1,
|
||||
min_iova,
|
||||
@ -165,15 +174,15 @@ static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova,
|
||||
hostwin->min_iova = min_iova;
|
||||
hostwin->max_iova = max_iova;
|
||||
hostwin->iova_pgsizes = iova_pgsizes;
|
||||
QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
|
||||
QLIST_INSERT_HEAD(&scontainer->hostwin_list, hostwin, hostwin_next);
|
||||
}
|
||||
|
||||
static int vfio_host_win_del(VFIOContainer *container,
|
||||
static int vfio_host_win_del(VFIOSpaprContainer *scontainer,
|
||||
hwaddr min_iova, hwaddr max_iova)
|
||||
{
|
||||
VFIOHostDMAWindow *hostwin;
|
||||
|
||||
QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
|
||||
QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) {
|
||||
if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
|
||||
QLIST_REMOVE(hostwin, hostwin_next);
|
||||
g_free(hostwin);
|
||||
@ -184,7 +193,7 @@ static int vfio_host_win_del(VFIOContainer *container,
|
||||
return -1;
|
||||
}
|
||||
|
||||
static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container,
|
||||
static VFIOHostDMAWindow *vfio_find_hostwin(VFIOSpaprContainer *container,
|
||||
hwaddr iova, hwaddr end)
|
||||
{
|
||||
VFIOHostDMAWindow *hostwin;
|
||||
@ -226,6 +235,7 @@ static int vfio_spapr_create_window(VFIOContainer *container,
|
||||
hwaddr *pgsize)
|
||||
{
|
||||
int ret = 0;
|
||||
VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||
IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
|
||||
uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask;
|
||||
unsigned entries, bits_total, bits_per_level, max_levels;
|
||||
@ -239,13 +249,13 @@ static int vfio_spapr_create_window(VFIOContainer *container,
|
||||
if (pagesize > rampagesize) {
|
||||
pagesize = rampagesize;
|
||||
}
|
||||
pgmask = container->pgsizes & (pagesize | (pagesize - 1));
|
||||
pgmask = bcontainer->pgsizes & (pagesize | (pagesize - 1));
|
||||
pagesize = pgmask ? (1ULL << (63 - clz64(pgmask))) : 0;
|
||||
if (!pagesize) {
|
||||
error_report("Host doesn't support page size 0x%"PRIx64
|
||||
", the supported mask is 0x%lx",
|
||||
memory_region_iommu_get_min_page_size(iommu_mr),
|
||||
container->pgsizes);
|
||||
bcontainer->pgsizes);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -313,10 +323,15 @@ static int vfio_spapr_create_window(VFIOContainer *container,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vfio_container_add_section_window(VFIOContainer *container,
|
||||
MemoryRegionSection *section,
|
||||
Error **errp)
|
||||
static int
|
||||
vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section,
|
||||
Error **errp)
|
||||
{
|
||||
VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||
bcontainer);
|
||||
VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||
container);
|
||||
VFIOHostDMAWindow *hostwin;
|
||||
hwaddr pgsize = 0;
|
||||
int ret;
|
||||
@ -332,7 +347,7 @@ int vfio_container_add_section_window(VFIOContainer *container,
|
||||
iova = section->offset_within_address_space;
|
||||
end = iova + int128_get64(section->size) - 1;
|
||||
|
||||
if (!vfio_find_hostwin(container, iova, end)) {
|
||||
if (!vfio_find_hostwin(scontainer, iova, end)) {
|
||||
error_setg(errp, "Container %p can't map guest IOVA region"
|
||||
" 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container,
|
||||
iova, end);
|
||||
@ -346,7 +361,7 @@ int vfio_container_add_section_window(VFIOContainer *container,
|
||||
}
|
||||
|
||||
/* For now intersections are not allowed, we may relax this later */
|
||||
QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
|
||||
QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) {
|
||||
if (ranges_overlap(hostwin->min_iova,
|
||||
hostwin->max_iova - hostwin->min_iova + 1,
|
||||
section->offset_within_address_space,
|
||||
@ -368,7 +383,7 @@ int vfio_container_add_section_window(VFIOContainer *container,
|
||||
return ret;
|
||||
}
|
||||
|
||||
vfio_host_win_add(container, section->offset_within_address_space,
|
||||
vfio_host_win_add(scontainer, section->offset_within_address_space,
|
||||
section->offset_within_address_space +
|
||||
int128_get64(section->size) - 1, pgsize);
|
||||
#ifdef CONFIG_KVM
|
||||
@ -401,16 +416,22 @@ int vfio_container_add_section_window(VFIOContainer *container,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vfio_container_del_section_window(VFIOContainer *container,
|
||||
MemoryRegionSection *section)
|
||||
static void
|
||||
vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||
bcontainer);
|
||||
VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||
container);
|
||||
|
||||
if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
|
||||
return;
|
||||
}
|
||||
|
||||
vfio_spapr_remove_window(container,
|
||||
section->offset_within_address_space);
|
||||
if (vfio_host_win_del(container,
|
||||
if (vfio_host_win_del(scontainer,
|
||||
section->offset_within_address_space,
|
||||
section->offset_within_address_space +
|
||||
int128_get64(section->size) - 1) < 0) {
|
||||
@ -419,13 +440,26 @@ void vfio_container_del_section_window(VFIOContainer *container,
|
||||
}
|
||||
}
|
||||
|
||||
static VFIOIOMMUOps vfio_iommu_spapr_ops;
|
||||
|
||||
static void setup_spapr_ops(VFIOContainerBase *bcontainer)
|
||||
{
|
||||
vfio_iommu_spapr_ops = *bcontainer->ops;
|
||||
vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window;
|
||||
vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window;
|
||||
bcontainer->ops = &vfio_iommu_spapr_ops;
|
||||
}
|
||||
|
||||
int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||
{
|
||||
VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||
VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||
container);
|
||||
struct vfio_iommu_spapr_tce_info info;
|
||||
bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
|
||||
int ret, fd = container->fd;
|
||||
|
||||
QLIST_INIT(&container->hostwin_list);
|
||||
QLIST_INIT(&scontainer->hostwin_list);
|
||||
|
||||
/*
|
||||
* The host kernel code implementing VFIO_IOMMU_DISABLE is called
|
||||
@ -439,13 +473,13 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||
return -errno;
|
||||
}
|
||||
} else {
|
||||
container->prereg_listener = vfio_prereg_listener;
|
||||
scontainer->prereg_listener = vfio_prereg_listener;
|
||||
|
||||
memory_listener_register(&container->prereg_listener,
|
||||
memory_listener_register(&scontainer->prereg_listener,
|
||||
&address_space_memory);
|
||||
if (container->error) {
|
||||
if (bcontainer->error) {
|
||||
ret = -1;
|
||||
error_propagate_prepend(errp, container->error,
|
||||
error_propagate_prepend(errp, bcontainer->error,
|
||||
"RAM memory listener initialization failed: ");
|
||||
goto listener_unregister_exit;
|
||||
}
|
||||
@ -461,7 +495,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||
}
|
||||
|
||||
if (v2) {
|
||||
container->pgsizes = info.ddw.pgsizes;
|
||||
bcontainer->pgsizes = info.ddw.pgsizes;
|
||||
/*
|
||||
* There is a default window in just created container.
|
||||
* To make region_add/del simpler, we better remove this
|
||||
@ -476,30 +510,34 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||
}
|
||||
} else {
|
||||
/* The default table uses 4K pages */
|
||||
container->pgsizes = 0x1000;
|
||||
vfio_host_win_add(container, info.dma32_window_start,
|
||||
bcontainer->pgsizes = 0x1000;
|
||||
vfio_host_win_add(scontainer, info.dma32_window_start,
|
||||
info.dma32_window_start +
|
||||
info.dma32_window_size - 1,
|
||||
0x1000);
|
||||
}
|
||||
|
||||
setup_spapr_ops(bcontainer);
|
||||
|
||||
return 0;
|
||||
|
||||
listener_unregister_exit:
|
||||
if (v2) {
|
||||
memory_listener_unregister(&container->prereg_listener);
|
||||
memory_listener_unregister(&scontainer->prereg_listener);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void vfio_spapr_container_deinit(VFIOContainer *container)
|
||||
{
|
||||
VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||
container);
|
||||
VFIOHostDMAWindow *hostwin, *next;
|
||||
|
||||
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
|
||||
memory_listener_unregister(&container->prereg_listener);
|
||||
memory_listener_unregister(&scontainer->prereg_listener);
|
||||
}
|
||||
QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
|
||||
QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next,
|
||||
next) {
|
||||
QLIST_REMOVE(hostwin, hostwin_next);
|
||||
g_free(hostwin);
|
||||
|
@ -116,8 +116,8 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re
|
||||
vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
|
||||
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
|
||||
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
|
||||
vfio_dma_unmap_overflow_workaround(void) ""
|
||||
vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64
|
||||
vfio_legacy_dma_unmap_overflow_workaround(void) ""
|
||||
vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64
|
||||
vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
|
||||
|
||||
# platform.c
|
||||
@ -164,3 +164,14 @@ vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcop
|
||||
vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
|
||||
vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
|
||||
vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
|
||||
|
||||
#iommufd.c
|
||||
|
||||
iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d"
|
||||
iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)"
|
||||
iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d"
|
||||
iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Successfully detached %s"
|
||||
iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
|
||||
iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d"
|
||||
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
|
||||
iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <linux/vfio.h>
|
||||
#endif
|
||||
#include "sysemu/sysemu.h"
|
||||
#include "hw/vfio/vfio-container-base.h"
|
||||
|
||||
#define VFIO_MSG_PREFIX "vfio %s: "
|
||||
|
||||
@ -72,54 +73,15 @@ typedef struct VFIOMigration {
|
||||
bool initial_data_sent;
|
||||
} VFIOMigration;
|
||||
|
||||
typedef struct VFIOAddressSpace {
|
||||
AddressSpace *as;
|
||||
QLIST_HEAD(, VFIOContainer) containers;
|
||||
QLIST_ENTRY(VFIOAddressSpace) list;
|
||||
} VFIOAddressSpace;
|
||||
|
||||
struct VFIOGroup;
|
||||
|
||||
typedef struct VFIOContainer {
|
||||
VFIOAddressSpace *space;
|
||||
VFIOContainerBase bcontainer;
|
||||
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
|
||||
MemoryListener listener;
|
||||
MemoryListener prereg_listener;
|
||||
unsigned iommu_type;
|
||||
Error *error;
|
||||
bool initialized;
|
||||
bool dirty_pages_supported;
|
||||
uint64_t dirty_pgsizes;
|
||||
uint64_t max_dirty_bitmap_size;
|
||||
unsigned long pgsizes;
|
||||
unsigned int dma_max_mappings;
|
||||
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||
QLIST_HEAD(, VFIOGroup) group_list;
|
||||
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
|
||||
QLIST_ENTRY(VFIOContainer) next;
|
||||
QLIST_HEAD(, VFIODevice) device_list;
|
||||
GList *iova_ranges;
|
||||
} VFIOContainer;
|
||||
|
||||
typedef struct VFIOGuestIOMMU {
|
||||
VFIOContainer *container;
|
||||
IOMMUMemoryRegion *iommu_mr;
|
||||
hwaddr iommu_offset;
|
||||
IOMMUNotifier n;
|
||||
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
|
||||
} VFIOGuestIOMMU;
|
||||
|
||||
typedef struct VFIORamDiscardListener {
|
||||
VFIOContainer *container;
|
||||
MemoryRegion *mr;
|
||||
hwaddr offset_within_address_space;
|
||||
hwaddr size;
|
||||
uint64_t granularity;
|
||||
RamDiscardListener listener;
|
||||
QLIST_ENTRY(VFIORamDiscardListener) next;
|
||||
} VFIORamDiscardListener;
|
||||
|
||||
typedef struct VFIOHostDMAWindow {
|
||||
hwaddr min_iova;
|
||||
hwaddr max_iova;
|
||||
@ -127,6 +89,14 @@ typedef struct VFIOHostDMAWindow {
|
||||
QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
|
||||
} VFIOHostDMAWindow;
|
||||
|
||||
typedef struct IOMMUFDBackend IOMMUFDBackend;
|
||||
|
||||
typedef struct VFIOIOMMUFDContainer {
|
||||
VFIOContainerBase bcontainer;
|
||||
IOMMUFDBackend *be;
|
||||
uint32_t ioas_id;
|
||||
} VFIOIOMMUFDContainer;
|
||||
|
||||
typedef struct VFIODeviceOps VFIODeviceOps;
|
||||
|
||||
typedef struct VFIODevice {
|
||||
@ -134,7 +104,7 @@ typedef struct VFIODevice {
|
||||
QLIST_ENTRY(VFIODevice) container_next;
|
||||
QLIST_ENTRY(VFIODevice) global_next;
|
||||
struct VFIOGroup *group;
|
||||
VFIOContainer *container;
|
||||
VFIOContainerBase *bcontainer;
|
||||
char *sysfsdev;
|
||||
char *name;
|
||||
DeviceState *dev;
|
||||
@ -154,6 +124,8 @@ typedef struct VFIODevice {
|
||||
OnOffAuto pre_copy_dirty_page_tracking;
|
||||
bool dirty_pages_supported;
|
||||
bool dirty_tracking;
|
||||
int devid;
|
||||
IOMMUFDBackend *iommufd;
|
||||
} VFIODevice;
|
||||
|
||||
struct VFIODeviceOps {
|
||||
@ -201,31 +173,10 @@ typedef struct VFIODisplay {
|
||||
} dmabuf;
|
||||
} VFIODisplay;
|
||||
|
||||
typedef struct {
|
||||
unsigned long *bitmap;
|
||||
hwaddr size;
|
||||
hwaddr pages;
|
||||
} VFIOBitmap;
|
||||
|
||||
VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
|
||||
void vfio_put_address_space(VFIOAddressSpace *space);
|
||||
bool vfio_devices_all_running_and_saving(VFIOContainer *container);
|
||||
|
||||
/* container->fd */
|
||||
int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
|
||||
ram_addr_t size, IOMMUTLBEntry *iotlb);
|
||||
int vfio_dma_map(VFIOContainer *container, hwaddr iova,
|
||||
ram_addr_t size, void *vaddr, bool readonly);
|
||||
int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start);
|
||||
int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
|
||||
hwaddr iova, hwaddr size);
|
||||
|
||||
/* SPAPR specific */
|
||||
int vfio_container_add_section_window(VFIOContainer *container,
|
||||
MemoryRegionSection *section,
|
||||
Error **errp);
|
||||
void vfio_container_del_section_window(VFIOContainer *container,
|
||||
MemoryRegionSection *section);
|
||||
int vfio_spapr_container_init(VFIOContainer *container, Error **errp);
|
||||
void vfio_spapr_container_deinit(VFIOContainer *container);
|
||||
|
||||
@ -259,7 +210,8 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
|
||||
typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList;
|
||||
extern VFIOGroupList vfio_group_list;
|
||||
extern VFIODeviceList vfio_device_list;
|
||||
|
||||
extern const VFIOIOMMUOps vfio_legacy_ops;
|
||||
extern const VFIOIOMMUOps vfio_iommufd_ops;
|
||||
extern const MemoryListener vfio_memory_listener;
|
||||
extern int vfio_kvm_device_fd;
|
||||
|
||||
@ -292,11 +244,19 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
|
||||
void vfio_migration_exit(VFIODevice *vbasedev);
|
||||
|
||||
int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size);
|
||||
bool vfio_devices_all_running_and_mig_active(VFIOContainer *container);
|
||||
bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container);
|
||||
int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
|
||||
bool
|
||||
vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer);
|
||||
bool
|
||||
vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer);
|
||||
int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||
VFIOBitmap *vbmap, hwaddr iova,
|
||||
hwaddr size);
|
||||
int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||
uint64_t size, ram_addr_t ram_addr);
|
||||
int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
|
||||
uint64_t size, ram_addr_t ram_addr);
|
||||
|
||||
/* Returns 0 on success, or a negative errno. */
|
||||
int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
|
||||
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
|
||||
void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
|
||||
DeviceState *dev, bool ram_discard);
|
||||
#endif /* HW_VFIO_VFIO_COMMON_H */
|
||||
|
121
include/hw/vfio/vfio-container-base.h
Normal file
121
include/hw/vfio/vfio-container-base.h
Normal file
@ -0,0 +1,121 @@
|
||||
/*
|
||||
* VFIO BASE CONTAINER
|
||||
*
|
||||
* Copyright (C) 2023 Intel Corporation.
|
||||
* Copyright Red Hat, Inc. 2023
|
||||
*
|
||||
* Authors: Yi Liu <yi.l.liu@intel.com>
|
||||
* Eric Auger <eric.auger@redhat.com>
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
*/
|
||||
|
||||
#ifndef HW_VFIO_VFIO_CONTAINER_BASE_H
|
||||
#define HW_VFIO_VFIO_CONTAINER_BASE_H
|
||||
|
||||
#include "exec/memory.h"
|
||||
|
||||
typedef struct VFIODevice VFIODevice;
|
||||
typedef struct VFIOIOMMUOps VFIOIOMMUOps;
|
||||
|
||||
typedef struct {
|
||||
unsigned long *bitmap;
|
||||
hwaddr size;
|
||||
hwaddr pages;
|
||||
} VFIOBitmap;
|
||||
|
||||
typedef struct VFIOAddressSpace {
|
||||
AddressSpace *as;
|
||||
QLIST_HEAD(, VFIOContainerBase) containers;
|
||||
QLIST_ENTRY(VFIOAddressSpace) list;
|
||||
} VFIOAddressSpace;
|
||||
|
||||
/*
|
||||
* This is the base object for vfio container backends
|
||||
*/
|
||||
typedef struct VFIOContainerBase {
|
||||
const VFIOIOMMUOps *ops;
|
||||
VFIOAddressSpace *space;
|
||||
MemoryListener listener;
|
||||
Error *error;
|
||||
bool initialized;
|
||||
uint64_t dirty_pgsizes;
|
||||
uint64_t max_dirty_bitmap_size;
|
||||
unsigned long pgsizes;
|
||||
unsigned int dma_max_mappings;
|
||||
bool dirty_pages_supported;
|
||||
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
|
||||
QLIST_ENTRY(VFIOContainerBase) next;
|
||||
QLIST_HEAD(, VFIODevice) device_list;
|
||||
GList *iova_ranges;
|
||||
} VFIOContainerBase;
|
||||
|
||||
typedef struct VFIOGuestIOMMU {
|
||||
VFIOContainerBase *bcontainer;
|
||||
IOMMUMemoryRegion *iommu_mr;
|
||||
hwaddr iommu_offset;
|
||||
IOMMUNotifier n;
|
||||
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
|
||||
} VFIOGuestIOMMU;
|
||||
|
||||
typedef struct VFIORamDiscardListener {
|
||||
VFIOContainerBase *bcontainer;
|
||||
MemoryRegion *mr;
|
||||
hwaddr offset_within_address_space;
|
||||
hwaddr size;
|
||||
uint64_t granularity;
|
||||
RamDiscardListener listener;
|
||||
QLIST_ENTRY(VFIORamDiscardListener) next;
|
||||
} VFIORamDiscardListener;
|
||||
|
||||
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
void *vaddr, bool readonly);
|
||||
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
IOMMUTLBEntry *iotlb);
|
||||
int vfio_container_add_section_window(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section,
|
||||
Error **errp);
|
||||
void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section);
|
||||
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||
bool start);
|
||||
int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||
VFIOBitmap *vbmap,
|
||||
hwaddr iova, hwaddr size);
|
||||
|
||||
void vfio_container_init(VFIOContainerBase *bcontainer,
|
||||
VFIOAddressSpace *space,
|
||||
const VFIOIOMMUOps *ops);
|
||||
void vfio_container_destroy(VFIOContainerBase *bcontainer);
|
||||
|
||||
struct VFIOIOMMUOps {
|
||||
/* basic feature */
|
||||
int (*dma_map)(const VFIOContainerBase *bcontainer,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
void *vaddr, bool readonly);
|
||||
int (*dma_unmap)(const VFIOContainerBase *bcontainer,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
IOMMUTLBEntry *iotlb);
|
||||
int (*attach_device)(const char *name, VFIODevice *vbasedev,
|
||||
AddressSpace *as, Error **errp);
|
||||
void (*detach_device)(VFIODevice *vbasedev);
|
||||
/* migration feature */
|
||||
int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
|
||||
bool start);
|
||||
int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
|
||||
VFIOBitmap *vbmap,
|
||||
hwaddr iova, hwaddr size);
|
||||
/* PCI specific */
|
||||
int (*pci_hot_reset)(VFIODevice *vbasedev, bool single);
|
||||
|
||||
/* SPAPR specific */
|
||||
int (*add_window)(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section,
|
||||
Error **errp);
|
||||
void (*del_window)(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section);
|
||||
};
|
||||
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
|
16
include/qemu/chardev_open.h
Normal file
16
include/qemu/chardev_open.h
Normal file
@ -0,0 +1,16 @@
|
||||
/*
|
||||
* QEMU Chardev Helper
|
||||
*
|
||||
* Copyright (C) 2023 Intel Corporation.
|
||||
*
|
||||
* Authors: Yi Liu <yi.l.liu@intel.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef QEMU_CHARDEV_OPEN_H
|
||||
#define QEMU_CHARDEV_OPEN_H
|
||||
|
||||
int open_cdev(const char *devpath, dev_t cdev);
|
||||
#endif
|
38
include/sysemu/iommufd.h
Normal file
38
include/sysemu/iommufd.h
Normal file
@ -0,0 +1,38 @@
|
||||
#ifndef SYSEMU_IOMMUFD_H
|
||||
#define SYSEMU_IOMMUFD_H
|
||||
|
||||
#include "qom/object.h"
|
||||
#include "qemu/thread.h"
|
||||
#include "exec/hwaddr.h"
|
||||
#include "exec/cpu-common.h"
|
||||
|
||||
#define TYPE_IOMMUFD_BACKEND "iommufd"
|
||||
OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
|
||||
|
||||
struct IOMMUFDBackendClass {
|
||||
ObjectClass parent_class;
|
||||
};
|
||||
|
||||
struct IOMMUFDBackend {
|
||||
Object parent;
|
||||
|
||||
/*< protected >*/
|
||||
int fd; /* /dev/iommu file descriptor */
|
||||
bool owned; /* is the /dev/iommu opened internally */
|
||||
QemuMutex lock;
|
||||
uint32_t users;
|
||||
|
||||
/*< public >*/
|
||||
};
|
||||
|
||||
int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
|
||||
void iommufd_backend_disconnect(IOMMUFDBackend *be);
|
||||
|
||||
int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||
Error **errp);
|
||||
void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id);
|
||||
int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
|
||||
ram_addr_t size, void *vaddr, bool readonly);
|
||||
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
|
||||
hwaddr iova, ram_addr_t size);
|
||||
#endif
|
@ -794,6 +794,23 @@
|
||||
{ 'struct': 'VfioUserServerProperties',
|
||||
'data': { 'socket': 'SocketAddress', 'device': 'str' } }
|
||||
|
||||
##
|
||||
# @IOMMUFDProperties:
|
||||
#
|
||||
# Properties for iommufd objects.
|
||||
#
|
||||
# @fd: file descriptor name previously passed via 'getfd' command,
|
||||
# which represents a pre-opened /dev/iommu. This allows the
|
||||
# iommufd object to be shared accross several subsystems
|
||||
# (VFIO, VDPA, ...), and the file descriptor to be shared
|
||||
# with other process, e.g. DPDK. (default: QEMU opens
|
||||
# /dev/iommu by itself)
|
||||
#
|
||||
# Since: 9.0
|
||||
##
|
||||
{ 'struct': 'IOMMUFDProperties',
|
||||
'data': { '*fd': 'str' } }
|
||||
|
||||
##
|
||||
# @RngProperties:
|
||||
#
|
||||
@ -934,6 +951,7 @@
|
||||
'input-barrier',
|
||||
{ 'name': 'input-linux',
|
||||
'if': 'CONFIG_LINUX' },
|
||||
'iommufd',
|
||||
'iothread',
|
||||
'main-loop',
|
||||
{ 'name': 'memory-backend-epc',
|
||||
@ -1003,6 +1021,7 @@
|
||||
'input-barrier': 'InputBarrierProperties',
|
||||
'input-linux': { 'type': 'InputLinuxProperties',
|
||||
'if': 'CONFIG_LINUX' },
|
||||
'iommufd': 'IOMMUFDProperties',
|
||||
'iothread': 'IothreadProperties',
|
||||
'main-loop': 'MainLoopProperties',
|
||||
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
|
||||
|
@ -5224,6 +5224,18 @@ SRST
|
||||
|
||||
The ``share`` boolean option is on by default with memfd.
|
||||
|
||||
``-object iommufd,id=id[,fd=fd]``
|
||||
Creates an iommufd backend which allows control of DMA mapping
|
||||
through the ``/dev/iommu`` device.
|
||||
|
||||
The ``id`` parameter is a unique ID which frontends (such as
|
||||
vfio-pci of vdpa) will use to connect with the iommufd backend.
|
||||
|
||||
The ``fd`` parameter is an optional pre-opened file descriptor
|
||||
resulting from ``/dev/iommu`` opening. Usually the iommufd is shared
|
||||
across all subsystems, bringing the benefit of centralized
|
||||
reference counting.
|
||||
|
||||
``-object rng-builtin,id=id``
|
||||
Creates a random number generator backend which obtains entropy
|
||||
from QEMU builtin functions. The ``id`` parameter is a unique ID
|
||||
|
81
util/chardev_open.c
Normal file
81
util/chardev_open.c
Normal file
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Mellanox Technologies. All rights reserved.
|
||||
* Copyright (C) 2023 Intel Corporation.
|
||||
*
|
||||
* This software is available to you under a choice of one of two
|
||||
* licenses. You may choose to be licensed under the terms of the GNU
|
||||
* General Public License (GPL) Version 2, available from the file
|
||||
* COPYING in the main directory of this source tree, or the
|
||||
* OpenIB.org BSD license below:
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors: Yi Liu <yi.l.liu@intel.com>
|
||||
*
|
||||
* Copied from
|
||||
* https://github.com/linux-rdma/rdma-core/blob/master/util/open_cdev.c
|
||||
*
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/chardev_open.h"
|
||||
|
||||
static int open_cdev_internal(const char *path, dev_t cdev)
|
||||
{
|
||||
struct stat st;
|
||||
int fd;
|
||||
|
||||
fd = qemu_open_old(path, O_RDWR);
|
||||
if (fd == -1) {
|
||||
return -1;
|
||||
}
|
||||
if (fstat(fd, &st) || !S_ISCHR(st.st_mode) ||
|
||||
(cdev != 0 && st.st_rdev != cdev)) {
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
return fd;
|
||||
}
|
||||
|
||||
static int open_cdev_robust(dev_t cdev)
|
||||
{
|
||||
g_autofree char *devpath = NULL;
|
||||
|
||||
/*
|
||||
* This assumes that udev is being used and is creating the /dev/char/
|
||||
* symlinks.
|
||||
*/
|
||||
devpath = g_strdup_printf("/dev/char/%u:%u", major(cdev), minor(cdev));
|
||||
return open_cdev_internal(devpath, cdev);
|
||||
}
|
||||
|
||||
int open_cdev(const char *devpath, dev_t cdev)
|
||||
{
|
||||
int fd;
|
||||
|
||||
fd = open_cdev_internal(devpath, cdev);
|
||||
if (fd == -1 && cdev != 0) {
|
||||
return open_cdev_robust(cdev);
|
||||
}
|
||||
return fd;
|
||||
}
|
@ -108,6 +108,7 @@ if have_block
|
||||
util_ss.add(files('filemonitor-stub.c'))
|
||||
endif
|
||||
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
|
||||
util_ss.add(when: 'CONFIG_LINUX', if_true: files('chardev_open.c'))
|
||||
endif
|
||||
|
||||
if cpu == 'aarch64'
|
||||
|
Loading…
Reference in New Issue
Block a user