vfio/pci: Introduce a vfio pci hot reset interface

Legacy vfio pci and iommufd cdev have different process to hot reset
vfio device, expand current code to abstract out pci_hot_reset callback
for legacy vfio, this same interface will also be used by iommufd
cdev vfio device.

Rename vfio_pci_hot_reset to vfio_legacy_pci_hot_reset and move it
into container.c.

vfio_pci_[pre/post]_reset and vfio_pci_host_match are exported so
they could be called in legacy and iommufd pci_hot_reset callback.

Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
This commit is contained in:
Zhenzhong Duan 2023-11-21 16:44:07 +08:00 committed by Cédric Le Goater
parent 4d36ec23a7
commit c328e7e8ad
4 changed files with 182 additions and 162 deletions

View File

@ -33,6 +33,7 @@
#include "trace.h"
#include "qapi/error.h"
#include "migration/migration.h"
#include "pci.h"
VFIOGroupList vfio_group_list =
QLIST_HEAD_INITIALIZER(vfio_group_list);
@ -922,6 +923,174 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev)
vfio_put_group(group);
}
static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
VFIOGroup *group;
struct vfio_pci_hot_reset_info *info = NULL;
struct vfio_pci_dependent_device *devices;
struct vfio_pci_hot_reset *reset;
int32_t *fds;
int ret, i, count;
bool multi = false;
trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
if (!single) {
vfio_pci_pre_reset(vdev);
}
vdev->vbasedev.needs_reset = false;
ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
if (ret) {
goto out_single;
}
devices = &info->devices[0];
trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
/* Verify that we have all the groups required */
for (i = 0; i < info->count; i++) {
PCIHostDeviceAddress host;
VFIOPCIDevice *tmp;
VFIODevice *vbasedev_iter;
host.domain = devices[i].segment;
host.bus = devices[i].bus;
host.slot = PCI_SLOT(devices[i].devfn);
host.function = PCI_FUNC(devices[i].devfn);
trace_vfio_pci_hot_reset_dep_devices(host.domain,
host.bus, host.slot, host.function, devices[i].group_id);
if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
continue;
}
QLIST_FOREACH(group, &vfio_group_list, next) {
if (group->groupid == devices[i].group_id) {
break;
}
}
if (!group) {
if (!vdev->has_pm_reset) {
error_report("vfio: Cannot reset device %s, "
"depends on group %d which is not owned.",
vdev->vbasedev.name, devices[i].group_id);
}
ret = -EPERM;
goto out;
}
/* Prep dependent devices for reset and clear our marker. */
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (!vbasedev_iter->dev->realized ||
vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
if (single) {
ret = -EINVAL;
goto out_single;
}
vfio_pci_pre_reset(tmp);
tmp->vbasedev.needs_reset = false;
multi = true;
break;
}
}
}
if (!single && !multi) {
ret = -EINVAL;
goto out_single;
}
/* Determine how many group fds need to be passed */
count = 0;
QLIST_FOREACH(group, &vfio_group_list, next) {
for (i = 0; i < info->count; i++) {
if (group->groupid == devices[i].group_id) {
count++;
break;
}
}
}
reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
fds = &reset->group_fds[0];
/* Fill in group fds */
QLIST_FOREACH(group, &vfio_group_list, next) {
for (i = 0; i < info->count; i++) {
if (group->groupid == devices[i].group_id) {
fds[reset->count++] = group->fd;
break;
}
}
}
/* Bus reset! */
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
g_free(reset);
if (ret) {
ret = -errno;
}
trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
ret ? strerror(errno) : "Success");
out:
/* Re-enable INTx on affected devices */
for (i = 0; i < info->count; i++) {
PCIHostDeviceAddress host;
VFIOPCIDevice *tmp;
VFIODevice *vbasedev_iter;
host.domain = devices[i].segment;
host.bus = devices[i].bus;
host.slot = PCI_SLOT(devices[i].devfn);
host.function = PCI_FUNC(devices[i].devfn);
if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
continue;
}
QLIST_FOREACH(group, &vfio_group_list, next) {
if (group->groupid == devices[i].group_id) {
break;
}
}
if (!group) {
break;
}
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (!vbasedev_iter->dev->realized ||
vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
vfio_pci_post_reset(tmp);
break;
}
}
}
out_single:
if (!single) {
vfio_pci_post_reset(vdev);
}
g_free(info);
return ret;
}
const VFIOIOMMUOps vfio_legacy_ops = {
.dma_map = vfio_legacy_dma_map,
.dma_unmap = vfio_legacy_dma_unmap,
@ -929,4 +1098,5 @@ const VFIOIOMMUOps vfio_legacy_ops = {
.detach_device = vfio_legacy_detach_device,
.set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking,
.query_dirty_bitmap = vfio_legacy_query_dirty_bitmap,
.pci_hot_reset = vfio_legacy_pci_hot_reset,
};

View File

@ -2374,7 +2374,7 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
return 0;
}
static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
{
PCIDevice *pdev = &vdev->pdev;
uint16_t cmd;
@ -2411,7 +2411,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
}
static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
void vfio_pci_post_reset(VFIOPCIDevice *vdev)
{
Error *err = NULL;
int nr;
@ -2435,7 +2435,7 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
vfio_quirk_reset(vdev);
}
static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
{
char tmp[13];
@ -2485,166 +2485,10 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
{
VFIOGroup *group;
struct vfio_pci_hot_reset_info *info = NULL;
struct vfio_pci_dependent_device *devices;
struct vfio_pci_hot_reset *reset;
int32_t *fds;
int ret, i, count;
bool multi = false;
VFIODevice *vbasedev = &vdev->vbasedev;
const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops;
trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
if (!single) {
vfio_pci_pre_reset(vdev);
}
vdev->vbasedev.needs_reset = false;
ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
if (ret) {
goto out_single;
}
devices = &info->devices[0];
trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
/* Verify that we have all the groups required */
for (i = 0; i < info->count; i++) {
PCIHostDeviceAddress host;
VFIOPCIDevice *tmp;
VFIODevice *vbasedev_iter;
host.domain = devices[i].segment;
host.bus = devices[i].bus;
host.slot = PCI_SLOT(devices[i].devfn);
host.function = PCI_FUNC(devices[i].devfn);
trace_vfio_pci_hot_reset_dep_devices(host.domain,
host.bus, host.slot, host.function, devices[i].group_id);
if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
continue;
}
QLIST_FOREACH(group, &vfio_group_list, next) {
if (group->groupid == devices[i].group_id) {
break;
}
}
if (!group) {
if (!vdev->has_pm_reset) {
error_report("vfio: Cannot reset device %s, "
"depends on group %d which is not owned.",
vdev->vbasedev.name, devices[i].group_id);
}
ret = -EPERM;
goto out;
}
/* Prep dependent devices for reset and clear our marker. */
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (!vbasedev_iter->dev->realized ||
vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
if (single) {
ret = -EINVAL;
goto out_single;
}
vfio_pci_pre_reset(tmp);
tmp->vbasedev.needs_reset = false;
multi = true;
break;
}
}
}
if (!single && !multi) {
ret = -EINVAL;
goto out_single;
}
/* Determine how many group fds need to be passed */
count = 0;
QLIST_FOREACH(group, &vfio_group_list, next) {
for (i = 0; i < info->count; i++) {
if (group->groupid == devices[i].group_id) {
count++;
break;
}
}
}
reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
fds = &reset->group_fds[0];
/* Fill in group fds */
QLIST_FOREACH(group, &vfio_group_list, next) {
for (i = 0; i < info->count; i++) {
if (group->groupid == devices[i].group_id) {
fds[reset->count++] = group->fd;
break;
}
}
}
/* Bus reset! */
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
g_free(reset);
trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
ret ? strerror(errno) : "Success");
out:
/* Re-enable INTx on affected devices */
for (i = 0; i < info->count; i++) {
PCIHostDeviceAddress host;
VFIOPCIDevice *tmp;
VFIODevice *vbasedev_iter;
host.domain = devices[i].segment;
host.bus = devices[i].bus;
host.slot = PCI_SLOT(devices[i].devfn);
host.function = PCI_FUNC(devices[i].devfn);
if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
continue;
}
QLIST_FOREACH(group, &vfio_group_list, next) {
if (group->groupid == devices[i].group_id) {
break;
}
}
if (!group) {
break;
}
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (!vbasedev_iter->dev->realized ||
vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
vfio_pci_post_reset(tmp);
break;
}
}
}
out_single:
if (!single) {
vfio_pci_post_reset(vdev);
}
g_free(info);
return ret;
return ops->pci_hot_reset(vbasedev, single);
}
/*

View File

@ -218,6 +218,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
void vfio_pci_pre_reset(VFIOPCIDevice *vdev);
void vfio_pci_post_reset(VFIOPCIDevice *vdev);
bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name);
int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
struct vfio_pci_hot_reset_info **info_p);

View File

@ -106,6 +106,9 @@ struct VFIOIOMMUOps {
int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start);
int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap,
hwaddr iova, hwaddr size);
/* PCI specific */
int (*pci_hot_reset)(VFIODevice *vbasedev, bool single);
/* SPAPR specific */
int (*add_window)(VFIOContainerBase *bcontainer,
MemoryRegionSection *section,