From f36d4fb85f41604038386e1eb4295acd7d372d86 Mon Sep 17 00:00:00 2001 From: Kunkun Jiang Date: Wed, 27 Oct 2021 17:04:05 +0800 Subject: [PATCH 1/2] vfio/pci: Add support for mmapping sub-page MMIO BARs after live migration We can expand MemoryRegions of sub-page MMIO BARs in vfio_pci_write_config() to improve IO performance for some devices. However, the MemoryRegions of destination VM are not expanded any more after live migration. Because their addresses have been updated in vmstate_load_state() (vfio_pci_load_config) and vfio_sub_page_bar_update_mapping() will not be called. This may result in poor performance after live migration. So iterate BARs in vfio_pci_load_config() and try to update sub-page BARs. Reported-by: Nianyao Tang Reported-by: Qixin Gan Signed-off-by: Kunkun Jiang Link: https://lore.kernel.org/r/20211027090406.761-2-jiangkunkun@huawei.com Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 5cdf1d4298..7b45353ce2 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2453,7 +2453,12 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); PCIDevice *pdev = &vdev->pdev; - int ret; + pcibus_t old_addr[PCI_NUM_REGIONS - 1]; + int bar, ret; + + for (bar = 0; bar < PCI_ROM_SLOT; bar++) { + old_addr[bar] = pdev->io_regions[bar].addr; + } ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1); if (ret) { @@ -2463,6 +2468,18 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) vfio_pci_write_config(pdev, PCI_COMMAND, pci_get_word(pdev->config + PCI_COMMAND), 2); + for (bar = 0; bar < PCI_ROM_SLOT; bar++) { + /* + * The address may not be changed in some scenarios + * (e.g. the VF driver isn't loaded in VM). + */ + if (old_addr[bar] != pdev->io_regions[bar].addr && + vdev->bars[bar].region.size > 0 && + vdev->bars[bar].region.size < qemu_real_host_page_size) { + vfio_sub_page_bar_update_mapping(pdev, bar); + } + } + if (msi_enabled(pdev)) { vfio_msi_enable(vdev); } else if (msix_enabled(pdev)) { From e4b34708388b20f1ceb55f1d563d8da925a32424 Mon Sep 17 00:00:00 2001 From: Kunkun Jiang Date: Wed, 27 Oct 2021 17:04:06 +0800 Subject: [PATCH 2/2] vfio/common: Add a trace point when a MMIO RAM section cannot be mapped The MSI-X structures of some devices and other non-MSI-X structures may be in the same BAR. They may share one host page, especially in the case of large page granularity, such as 64K. For example, MSIX-Table size of 82599 NIC is 0x30 and the offset in Bar 3(size 64KB) is 0x0. vfio_listener_region_add() will be called to map the remaining range (0x30-0xffff). If host page size is 64KB, it will return early at 'int128_ge((int128_make64(iova), llend))' without any message. Let's add a trace point to inform users like commit 5c08600547c0 ("vfio: Use a trace point when a RAM section cannot be DMA mapped") did. Signed-off-by: Kunkun Jiang Link: https://lore.kernel.org/r/20211027090406.761-3-jiangkunkun@huawei.com Signed-off-by: Alex Williamson --- hw/vfio/common.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index a784b219e6..dd387b0d39 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -893,6 +893,13 @@ static void vfio_listener_region_add(MemoryListener *listener, llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); if (int128_ge(int128_make64(iova), llend)) { + if (memory_region_is_ram_device(section->mr)) { + trace_vfio_listener_region_add_no_dma_map( + memory_region_name(section->mr), + section->offset_within_address_space, + int128_getlo(section->size), + qemu_real_host_page_size); + } return; } end = int128_get64(int128_sub(llend, int128_one()));