VFIO updates 2016-06-30

- Fix VGA quirks (stable 2.6) (Alex Williamson)
  - Registering PCIe extended capabilities (Chen Fan)
  - Hide read-only SR-IOV capability from VM (Alex Williamson)
  - MemoryRegionIOMMUOps.notify_started/stopped (Alexey Kardashevskiy)
  - hw_error on intel_iommu notify_started  (Alex Williamson)
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.14 (GNU/Linux)
 
 iQIcBAABAgAGBQJXdXbzAAoJECObm247sIsi4BoP/1ecLOWSWul+c3+2iIU3XHCj
 tNbu5WglavycoQIEOkYiniD8aOasq3GXBJViFcfu6oXekmZglWMq0QMkwIB5VCzh
 A5qKr5w5CqJtvz2Pjz5zpQGmk3SICeCPyTeZJ9DNtq5qWC4OTWNgfOdJK08dcREA
 owpqE53p9G4rIPnFnTmsMi2BOvs4XpnpF9A2WstOemOPmHtFJdLVak3oKB6LxgMZ
 du+23n7pIWm7IDvmFzLtJxuXWvlPNqg6sfRrAp6glGGokxQUVcIBTLfAoFgiOYMy
 7lAIyB84aXHkEBC6HV4+xGROjqJkvP19hkGtG/XPgafjYn1jxkSDklMo3yXZddLv
 384N2O75hwD16XjLlzvVNgIP1AcJSe1jlihBIxJ/GyHUapf8lkWdHgivrAUSwlBp
 sczlPtK3PYeukSS7Bjc2Dgx+7lD0nO1m/5Bg9kEYawjZmeEVGP2G6gX6hOlkIdhU
 LMVlmyLywyG9XkkLWRjkL2CcwLq3+8Ite50Cl2CxDIkyIB4cYkLCbtSYwBp7Eo6L
 Cwb/8/dPh0AGYrP3izHveG9U6wHTnz6f5VHLoAqWHtvUkk8M64dP3Xt9qkgmuRSc
 vi4v6JyVmP4Di5+M3O7BPmb2qk4+5DiCI12WI5Mhxs0FoWxzpftzL+ky2EmBd7o3
 Mr/wzN97Q+D7fXCJNrP1
 =bvAn
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20160630.0' into staging

VFIO updates 2016-06-30

 - Fix VGA quirks (stable 2.6) (Alex Williamson)
 - Registering PCIe extended capabilities (Chen Fan)
 - Hide read-only SR-IOV capability from VM (Alex Williamson)
 - MemoryRegionIOMMUOps.notify_started/stopped (Alexey Kardashevskiy)
 - hw_error on intel_iommu notify_started  (Alex Williamson)

# gpg: Signature made Thu 30 Jun 2016 20:45:55 BST
# gpg:                using RSA key 0x239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-update-20160630.0:
  intel_iommu: Throw hw_error on notify_started
  memory: Add MemoryRegionIOMMUOps.notify_started/stopped callbacks
  vfio/pci: Hide SR-IOV capability
  vfio: add pcie extended capability support
  vfio/pci: Fix VGA quirks

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2016-07-01 11:52:14 +01:00
commit 94e31093ff
8 changed files with 136 additions and 10 deletions

View File

@ -24,6 +24,7 @@
#include "exec/address-spaces.h"
#include "intel_iommu_internal.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
/*#define DEBUG_INTEL_IOMMU*/
#ifdef DEBUG_INTEL_IOMMU
@ -1871,6 +1872,16 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
return ret;
}
/*
 * notify_started hook for the intel-iommu emulation.
 *
 * Raises hw_error() naming the bus, slot and function that own the
 * address space containing @iommu, since the emulation does not
 * currently support IOMMU notifiers.
 */
static void vtd_iommu_notify_started(MemoryRegion *iommu)
{
    /* @iommu is embedded in a VTDAddressSpace; recover the container. */
    VTDAddressSpace *as = container_of(iommu, VTDAddressSpace, iommu);

    hw_error("Device at bus %s addr %02x.%d requires iommu notifier which "
             "is currently not supported by intel-iommu emulation",
             as->bus->qbus.name,
             PCI_SLOT(as->devfn),
             PCI_FUNC(as->devfn));
}
static const VMStateDescription vtd_vmstate = {
.name = "iommu-intel",
.unmigratable = 1,
@ -1938,6 +1949,7 @@ static void vtd_init(IntelIOMMUState *s)
memset(s->womask, 0, DMAR_REG_SIZE);
s->iommu_ops.translate = vtd_iommu_translate;
s->iommu_ops.notify_started = vtd_iommu_notify_started;
s->root = 0;
s->root_extended = false;
s->dmar_enabled = false;

View File

@ -455,7 +455,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
if (giommu->iommu == section->mr) {
memory_region_unregister_iommu_notifier(&giommu->n);
memory_region_unregister_iommu_notifier(giommu->iommu,
&giommu->n);
QLIST_REMOVE(giommu, giommu_next);
g_free(giommu);
break;
@ -991,7 +992,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
QLIST_REMOVE(container, next);
QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
memory_region_unregister_iommu_notifier(&giommu->n);
memory_region_unregister_iommu_notifier(giommu->iommu, &giommu->n);
QLIST_REMOVE(giommu, giommu_next);
g_free(giommu);
}

View File

@ -318,7 +318,7 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
/* This window doesn't seem to be used except by legacy VGA code */
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
!vdev->has_vga || nr != 4) {
!vdev->vga || nr != 4) {
return;
}
@ -366,7 +366,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
/* Only enable on newer devices where BAR2 is 64bit */
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64) {
!vdev->vga || nr != 2 || !vdev->bars[2].mem64) {
return;
}
@ -660,7 +660,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
VFIOConfigWindowQuirk *window;
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
!vdev->has_vga || nr != 5) {
!vdev->vga || nr != 5) {
return;
}
@ -776,7 +776,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
/* The 0x1800 offset mirror only seems to get used by legacy VGA */
if (vdev->has_vga) {
if (vdev->vga) {
quirk = g_malloc0(sizeof(*quirk));
mirror = quirk->data = g_malloc0(sizeof(*mirror));
mirror->mem = quirk->mem = g_new0(MemoryRegion, 1);

View File

@ -1502,6 +1502,21 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
return next - pos;
}
/*
 * Upper bound on the size of the extended capability located at @pos.
 *
 * Follows the extended capability chain in @config, starting at
 * PCI_CONFIG_SPACE_SIZE, and picks the lowest chain offset that lies
 * above @pos. When no such entry exists the limit stays at
 * PCIE_CONFIG_SPACE_SIZE. Returns the distance from @pos to that limit.
 */
static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)
{
    uint16_t limit = PCIE_CONFIG_SPACE_SIZE;
    uint16_t cur = PCI_CONFIG_SPACE_SIZE;

    /* A next pointer of zero terminates the chain. */
    while (cur) {
        if (cur > pos && cur < limit) {
            limit = cur;
        }
        cur = PCI_EXT_CAP_NEXT(pci_get_long(config + cur));
    }

    return limit - pos;
}
static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
{
pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
@ -1749,16 +1764,100 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
return 0;
}
/*
* Register the device's PCIe extended capabilities on the emulated device.
*
* Walks the extended capability chain in a private copy of config space and
* hands each entry to pcie_add_capability(), except SR-IOV, which is only
* traced as dropped. Nothing is done unless both the device and its bus are
* PCI Express and the first extended config space dword is non-zero.
*
* Returns 0 on every path in this function.
*/
static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
{
PCIDevice *pdev = &vdev->pdev;
uint32_t header;
uint16_t cap_id, next, size;
uint8_t cap_ver;
uint8_t *config;
/* Only add extended caps if we have them and the guest can see them */
if (!pci_is_express(pdev) || !pci_bus_is_express(pdev->bus) ||
!pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
return 0;
}
/*
* pcie_add_capability always inserts the new capability at the tail
* of the chain. Therefore to end up with a chain that matches the
* physical device, we cache the config space to avoid overwriting
* the original config space when we parse the extended capabilities.
*/
config = g_memdup(pdev->config, vdev->config_size);
/*
* Extended capabilities are chained with each pointing to the next, so we
* can drop anything other than the head of the chain simply by modifying
* the previous next pointer. For the head of the chain, we can modify the
* capability ID to something that cannot match a valid capability. ID
* 0 is reserved for this since absence of capabilities is indicated by
* 0 for the ID, version, AND next pointer. However, pcie_add_capability()
* uses ID 0 as reserved for list management and will incorrectly match and
* assert if we attempt to pre-load the head of the chain with this
* ID. Use ID 0xFFFF temporarily since it also seems to be reserved in
* part for identifying absence of capabilities in a root complex register
* block. If the ID still exists after adding capabilities, switch back to
* zero. We'll mark this entire first dword as emulated for this purpose.
*/
pci_set_long(pdev->config + PCI_CONFIG_SPACE_SIZE,
PCI_EXT_CAP(0xFFFF, 0, 0));
pci_set_long(pdev->wmask + PCI_CONFIG_SPACE_SIZE, 0);
pci_set_long(vdev->emulated_config_bits + PCI_CONFIG_SPACE_SIZE, ~0);
/* Walk the chain in the cached copy; pdev->config is rewritten as we go */
for (next = PCI_CONFIG_SPACE_SIZE; next;
next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
header = pci_get_long(config + next);
cap_id = PCI_EXT_CAP_ID(header);
cap_ver = PCI_EXT_CAP_VER(header);
/*
* If it becomes important to configure extended capabilities to their
* actual size, use this as the default when it's something we don't
* recognize. Since QEMU doesn't actually handle many of the config
* accesses, exact size doesn't seem worthwhile.
*/
size = vfio_ext_cap_max_size(config, next);
/* Use emulated next pointer to allow dropping extended caps */
pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
PCI_EXT_CAP_NEXT_MASK);
switch (cap_id) {
case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
break;
default:
/* Everything else is added using the ID, version, offset and size above */
pcie_add_capability(pdev, cap_id, cap_ver, next, size);
}
}
/* Cleanup chain head ID if necessary */
if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) {
pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);
}
/* Release the cached copy of config space */
g_free(config);
return 0;
}
/*
 * Register the device's standard and extended PCI capabilities.
 *
 * Returns 0 without doing anything when the status register does not
 * advertise a capability list or the capability pointer is zero.
 * Otherwise the standard capability chain is added first; a non-zero
 * result from vfio_add_std_cap() is returned unchanged, and only on
 * success is vfio_add_ext_cap() called and its result returned.
 */
static int vfio_add_capabilities(VFIOPCIDevice *vdev)
{
    PCIDevice *pdev = &vdev->pdev;
    int ret;

    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
        !pdev->config[PCI_CAPABILITY_LIST]) {
        return 0; /* Nothing to add */
    }

    /*
     * Do not return the standard-capability result directly: that would
     * leave the extended capability pass below unreachable.
     */
    ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
    if (ret) {
        return ret;
    }

    return vfio_add_ext_cap(vdev);
}
static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)

View File

@ -135,7 +135,6 @@ typedef struct VFIOPCIDevice {
int32_t bootindex;
uint32_t igd_gms;
uint8_t pm_cap;
bool has_vga;
bool pci_aer;
bool req_enabled;
bool has_flr;

View File

@ -37,6 +37,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
vfio_initfn(const char *name, int group_id) " (%s) group %d"
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s %x@%x"
vfio_pci_reset(const char *name) " (%s)"
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
vfio_pci_reset_pm(const char *name) "%s PCI PM Reset"

View File

@ -153,6 +153,10 @@ struct MemoryRegionIOMMUOps {
IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write);
/* Returns minimum supported page size */
uint64_t (*get_min_page_size)(MemoryRegion *iommu);
/* Called when the first notifier is set */
void (*notify_started)(MemoryRegion *iommu);
/* Called when the last notifier is removed */
void (*notify_stopped)(MemoryRegion *iommu);
};
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
@ -622,9 +626,11 @@ void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write);
* memory_region_unregister_iommu_notifier: unregister a notifier for
* changes to IOMMU translation entries.
*
* @mr: the memory region which was observed and for which notify_stopped()
* needs to be called
* @n: the notifier to be removed.
*/
void memory_region_unregister_iommu_notifier(Notifier *n);
void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n);
/**
* memory_region_name: get a memory region's name

View File

@ -1499,6 +1499,10 @@ bool memory_region_is_logging(MemoryRegion *mr, uint8_t client)
/*
 * Add @n to the IOMMU notifier list of @mr.
 *
 * If the list is empty beforehand and the region's IOMMU ops provide a
 * notify_started callback, that callback is invoked before @n is added.
 */
void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n)
{
    void (*started)(MemoryRegion *) = mr->iommu_ops->notify_started;

    /* Only the transition from "no notifiers" triggers the callback. */
    if (started && QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
        started(mr);
    }

    notifier_list_add(&mr->iommu_notify, n);
}
@ -1532,9 +1536,13 @@ void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write)
}
}
void memory_region_unregister_iommu_notifier(Notifier *n)
void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n)
{
notifier_remove(n);
if (mr->iommu_ops->notify_stopped &&
QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
mr->iommu_ops->notify_stopped(mr);
}
}
void memory_region_notify_iommu(MemoryRegion *mr,