ec132efaa8
NVIDIA V100 GPUs have on-board RAM which is mapped into the host memory space and accessible as normal RAM via an NVLink bus. The VFIO-PCI driver implements special regions for such GPUs and emulates an NVLink bridge. NVLink2-enabled POWER9 CPUs also provide address translation services which includes an ATS shootdown (ATSD) register exported via the NVLink bridge device. This adds a quirk to VFIO to map the GPU memory and create an MR; the new MR is stored in a PCI device as a QOM link. The sPAPR PCI uses this to get the MR and map it to the system address space. Another quirk does the same for ATSD. This adds additional steps to sPAPR PHB setup: 1. Search for specific GPUs and NPUs, collect findings in sPAPRPHBState::nvgpus, manage system address space mappings; 2. Add device-specific properties such as "ibm,npu", "ibm,gpu", "memory-block", "link-speed" to advertise the NVLink2 function to the guest; 3. Add "mmio-atsd" to vPHB to advertise the ATSD capability; 4. Add new memory blocks (with extra "linux,memory-usable" to prevent the guest OS from accessing the new memory until it is onlined) and npuphb# nodes representing an NPU unit for every vPHB as the GPU driver uses it for link discovery. This allocates space for GPU RAM and ATSD like we do for MMIOs by adding 2 new parameters to the phb_placement() hook. Older machine types set these to zero. This puts new memory nodes in a separate NUMA node to as the GPU RAM needs to be configured equally distant from any other node in the system. Unlike the host setup which assigns numa ids from 255 downwards, this adds new NUMA nodes after the user configures nodes or from 1 if none were configured. This adds requirement similar to EEH - one IOMMU group per vPHB. The reason for this is that ATSD registers belong to a physical NPU so they cannot invalidate translations on GPUs attached to another NPU. It is guaranteed by the host platform as it does not mix NVLink bridges or GPUs from different NPU in the same IOMMU group. If more than one IOMMU group is detected on a vPHB, this disables ATSD support for that vPHB and prints a warning. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> [aw: for vfio portions] Acked-by: Alex Williamson <alex.williamson@redhat.com> Message-Id: <20190312082103.130561-1-aik@ozlabs.ru> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
145 lines
11 KiB
Plaintext
145 lines
11 KiB
Plaintext
# See docs/devel/tracing.txt for syntax documentation.
|
|
|
|
# pci.c
|
|
vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c"
|
|
vfio_intx_eoi(const char *name) " (%s) EOI"
|
|
vfio_intx_enable_kvm(const char *name) " (%s) KVM INTx accel enabled"
|
|
vfio_intx_disable_kvm(const char *name) " (%s) KVM INTx accel disabled"
|
|
vfio_intx_update(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d"
|
|
vfio_intx_enable(const char *name) " (%s)"
|
|
vfio_intx_disable(const char *name) " (%s)"
|
|
vfio_msi_interrupt(const char *name, int index, uint64_t addr, int data) " (%s) vector %d 0x%"PRIx64"/0x%x"
|
|
vfio_msix_vector_do_use(const char *name, int index) " (%s) vector %d used"
|
|
vfio_msix_vector_release(const char *name, int index) " (%s) vector %d released"
|
|
vfio_msix_enable(const char *name) " (%s)"
|
|
vfio_msix_pba_disable(const char *name) " (%s)"
|
|
vfio_msix_pba_enable(const char *name) " (%s)"
|
|
vfio_msix_disable(const char *name) " (%s)"
|
|
vfio_msix_fixup(const char *name, int bar, uint64_t start, uint64_t end) " (%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]"
|
|
vfio_msix_relo(const char *name, int bar, uint64_t offset) " (%s) BAR %d offset 0x%"PRIx64""
|
|
vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors"
|
|
vfio_msi_disable(const char *name) " (%s)"
|
|
vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
|
|
vfio_rom_read(const char *name, uint64_t addr, int size, uint64_t data) " (%s, 0x%"PRIx64", 0x%x) = 0x%"PRIx64
|
|
vfio_pci_size_rom(const char *name, int size) "%s ROM size 0x%x"
|
|
vfio_vga_write(uint64_t addr, uint64_t data, int size) " (0x%"PRIx64", 0x%"PRIx64", %d)"
|
|
vfio_vga_read(uint64_t addr, int size, uint64_t data) " (0x%"PRIx64", %d) = 0x%"PRIx64
|
|
vfio_pci_read_config(const char *name, int addr, int len, int val) " (%s, @0x%x, len=0x%x) 0x%x"
|
|
vfio_pci_write_config(const char *name, int addr, int val, int len) " (%s, @0x%x, 0x%x, len=0x%x)"
|
|
vfio_msi_setup(const char *name, int pos) "%s PCI MSI CAP @0x%x"
|
|
vfio_msix_early_setup(const char *name, int pos, int table_bar, int offset, int entries) "%s PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d"
|
|
vfio_check_pcie_flr(const char *name) "%s Supports FLR via PCIe cap"
|
|
vfio_check_pm_reset(const char *name) "%s Supports PM reset"
|
|
vfio_check_af_flr(const char *name) "%s Supports FLR via AF cap"
|
|
vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s"
|
|
vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:"
|
|
vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d"
|
|
vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
|
|
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
|
|
vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
|
|
vfio_realize(const char *name, int group_id) " (%s) group %d"
|
|
vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
|
|
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
|
|
vfio_pci_reset(const char *name) " (%s)"
|
|
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
|
|
vfio_pci_reset_pm(const char *name) "%s PCI PM Reset"
|
|
vfio_pci_emulated_vendor_id(const char *name, uint16_t val) "%s 0x%04x"
|
|
vfio_pci_emulated_device_id(const char *name, uint16_t val) "%s 0x%04x"
|
|
vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t val) "%s 0x%04x"
|
|
vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s 0x%04x"
|
|
|
|
# pci-quirks.c
|
|
vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x"
|
|
vfio_quirk_generic_window_address_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64
|
|
vfio_quirk_generic_window_data_read(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64
|
|
vfio_quirk_generic_window_data_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64
|
|
vfio_quirk_generic_mirror_read(const char *name, const char * region_name, uint64_t addr, uint64_t data) "%s %s 0x%"PRIx64": 0x%"PRIx64
|
|
vfio_quirk_generic_mirror_write(const char *name, const char * region_name, uint64_t addr, uint64_t data) "%s %s 0x%"PRIx64": 0x%"PRIx64
|
|
vfio_quirk_ati_3c3_read(const char *name, uint64_t data) "%s 0x%"PRIx64
|
|
vfio_quirk_ati_3c3_probe(const char *name) "%s"
|
|
vfio_quirk_ati_bar4_probe(const char *name) "%s"
|
|
vfio_quirk_ati_bar2_probe(const char *name) "%s"
|
|
vfio_quirk_nvidia_3d0_state(const char *name, const char *state) "%s %s"
|
|
vfio_quirk_nvidia_3d0_read(const char *name, uint8_t offset, unsigned size, uint64_t val) " (%s, @0x%x, len=0x%x) 0x%"PRIx64
|
|
vfio_quirk_nvidia_3d0_write(const char *name, uint8_t offset, uint64_t data, unsigned size) "(%s, @0x%x, 0x%"PRIx64", len=0x%x)"
|
|
vfio_quirk_nvidia_3d0_probe(const char *name) "%s"
|
|
vfio_quirk_nvidia_bar5_state(const char *name, const char *state) "%s %s"
|
|
vfio_quirk_nvidia_bar5_probe(const char *name) "%s"
|
|
vfio_quirk_nvidia_bar0_msi_ack(const char *name) "%s"
|
|
vfio_quirk_nvidia_bar0_probe(const char *name) "%s"
|
|
vfio_quirk_rtl8168_fake_latch(const char *name, uint64_t val) "%s 0x%"PRIx64
|
|
vfio_quirk_rtl8168_msix_write(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table write[0x%x]: 0x%"PRIx64
|
|
vfio_quirk_rtl8168_msix_read(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table read[0x%x]: 0x%"PRIx64
|
|
vfio_quirk_rtl8168_probe(const char *name) "%s"
|
|
|
|
vfio_quirk_ati_bonaire_reset_skipped(const char *name) "%s"
|
|
vfio_quirk_ati_bonaire_reset_no_smc(const char *name) "%s"
|
|
vfio_quirk_ati_bonaire_reset_timeout(const char *name) "%s"
|
|
vfio_quirk_ati_bonaire_reset_done(const char *name) "%s"
|
|
vfio_quirk_ati_bonaire_reset(const char *name) "%s"
|
|
vfio_ioeventfd_exit(const char *name, uint64_t addr, unsigned size, uint64_t data) "%s+0x%"PRIx64"[%d]:0x%"PRIx64
|
|
vfio_ioeventfd_handler(const char *name, uint64_t addr, unsigned size, uint64_t data) "%s+0x%"PRIx64"[%d] -> 0x%"PRIx64
|
|
vfio_ioeventfd_init(const char *name, uint64_t addr, unsigned size, uint64_t data, bool vfio) "%s+0x%"PRIx64"[%d]:0x%"PRIx64" vfio:%d"
|
|
vfio_pci_igd_bar4_write(const char *name, uint32_t index, uint32_t data, uint32_t base) "%s [0x%03x] 0x%08x -> 0x%08x"
|
|
vfio_pci_igd_bdsm_enabled(const char *name, int size) "%s %dMB"
|
|
vfio_pci_igd_opregion_enabled(const char *name) "%s"
|
|
vfio_pci_igd_host_bridge_enabled(const char *name) "%s"
|
|
vfio_pci_igd_lpc_bridge_enabled(const char *name) "%s"
|
|
|
|
vfio_pci_nvidia_gpu_setup_quirk(const char *name, uint64_t tgt, uint64_t size) "%s tgt=0x%"PRIx64" size=0x%"PRIx64
|
|
vfio_pci_nvlink2_setup_quirk_ssatgt(const char *name, uint64_t tgt, uint64_t size) "%s tgt=0x%"PRIx64" size=0x%"PRIx64
|
|
vfio_pci_nvlink2_setup_quirk_lnkspd(const char *name, uint32_t link_speed) "%s link_speed=0x%x"
|
|
|
|
# common.c
|
|
vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"
|
|
vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64
|
|
vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "iommu %s @ 0x%"PRIx64" - 0x%"PRIx64
|
|
vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add 0x%"PRIx64" - 0x%"PRIx64
|
|
vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
|
|
vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64
|
|
vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
|
|
vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
|
|
vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64
|
|
vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
|
|
vfio_disconnect_container(int fd) "close container->fd=%d"
|
|
vfio_put_group(int fd) "close group->fd=%d"
|
|
vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
|
|
vfio_put_base_device(int fd) "close vdev->fd=%d"
|
|
vfio_region_setup(const char *dev, int index, const char *name, unsigned long flags, unsigned long offset, unsigned long size) "Device %s, region %d \"%s\", flags: 0x%lx, offset: 0x%lx, size: 0x%lx"
|
|
vfio_region_mmap_fault(const char *name, int index, unsigned long offset, unsigned long size, int fault) "Region %s mmaps[%d], [0x%lx - 0x%lx], fault: %d"
|
|
vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Region %s [0x%lx - 0x%lx]"
|
|
vfio_region_exit(const char *name, int index) "Device %s, region %d"
|
|
vfio_region_finalize(const char *name, int index) "Device %s, region %d"
|
|
vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
|
|
vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
|
|
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
|
|
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
|
|
vfio_dma_unmap_overflow_workaround(void) ""
|
|
|
|
# platform.c
|
|
vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"
|
|
vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s"
|
|
vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)"
|
|
vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow path"
|
|
vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)"
|
|
vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)"
|
|
vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d: count %d, flags=0x%x"
|
|
vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING"
|
|
vfio_platform_start_level_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d"
|
|
vfio_platform_start_edge_irqfd_injection(int index, int fd) "IRQ index=%d, fd = %d"
|
|
|
|
# spapr.c
|
|
vfio_prereg_listener_region_add_skip(uint64_t start, uint64_t end) "0x%"PRIx64" - 0x%"PRIx64
|
|
vfio_prereg_listener_region_del_skip(uint64_t start, uint64_t end) "0x%"PRIx64" - 0x%"PRIx64
|
|
vfio_prereg_register(uint64_t va, uint64_t size, int ret) "va=0x%"PRIx64" size=0x%"PRIx64" ret=%d"
|
|
vfio_prereg_unregister(uint64_t va, uint64_t size, int ret) "va=0x%"PRIx64" size=0x%"PRIx64" ret=%d"
|
|
vfio_spapr_create_window(int ps, unsigned int levels, uint64_t ws, uint64_t off) "pageshift=0x%x levels=%u winsize=0x%"PRIx64" offset=0x%"PRIx64
|
|
vfio_spapr_remove_window(uint64_t off) "offset=0x%"PRIx64
|
|
|
|
# display.c
|
|
vfio_display_edid_available(void) ""
|
|
vfio_display_edid_link_up(void) ""
|
|
vfio_display_edid_link_down(void) ""
|
|
vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u"
|
|
vfio_display_edid_write_error(void) ""
|