diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 2ff123f745cc..0b655810f32d 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -300,7 +300,7 @@ void eeh_dev_release(struct pci_dev *pdev); struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group); int eeh_pe_set_option(struct eeh_pe *pe, int option); int eeh_pe_get_state(struct eeh_pe *pe); -int eeh_pe_reset(struct eeh_pe *pe, int option); +int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed); int eeh_pe_configure(struct eeh_pe *pe); int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 08e094eaeccf..f191ef0d2a0a 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -53,7 +53,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev); struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr); void eeh_slot_error_detail(struct eeh_pe *pe, int severity); int eeh_pci_enable(struct eeh_pe *pe, int function); -int eeh_pe_reset_full(struct eeh_pe *pe); +int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed); void eeh_save_bars(struct eeh_dev *edev); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 8d32587b07dc..416d1ef49762 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -877,6 +877,24 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag) return NULL; } +static void eeh_pe_refreeze_passed(struct eeh_pe *root) +{ + struct eeh_pe *pe; + int state; + + eeh_for_each_pe(root, pe) { + if (eeh_pe_passed(pe)) { + state = eeh_ops->get_state(pe, NULL); + if (state & + (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) { + pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n", + pe->phb->global_number, pe->addr); + eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE); + } + } + } +} + /** * eeh_pe_reset_full - Complete a full reset process on the indicated PE * @pe: EEH PE @@ -889,7 +907,7 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag) * * This function will attempt to reset a PE three times before failing. */ -int eeh_pe_reset_full(struct eeh_pe *pe) +int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed) { int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); int type = EEH_RESET_HOT; @@ -911,11 +929,11 @@ int eeh_pe_reset_full(struct eeh_pe *pe) /* Make three attempts at resetting the bus */ for (i = 0; i < 3; i++) { - ret = eeh_pe_reset(pe, type); + ret = eeh_pe_reset(pe, type, include_passed); if (ret) break; - ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, include_passed); if (ret) break; @@ -936,6 +954,12 @@ int eeh_pe_reset_full(struct eeh_pe *pe) __func__, state, pe->phb->global_number, pe->addr, (i + 1)); } + /* Resetting the PE may have unfrozen child PEs. If those PEs have been + * (potentially) passed through to a guest, re-freeze them: + */ + if (!include_passed) + eeh_pe_refreeze_passed(pe); + eeh_pe_state_clear(pe, reset_state, true); return ret; } @@ -1611,13 +1635,12 @@ int eeh_pe_get_state(struct eeh_pe *pe) } EXPORT_SYMBOL_GPL(eeh_pe_get_state); -static int eeh_pe_reenable_devices(struct eeh_pe *pe) +static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed) { struct eeh_dev *edev, *tmp; struct pci_dev *pdev; int ret = 0; - /* Restore config space */ eeh_pe_restore_bars(pe); /* @@ -1638,9 +1661,13 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) } /* The PE is still in frozen state */ - ret = eeh_unfreeze_pe(pe); + if (include_passed || !eeh_pe_passed(pe)) { + ret = eeh_unfreeze_pe(pe); + } else + pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n", + pe->phb->global_number, pe->addr); if (!ret) - eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed); return ret; } @@ -1654,7 +1681,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) * indicated type, either fundamental reset or hot reset. * PE reset is the most important part for error recovery. */ -int eeh_pe_reset(struct eeh_pe *pe, int option) +int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed) { int ret = 0; @@ -1668,11 +1695,11 @@ int eeh_pe_reset(struct eeh_pe *pe, int option) switch (option) { case EEH_RESET_DEACTIVATE: ret = eeh_ops->reset(pe, option); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed); if (ret) break; - ret = eeh_pe_reenable_devices(pe); + ret = eeh_pe_reenable_devices(pe, include_passed); break; case EEH_RESET_HOT: case EEH_RESET_FUNDAMENTAL: diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 91629b3f3b74..89623962c727 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -510,22 +510,11 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) * support EEH. So we just care about PCI devices for * simplicity here. */ - if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) - return NULL; - - /* - * We rely on count-based pcibios_release_device() to - * detach permanently offlined PEs. Unfortunately, that's - * not reliable enough. We might have the permanently - * offlined PEs attached, but we needn't take care of - * them and their child devices. - */ - if (eeh_dev_removed(edev)) + if (!eeh_edev_actionable(edev) || + (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) return NULL; if (rmv_data) { - if (eeh_pe_passed(edev->pe)) - return NULL; driver = eeh_pcid_get(dev); if (driver) { if (driver->err_handler && @@ -539,8 +528,8 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) } /* Remove it from PCI subsystem */ - pr_debug("EEH: Removing %s without EEH sensitive driver\n", - pci_name(dev)); + pr_info("EEH: Removing %s without EEH sensitive driver\n", + pci_name(dev)); edev->mode |= EEH_DEV_DISCONNECTED; if (rmv_data) rmv_data->removed_dev_count++; @@ -624,7 +613,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); /* Issue reset */ - ret = eeh_pe_reset_full(pe); + ret = eeh_pe_reset_full(pe, true); if (ret) { eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); return ret; @@ -664,6 +653,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, time64_t tstamp; int cnt, rc; struct eeh_dev *edev; + struct eeh_pe *tmp_pe; + bool any_passed = false; + + eeh_for_each_pe(pe, tmp_pe) + any_passed |= eeh_pe_passed(tmp_pe); /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; @@ -676,7 +670,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * into pci_hp_add_devices(). */ eeh_pe_state_mark(pe, EEH_PE_KEEP); - if (driver_eeh_aware || (pe->type & EEH_PE_VF)) { + if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) { eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } else { pci_lock_rescan_remove(); @@ -693,7 +687,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. */ - rc = eeh_pe_reset_full(pe); + rc = eeh_pe_reset_full(pe, false); if (rc) return rc; @@ -704,7 +698,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, eeh_pe_restore_bars(pe); /* Clear frozen state */ - rc = eeh_clear_pe_frozen_state(pe, true); + rc = eeh_clear_pe_frozen_state(pe, false); if (rc) { pci_unlock_rescan_remove(); return rc; diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 38edeb4729a9..1a742fe8f6db 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -74,13 +74,13 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, ret = eeh_pe_get_state(pe); break; case VFIO_EEH_PE_RESET_DEACTIVATE: - ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true); break; case VFIO_EEH_PE_RESET_HOT: - ret = eeh_pe_reset(pe, EEH_RESET_HOT); + ret = eeh_pe_reset(pe, EEH_RESET_HOT, true); break; case VFIO_EEH_PE_RESET_FUNDAMENTAL: - ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL); + ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true); break; case VFIO_EEH_PE_CONFIGURE: ret = eeh_pe_configure(pe);