diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 5a1732b78707..b728f700340d 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -301,3 +301,25 @@ Contact: Emil Velikov Description: This file contains the revision field of the the PCI device. The value comes from device config space. The file is read only. + +What: /sys/bus/pci/devices/.../sriov_drivers_autoprobe +Date: April 2017 +Contact: Bodong Wang +Description: + This file is associated with the PF of a device that + supports SR-IOV. It determines whether newly-enabled VFs + are immediately bound to a driver. It initially contains + 1, which means the kernel automatically binds VFs to a + compatible driver immediately after they are enabled. If + an application writes 0 to the file before enabling VFs, + the kernel will not bind VFs to a driver. + + A typical use case is to write 0 to this file, then enable + VFs, then assign the newly-created VFs to virtual machines. + Note that changing this file does not affect already- + enabled VFs. In this scenario, the user must first disable + the VFs, write 0 to sriov_drivers_autoprobe, then re-enable + the VFs. + + This is similar to /sys/bus/pci/drivers_autoprobe, but + affects only the VFs associated with a specific PF. diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt index 2d91ae251982..d2a84151e99c 100644 --- a/Documentation/PCI/pci-iov-howto.txt +++ b/Documentation/PCI/pci-iov-howto.txt @@ -68,6 +68,18 @@ To disable SR-IOV capability: echo 0 > \ /sys/bus/pci/devices//sriov_numvfs +To enable auto probing VFs by a compatible driver on the host, run +command below before enabling SR-IOV capabilities. This is the +default behavior. + echo 1 > \ + /sys/bus/pci/devices//sriov_drivers_autoprobe + +To disable auto probing VFs by a compatible driver on the host, run +command below before enabling SR-IOV capabilities. Updating this +entry will not affect VFs which are already probed. + echo 0 > \ + /sys/bus/pci/devices//sriov_drivers_autoprobe + 3.2 Usage example Following piece of code illustrates the usage of the SR-IOV API. diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 121a4c920f1b..d037f72e4d96 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13610,14 +13610,14 @@ static void init_chip(struct hfi1_devdata *dd) dd_dev_info(dd, "Resetting CSRs with FLR\n"); /* do the FLR, the DC reset will remain */ - hfi1_pcie_flr(dd); + pcie_flr(dd->pcidev); /* restore command and BARs */ restore_pci_variables(dd); if (is_ax(dd)) { dd_dev_info(dd, "Resetting CSRs with FLR\n"); - hfi1_pcie_flr(dd); + pcie_flr(dd->pcidev); restore_pci_variables(dd); } } else { diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 0808e3c3ba39..40d7559fa723 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1764,7 +1764,6 @@ int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *); void hfi1_pcie_cleanup(struct pci_dev *); int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); -void hfi1_pcie_flr(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *); void request_msix(struct hfi1_devdata *, u32 *, struct hfi1_msix_entry *); void hfi1_enable_intx(struct pci_dev *); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 0829fce06172..c81556e84831 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -240,36 +240,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) iounmap(dd->piobase); } -/* - * Do a Function Level Reset (FLR) on the device. - * Based on static function drivers/pci/pci.c:pcie_flr(). - */ -void hfi1_pcie_flr(struct hfi1_devdata *dd) -{ - int i; - u16 status; - - /* no need to check for the capability - we know the device has it */ - - /* wait for Transaction Pending bit to clear, at most a few ms */ - for (i = 0; i < 4; i++) { - if (i) - msleep((1 << (i - 1)) * 100); - - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVSTA, &status); - if (!(status & PCI_EXP_DEVSTA_TRPND)) - goto clear; - } - - dd_dev_err(dd, "Transaction Pending bit is not clearing, proceeding with reset anyway\n"); - -clear: - pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_BCR_FLR); - /* PCIe spec requires the function to be back within 100ms */ - msleep(100); -} - static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt, struct hfi1_msix_entry *hfi1_msix_entry) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a7a430a7be2c..543ddde5f8e2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7112,18 +7112,6 @@ static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter) } #ifdef CONFIG_PCI_IOV -static inline void ixgbe_issue_vf_flr(struct ixgbe_adapter *adapter, - struct pci_dev *vfdev) -{ - if (!pci_wait_for_pending_transaction(vfdev)) - e_dev_warn("Issuing VFLR with pending transactions\n"); - - e_dev_err("Issuing VFLR for VF %s\n", pci_name(vfdev)); - pcie_capability_set_word(vfdev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); - - msleep(100); -} - static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -7156,7 +7144,7 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) pci_read_config_word(vfdev, PCI_STATUS, &status_reg); if (status_reg != IXGBE_FAILED_READ_CFG_WORD && status_reg & PCI_STATUS_REC_MASTER_ABORT) - ixgbe_issue_vf_flr(adapter, vfdev); + pcie_flr(vfdev); } } @@ -10244,7 +10232,7 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, * VFLR. Just clean up the AER in that case. */ if (vfdev) { - ixgbe_issue_vf_flr(adapter, vfdev); + pcie_flr(vfdev); /* Free device reference count */ pci_dev_put(vfdev); } diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 2479ae876482..d9dc7363ac77 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -450,6 +450,7 @@ found: iov->total_VFs = total; iov->pgsz = pgsz; iov->self = dev; + iov->drivers_autoprobe = true; pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap); pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link); if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 8ec136164e93..192e7b681b96 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -394,6 +394,18 @@ void __weak pcibios_free_irq(struct pci_dev *dev) { } +#ifdef CONFIG_PCI_IOV +static inline bool pci_device_can_probe(struct pci_dev *pdev) +{ + return (!pdev->is_virtfn || pdev->physfn->sriov->drivers_autoprobe); +} +#else +static inline bool pci_device_can_probe(struct pci_dev *pdev) +{ + return true; +} +#endif + static int pci_device_probe(struct device *dev) { int error; @@ -405,10 +417,12 @@ static int pci_device_probe(struct device *dev) return error; pci_dev_get(pci_dev); - error = __pci_device_probe(drv, pci_dev); - if (error) { - pcibios_free_irq(pci_dev); - pci_dev_put(pci_dev); + if (pci_device_can_probe(pci_dev)) { + error = __pci_device_probe(drv, pci_dev); + if (error) { + pcibios_free_irq(pci_dev); + pci_dev_put(pci_dev); + } } return error; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 10feb98a2b1d..31e99613a12e 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -526,10 +526,37 @@ exit: return count; } +static ssize_t sriov_drivers_autoprobe_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe); +} + +static ssize_t sriov_drivers_autoprobe_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + bool drivers_autoprobe; + + if (kstrtobool(buf, &drivers_autoprobe) < 0) + return -EINVAL; + + pdev->sriov->drivers_autoprobe = drivers_autoprobe; + + return count; +} + static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs); static struct device_attribute sriov_numvfs_attr = __ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP), sriov_numvfs_show, sriov_numvfs_store); +static struct device_attribute sriov_drivers_autoprobe_attr = + __ATTR(sriov_drivers_autoprobe, (S_IRUGO|S_IWUSR|S_IWGRP), + sriov_drivers_autoprobe_show, sriov_drivers_autoprobe_store); #endif /* CONFIG_PCI_IOV */ static ssize_t driver_override_store(struct device *dev, @@ -1539,6 +1566,7 @@ static struct attribute_group pci_dev_hp_attr_group = { static struct attribute *sriov_dev_attrs[] = { &sriov_totalvfs_attr.attr, &sriov_numvfs_attr.attr, + &sriov_drivers_autoprobe_attr.attr, NULL, }; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2363922b3b95..b01bd5bba8e6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3860,24 +3860,41 @@ static void pci_flr_wait(struct pci_dev *dev) (i - 1) * 100); } -static int pcie_flr(struct pci_dev *dev, int probe) +/** + * pcie_has_flr - check if a device supports function level resets + * @dev: device to check + * + * Returns true if the device advertises support for PCIe function level + * resets. + */ +static bool pcie_has_flr(struct pci_dev *dev) { u32 cap; + if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET) + return false; + pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap); - if (!(cap & PCI_EXP_DEVCAP_FLR)) - return -ENOTTY; - - if (probe) - return 0; + return cap & PCI_EXP_DEVCAP_FLR; +} +/** + * pcie_flr - initiate a PCIe function level reset + * @dev: device to reset + * + * Initiate a function level reset on @dev. The caller should ensure the + * device supports FLR before calling this function, e.g. by using the + * pcie_has_flr() helper. + */ +void pcie_flr(struct pci_dev *dev) +{ if (!pci_wait_for_pending_transaction(dev)) dev_err(&dev->dev, "timed out waiting for pending transaction; performing function level reset anyway\n"); pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); pci_flr_wait(dev); - return 0; } +EXPORT_SYMBOL_GPL(pcie_flr); static int pci_af_flr(struct pci_dev *dev, int probe) { @@ -3888,6 +3905,9 @@ static int pci_af_flr(struct pci_dev *dev, int probe) if (!pos) return -ENOTTY; + if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET) + return -ENOTTY; + pci_read_config_byte(dev, pos + PCI_AF_CAP, &cap); if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) return -ENOTTY; @@ -4058,9 +4078,12 @@ static int __pci_dev_reset(struct pci_dev *dev, int probe) if (rc != -ENOTTY) goto done; - rc = pcie_flr(dev, probe); - if (rc != -ENOTTY) + if (pcie_has_flr(dev)) { + if (!probe) + pcie_flr(dev); + rc = 0; goto done; + } rc = pci_af_flr(dev, probe); if (rc != -ENOTTY) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 71fa82359b5b..586e63f55013 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -272,6 +272,7 @@ struct pci_sriov { struct pci_dev *self; /* this PF */ struct mutex lock; /* lock for setting sriov_numvfs in sysfs */ resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */ + bool drivers_autoprobe; /* auto probing of VFs by driver */ }; /* pci_dev priv_flags */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 65dfe58e136c..085fb787aa9e 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3666,19 +3666,11 @@ static int reset_intel_82599_sfp_virtfn(struct pci_dev *dev, int probe) * * The 82599 supports FLR on VFs, but FLR support is reported only * in the PF DEVCAP (sec 9.3.10.4), not in the VF DEVCAP (sec 9.5). - * Therefore, we can't use pcie_flr(), which checks the VF DEVCAP. + * Thus we must call pcie_flr() directly without first checking if it is + * supported. */ - - if (probe) - return 0; - - if (!pci_wait_for_pending_transaction(dev)) - dev_err(&dev->dev, "transaction is not cleared; proceeding with reset anyway\n"); - - pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); - - msleep(100); - + if (!probe) + pcie_flr(dev); return 0; } @@ -3783,20 +3775,7 @@ static int reset_chelsio_generic_dev(struct pci_dev *dev, int probe) PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); - /* - * Start of pcie_flr() code sequence. This reset code is a copy of - * the guts of pcie_flr() because that's not an exported function. - */ - - if (!pci_wait_for_pending_transaction(dev)) - dev_err(&dev->dev, "transaction is not cleared; proceeding with reset anyway\n"); - - pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); - msleep(100); - - /* - * End of pcie_flr() code sequence. - */ + pcie_flr(dev); /* * Restore the configuration information (BAR values, etc.) including @@ -4677,3 +4656,11 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); + +/* FLR may cause some 82579 devices to hang. */ +static void quirk_intel_no_flr(struct pci_dev *dev) +{ + dev->dev_flags |= PCI_DEV_FLAGS_NO_FLR_RESET; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_intel_no_flr); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_intel_no_flr); diff --git a/include/linux/pci.h b/include/linux/pci.h index bbd17d49c947..88185ffcbf47 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -181,6 +181,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8), /* a non-root bridge where translation occurs, stop alias search here */ PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9), + /* Do not use FLR even if device advertises PCI_AF_CAP */ + PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), }; enum pci_irq_reroute_variant { @@ -1037,6 +1039,7 @@ int pcie_get_mps(struct pci_dev *dev); int pcie_set_mps(struct pci_dev *dev, int mps); int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +void pcie_flr(struct pci_dev *dev); int __pci_reset_function(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev);