From d3a78d8bf759d8848339dcc367c4c1678b57a08b Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 23 Mar 2017 14:58:10 -0700 Subject: [PATCH 1/5] PCI: hv: Properly handle PCI bus remove hv_pci_devices_present() is called in hv_pci_remove() when we remove a PCI device from the host, e.g., by disabling SR-IOV on a device. In hv_pci_remove(), the bus is already removed before the call, so we don't need to rescan the bus in the workqueue scheduled from hv_pci_devices_present(). By introducing bus state hv_pcibus_removed, we can avoid this situation. Reported-by: Xiaofeng Wang Signed-off-by: Long Li Signed-off-by: Bjorn Helgaas Acked-by: K. Y. Srinivasan --- drivers/pci/host/pci-hyperv.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index ada98569b78e..39fafda4deff 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -350,6 +350,7 @@ enum hv_pcibus_state { hv_pcibus_init = 0, hv_pcibus_probed, hv_pcibus_installed, + hv_pcibus_removed, hv_pcibus_maximum }; @@ -1504,13 +1505,24 @@ static void pci_devices_present_work(struct work_struct *work) put_pcichild(hpdev, hv_pcidev_ref_initial); } - /* Tell the core to rescan bus because there may have been changes. */ - if (hbus->state == hv_pcibus_installed) { + switch(hbus->state) { + case hv_pcibus_installed: + /* + * Tell the core to rescan bus + * because there may have been changes. + */ pci_lock_rescan_remove(); pci_scan_child_bus(hbus->pci_bus); pci_unlock_rescan_remove(); - } else { + break; + + case hv_pcibus_init: + case hv_pcibus_probed: survey_child_resources(hbus); + break; + + default: + break; } up(&hbus->enum_sem); @@ -2185,6 +2197,7 @@ static int hv_pci_probe(struct hv_device *hdev, hbus = kzalloc(sizeof(*hbus), GFP_KERNEL); if (!hbus) return -ENOMEM; + hbus->state = hv_pcibus_init; /* * The PCI bus "domain" is what is called "segment" in ACPI and @@ -2348,6 +2361,7 @@ static int hv_pci_remove(struct hv_device *hdev) pci_stop_root_bus(hbus->pci_bus); pci_remove_root_bus(hbus->pci_bus); pci_unlock_rescan_remove(); + hbus->state = hv_pcibus_removed; } hv_pci_bus_exit(hdev); From 414428c5da1c71986727c2fa5cdf1ed071e398d7 Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 23 Mar 2017 14:58:32 -0700 Subject: [PATCH 2/5] PCI: hv: Lock PCI bus on device eject A PCI_EJECT message can arrive at the same time we are calling pci_scan_child_bus() in the workqueue for the previous PCI_BUS_RELATIONS message or in create_root_hv_pci_bus(). In this case we could potentially modify the bus from multiple places. Properly lock the bus access. Thanks Dexuan Cui for pointing out the race condition in create_root_hv_pci_bus(). Reported-by: Xiaofeng Wang Signed-off-by: Long Li Signed-off-by: Bjorn Helgaas Acked-by: K. Y. Srinivasan --- drivers/pci/host/pci-hyperv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 39fafda4deff..a1b3c1957d43 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -1209,9 +1209,11 @@ static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus) hbus->pci_bus->msi = &hbus->msi_chip; hbus->pci_bus->msi->dev = &hbus->hdev->device; + pci_lock_rescan_remove(); pci_scan_child_bus(hbus->pci_bus); pci_bus_assign_resources(hbus->pci_bus); pci_bus_add_devices(hbus->pci_bus); + pci_unlock_rescan_remove(); hbus->state = hv_pcibus_installed; return 0; } @@ -1612,8 +1614,10 @@ static void hv_eject_device_work(struct work_struct *work) pdev = pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain, 0, wslot); if (pdev) { + pci_lock_rescan_remove(); pci_stop_and_remove_bus_device(pdev); pci_dev_put(pdev); + pci_unlock_rescan_remove(); } spin_lock_irqsave(&hpdev->hbus->device_list_lock, flags); From 433fcf6b7b31f1f233dd50aeb9d066a0f6ed4b9d Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 24 Mar 2017 11:07:21 -0700 Subject: [PATCH 3/5] PCI: hv: Specify CPU_AFFINITY_ALL for MSI affinity when >= 32 CPUs When we have 32 or more CPUs in the affinity mask, we should use a special constant to specify that to the host. Fix this issue. Signed-off-by: K. Y. Srinivasan Signed-off-by: Bjorn Helgaas Reviewed-by: Long Li Cc: --- drivers/pci/host/pci-hyperv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index a1b3c1957d43..8c952f694c2f 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -72,6 +72,7 @@ enum { PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1 }; +#define CPU_AFFINITY_ALL -1ULL #define PCI_CONFIG_MMIO_LENGTH 0x2000 #define CFG_PAGE_OFFSET 0x1000 #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET) @@ -898,9 +899,13 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) * processors because Hyper-V only supports 64 in a guest. */ affinity = irq_data_get_affinity_mask(data); - for_each_cpu_and(cpu, affinity, cpu_online_mask) { - int_pkt->int_desc.cpu_mask |= - (1ULL << vmbus_cpu_number_to_vp_number(cpu)); + if (cpumask_weight(affinity) >= 32) { + int_pkt->int_desc.cpu_mask = CPU_AFFINITY_ALL; + } else { + for_each_cpu_and(cpu, affinity, cpu_online_mask) { + int_pkt->int_desc.cpu_mask |= + (1ULL << vmbus_cpu_number_to_vp_number(cpu)); + } } ret = vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt, From 59c58ceeea9cdc6144d7b0303753e6bd26d87455 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 24 Mar 2017 11:07:22 -0700 Subject: [PATCH 4/5] PCI: hv: Allocate interrupt descriptors with GFP_ATOMIC The memory allocation here needs to be non-blocking. Fix the issue. Signed-off-by: K. Y. Srinivasan Signed-off-by: Bjorn Helgaas Reviewed-by: Long Li Cc: --- drivers/pci/host/pci-hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 8c952f694c2f..e73880c5d979 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -878,7 +878,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) hv_int_desc_free(hpdev, int_desc); } - int_desc = kzalloc(sizeof(*int_desc), GFP_KERNEL); + int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); if (!int_desc) goto drop_reference; From 24196f0c7d4bba093dfa8074507f31509970319f Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Tue, 18 Apr 2017 09:02:48 -0500 Subject: [PATCH 5/5] PCI: hv: Convert hv_pci_dev.refs from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Reviewed-by: Stephen Hemminger --- drivers/pci/host/pci-hyperv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index e73880c5d979..84936383e269 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -56,6 +56,7 @@ #include #include #include +#include #include /* @@ -423,7 +424,7 @@ enum hv_pcidev_ref_reason { struct hv_pci_dev { /* List protected by pci_rescan_remove_lock */ struct list_head list_entry; - atomic_t refs; + refcount_t refs; enum hv_pcichild_state state; struct pci_function_description desc; bool reported_missing; @@ -1262,13 +1263,13 @@ static void q_resource_requirements(void *context, struct pci_response *resp, static void get_pcichild(struct hv_pci_dev *hpdev, enum hv_pcidev_ref_reason reason) { - atomic_inc(&hpdev->refs); + refcount_inc(&hpdev->refs); } static void put_pcichild(struct hv_pci_dev *hpdev, enum hv_pcidev_ref_reason reason) { - if (atomic_dec_and_test(&hpdev->refs)) + if (refcount_dec_and_test(&hpdev->refs)) kfree(hpdev); } @@ -1322,7 +1323,7 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus, wait_for_completion(&comp_pkt.host_event); hpdev->desc = *desc; - get_pcichild(hpdev, hv_pcidev_ref_initial); + refcount_set(&hpdev->refs, 1); get_pcichild(hpdev, hv_pcidev_ref_childlist); spin_lock_irqsave(&hbus->device_list_lock, flags);