From ac3abf2c37a9b0be604ea9825705a8510a9a6ba3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 8 Nov 2010 23:20:27 -0500 Subject: [PATCH 1/6] PCI hotplug: ibmphp: Add check to prevent reading beyond mapped area While testing various randconfigs with ktest.pl, I hit the following panic: BUG: unable to handle kernel paging request at f7e54b03 IP: [] ibmphp_access_ebda+0x101/0x19bb Adding printks, I found that the loop that reads the ebda blocks can move out of the mapped section. ibmphp_access_ebda: start=f7e44c00 size=5120 end=f7e46000 ibmphp_access_ebda: io_mem=f7e44d80 offset=384 ibmphp_access_ebda: io_mem=f7e54b03 offset=65283 The start of the iomap was at f7e44c00 and had a size of 5120, making the end f7e46000. We start with an offset of 0x180 or 384, giving the first read at 0xf7e44d80. Reading that location yields 65283, which is much bigger than the 5120 that was allocated and makes the next read at f7e54b03 which is outside the mapped area. Perhaps this is a bug in the driver, or buggy hardware, but this patch is more about not crashing my box on start up and just giving a warning if it detects this error. This patch at least lets my box boot with just a warning. Cc: Chandru Siddalingappa Signed-off-by: Steven Rostedt Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/ibmphp_ebda.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c index 5becbdee4027..2850e64dedae 100644 --- a/drivers/pci/hotplug/ibmphp_ebda.c +++ b/drivers/pci/hotplug/ibmphp_ebda.c @@ -276,6 +276,12 @@ int __init ibmphp_access_ebda (void) for (;;) { offset = next_offset; + + /* Make sure what we read is still in the mapped section */ + if (WARN(offset > (ebda_sz * 1024 - 4), + "ibmphp_ebda: next read is beyond ebda_sz\n")) + break; + next_offset = readw (io_mem + offset); /* offset of next blk */ offset += 2; From 4723d0f2f96e6c910f951d595067eb31e0dd2d01 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 22 Sep 2010 11:09:19 -0600 Subject: [PATCH 2/6] x86/PCI: coalesce overlapping host bridge windows Some BIOSes provide PCI host bridge windows that overlap, e.g., pci_root PNP0A03:00: host bridge window [mem 0xb0000000-0xffffffff] pci_root PNP0A03:00: host bridge window [mem 0xafffffff-0xdfffffff] pci_root PNP0A03:00: host bridge window [mem 0xf0000000-0xffffffff] If we simply insert these as children of iomem_resource, the second window fails because it conflicts with the first, and the third is inserted as a child of the first, i.e., b0000000-ffffffff PCI Bus 0000:00 f0000000-ffffffff PCI Bus 0000:00 When we claim PCI device resources, this can cause collisions like this if we put them in the first window: pci 0000:00:01.0: address space collision: [mem 0xff300000-0xff4fffff] conflicts with PCI Bus 0000:00 [mem 0xf0000000-0xffffffff] Host bridge windows are top-level resources by definition, so it doesn't make sense to make the third window a child of the first. This patch coalesces any host bridge windows that overlap. For the example above, the result is this single window: pci_root PNP0A03:00: host bridge window [mem 0xafffffff-0xffffffff] This fixes a 2.6.34 regression. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=17011 Reported-and-tested-by: Anisse Astier Reported-and-tested-by: Pramod Dematagoda Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 105 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 84 insertions(+), 21 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 15466c096ba5..0972315c3860 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -138,7 +138,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) struct acpi_resource_address64 addr; acpi_status status; unsigned long flags; - struct resource *root, *conflict; u64 start, end; status = resource_to_addr(acpi_res, &addr); @@ -146,12 +145,10 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; if (addr.resource_type == ACPI_MEMORY_RANGE) { - root = &iomem_resource; flags = IORESOURCE_MEM; if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY) flags |= IORESOURCE_PREFETCH; } else if (addr.resource_type == ACPI_IO_RANGE) { - root = &ioport_resource; flags = IORESOURCE_IO; } else return AE_OK; @@ -172,27 +169,92 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; } - conflict = insert_resource_conflict(root, res); - if (conflict) { - dev_err(&info->bridge->dev, - "address space collision: host bridge window %pR " - "conflicts with %s %pR\n", - res, conflict->name, conflict); - } else { - pci_bus_add_resource(info->bus, res, 0); - info->res_num++; - if (addr.translation_offset) - dev_info(&info->bridge->dev, "host bridge window %pR " - "(PCI address [%#llx-%#llx])\n", - res, res->start - addr.translation_offset, - res->end - addr.translation_offset); - else - dev_info(&info->bridge->dev, - "host bridge window %pR\n", res); - } + info->res_num++; + if (addr.translation_offset) + dev_info(&info->bridge->dev, "host bridge window %pR " + "(PCI address [%#llx-%#llx])\n", + res, res->start - addr.translation_offset, + res->end - addr.translation_offset); + else + dev_info(&info->bridge->dev, "host bridge window %pR\n", res); + return AE_OK; } +static bool resource_contains(struct resource *res, resource_size_t point) +{ + if (res->start <= point && point <= res->end) + return true; + return false; +} + +static void coalesce_windows(struct pci_root_info *info, int type) +{ + int i, j; + struct resource *res1, *res2; + + for (i = 0; i < info->res_num; i++) { + res1 = &info->res[i]; + if (!(res1->flags & type)) + continue; + + for (j = i + 1; j < info->res_num; j++) { + res2 = &info->res[j]; + if (!(res2->flags & type)) + continue; + + /* + * I don't like throwing away windows because then + * our resources no longer match the ACPI _CRS, but + * the kernel resource tree doesn't allow overlaps. + */ + if (resource_contains(res1, res2->start) || + resource_contains(res1, res2->end) || + resource_contains(res2, res1->start) || + resource_contains(res2, res1->end)) { + res1->start = min(res1->start, res2->start); + res1->end = max(res1->end, res2->end); + dev_info(&info->bridge->dev, + "host bridge window expanded to %pR; %pR ignored\n", + res1, res2); + res2->flags = 0; + } + } + } +} + +static void add_resources(struct pci_root_info *info) +{ + int i; + struct resource *res, *root, *conflict; + + if (!pci_use_crs) + return; + + coalesce_windows(info, IORESOURCE_MEM); + coalesce_windows(info, IORESOURCE_IO); + + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + else if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + continue; + + conflict = insert_resource_conflict(root, res); + if (conflict) + dev_err(&info->bridge->dev, + "address space collision: host bridge window %pR " + "conflicts with %s %pR\n", + res, conflict->name, conflict); + else + pci_bus_add_resource(info->bus, res, 0); + } +} + static void get_current_resources(struct acpi_device *device, int busnum, int domain, struct pci_bus *bus) @@ -224,6 +286,7 @@ get_current_resources(struct acpi_device *device, int busnum, acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, &info); + add_resources(&info); return; name_alloc_fail: From 3b519e4ea618b6943a82931630872907f9ac2c2b Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 10 Nov 2010 11:03:21 +0100 Subject: [PATCH 3/6] PCI: fix size checks for mmap() on /proc/bus/pci files The checks for valid mmaps of PCI resources made through /proc/bus/pci files that were introduced in 9eff02e2042f96fb2aedd02e032eca1c5333d767 have several problems: 1. mmap() calls on /proc/bus/pci files are made with real file offsets > 0, whereas under /sys/bus/pci/devices, the start of the resource corresponds to offset 0. This may lead to false negatives in pci_mmap_fits(), which implicitly assumes the /sys/bus/pci/devices layout. 2. The loop in proc_bus_pci_mmap doesn't skip empty resouces. This leads to false positives, because pci_mmap_fits() doesn't treat empty resources correctly (the calculated size is 1 << (8*sizeof(resource_size_t)-PAGE_SHIFT) in this case!). 3. If a user maps resources with BAR > 0, pci_mmap_fits will emit bogus WARNINGS for the first resources that don't fit until the correct one is found. On many controllers the first 2-4 BARs are used, and the others are empty. In this case, an mmap attempt will first fail on the non-empty BARs (including the "right" BAR because of 1.) and emit bogus WARNINGS because of 3., and finally succeed on the first empty BAR because of 2. This is certainly not the intended behaviour. This patch addresses all 3 issues. Updated with an enum type for the additional parameter for pci_mmap_fits(). Cc: stable@kernel.org Signed-off-by: Martin Wilck Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 22 ++++++++++++++++------ drivers/pci/pci.h | 7 ++++++- drivers/pci/proc.c | 2 +- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index b5a7d9bfcb24..25accc9dda3b 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -705,17 +705,21 @@ void pci_remove_legacy_files(struct pci_bus *b) #ifdef HAVE_PCI_MMAP -int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma) +int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma, + enum pci_mmap_api mmap_api) { - unsigned long nr, start, size; + unsigned long nr, start, size, pci_start; + if (pci_resource_len(pdev, resno) == 0) + return 0; nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; start = vma->vm_pgoff; size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1; - if (start < size && size - start >= nr) + pci_start = (mmap_api == PCI_MMAP_SYSFS) ? + pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0; + if (start >= pci_start && start < pci_start + size && + start + nr <= pci_start + size) return 1; - WARN(1, "process \"%s\" tried to map 0x%08lx-0x%08lx on %s BAR %d (size 0x%08lx)\n", - current->comm, start, start+nr, pci_name(pdev), resno, size); return 0; } @@ -745,8 +749,14 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, if (i >= PCI_ROM_RESOURCE) return -ENODEV; - if (!pci_mmap_fits(pdev, i, vma)) + if (!pci_mmap_fits(pdev, i, vma, PCI_MMAP_SYSFS)) { + WARN(1, "process \"%s\" tried to map 0x%08lx bytes " + "at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n", + current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff, + pci_name(pdev), i, + pci_resource_start(pdev, i), pci_resource_len(pdev, i)); return -EINVAL; + } /* pci_mmap_page_range() expects the same kind of entry as coming * from /proc/bus/pci/ which is a "user visible" value. If this is diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f5c7c382765f..7d33f6673868 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -22,8 +22,13 @@ extern void pci_remove_firmware_label_files(struct pci_dev *pdev); #endif extern void pci_cleanup_rom(struct pci_dev *dev); #ifdef HAVE_PCI_MMAP +enum pci_mmap_api { + PCI_MMAP_SYSFS, /* mmap on /sys/bus/pci/devices//resource */ + PCI_MMAP_PROCFS /* mmap on /proc/bus/pci/ */ +}; extern int pci_mmap_fits(struct pci_dev *pdev, int resno, - struct vm_area_struct *vma); + struct vm_area_struct *vmai, + enum pci_mmap_api mmap_api); #endif int pci_probe_reset_function(struct pci_dev *dev); diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 297b72c880a1..ea00647f4732 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -257,7 +257,7 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) /* Make sure the caller is mapping a real resource for this device */ for (i = 0; i < PCI_ROM_RESOURCE; i++) { - if (pci_mmap_fits(dev, i, vma)) + if (pci_mmap_fits(dev, i, vma, PCI_MMAP_PROCFS)) break; } From 97c145f7c87453cec90e91238fba5fe2c1561b32 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 5 Nov 2010 15:16:36 -0400 Subject: [PATCH 4/6] PCI: read current power state at enable time When we enable a PCI device, we avoid doing a lot of the initial setup work if the device's enable count is non-zero. If we don't fetch the power state though, we may later fail to set up MSI due to the unknown status. So pick it up before we short circuit the rest due to a pre-existing enable or mismatched enable/disable pair (as happens with VGA devices, which are special in a special way). Tested-by: Jesse Brandeburg Reported-by: Dave Airlie Tested-by: Dave Airlie Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e98c8104297b..710c8a29be0d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1007,6 +1007,18 @@ static int __pci_enable_device_flags(struct pci_dev *dev, int err; int i, bars = 0; + /* + * Power state could be unknown at this point, either due to a fresh + * boot or a device removal call. So get the current power state + * so that things like MSI message writing will behave as expected + * (e.g. if the device really is in D0 at enable time). + */ + if (dev->pm_cap) { + u16 pmcsr; + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); + } + if (atomic_add_return(1, &dev->enable_cnt) > 1) return 0; /* already enabled */ From 82e3e767c21fef2b1b38868e20eb4e470a1e38e3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 10 Nov 2010 10:26:07 -0700 Subject: [PATCH 5/6] PCI: fix pci_bus_alloc_resource() hang, prefer positive decode When a PCI bus has two resources with the same start/end, e.g., pci_bus 0000:04: resource 2 [mem 0xd0000000-0xd7ffffff pref] pci_bus 0000:04: resource 7 [mem 0xd0000000-0xd7ffffff] the previous pci_bus_find_resource_prev() implementation would alternate between them forever: pci_bus_find_resource_prev(... [mem 0xd0000000-0xd7ffffff pref]) returns [mem 0xd0000000-0xd7ffffff] pci_bus_find_resource_prev(... [mem 0xd0000000-0xd7ffffff]) returns [mem 0xd0000000-0xd7ffffff pref] pci_bus_find_resource_prev(... [mem 0xd0000000-0xd7ffffff pref]) returns [mem 0xd0000000-0xd7ffffff] ... This happened because there was no ordering between two resources with the same start and end. A resource that had the same start and end as the cursor, but was not itself the cursor, was considered to be before the cursor. This patch fixes the hang by making a fixed ordering between any two resources. In addition, it tries to allocate from positively decoded regions before using any subtractively decoded resources. This means we will use a positive decode region before a subtractive decode one, even if it means using a smaller address. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=22062 Reported-by: Borislav Petkov Tested-by: Borislav Petkov Acked-by: Linus Torvalds Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 70 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 5624db8c9ad0..003170ea2e39 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -64,17 +64,57 @@ void pci_bus_remove_resources(struct pci_bus *bus) } } +static bool pci_bus_resource_better(struct resource *res1, bool pos1, + struct resource *res2, bool pos2) +{ + /* If exactly one is positive decode, always prefer that one */ + if (pos1 != pos2) + return pos1 ? true : false; + + /* Prefer the one that contains the highest address */ + if (res1->end != res2->end) + return (res1->end > res2->end) ? true : false; + + /* Otherwise, prefer the one with highest "center of gravity" */ + if (res1->start != res2->start) + return (res1->start > res2->start) ? true : false; + + /* Otherwise, choose one arbitrarily (but consistently) */ + return (res1 > res2) ? true : false; +} + +static bool pci_bus_resource_positive(struct pci_bus *bus, struct resource *res) +{ + struct pci_bus_resource *bus_res; + + /* + * This relies on the fact that pci_bus.resource[] refers to P2P or + * CardBus bridge base/limit registers, which are always positively + * decoded. The pci_bus.resources list contains host bridge or + * subtractively decoded resources. + */ + list_for_each_entry(bus_res, &bus->resources, list) { + if (bus_res->res == res) + return (bus_res->flags & PCI_SUBTRACTIVE_DECODE) ? + false : true; + } + return true; +} + /* - * Find the highest-address bus resource below the cursor "res". If the - * cursor is NULL, return the highest resource. + * Find the next-best bus resource after the cursor "res". If the cursor is + * NULL, return the best resource. "Best" means that we prefer positive + * decode regions over subtractive decode, then those at higher addresses. */ static struct resource *pci_bus_find_resource_prev(struct pci_bus *bus, unsigned int type, struct resource *res) { + bool res_pos, r_pos, prev_pos = false; struct resource *r, *prev = NULL; int i; + res_pos = pci_bus_resource_positive(bus, res); pci_bus_for_each_resource(bus, r, i) { if (!r) continue; @@ -82,26 +122,14 @@ static struct resource *pci_bus_find_resource_prev(struct pci_bus *bus, if ((r->flags & IORESOURCE_TYPE_BITS) != type) continue; - /* If this resource is at or past the cursor, skip it */ - if (res) { - if (r == res) - continue; - if (r->end > res->end) - continue; - if (r->end == res->end && r->start > res->start) - continue; + r_pos = pci_bus_resource_positive(bus, r); + if (!res || pci_bus_resource_better(res, res_pos, r, r_pos)) { + if (!prev || pci_bus_resource_better(r, r_pos, + prev, prev_pos)) { + prev = r; + prev_pos = r_pos; + } } - - if (!prev) - prev = r; - - /* - * A small resource is higher than a large one that ends at - * the same address. - */ - if (r->end > prev->end || - (r->end == prev->end && r->start > prev->start)) - prev = r; } return prev; From e25cd062b16ed1d41a157aec5a108abd6ff2e9f9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 13 Nov 2010 08:44:33 -0800 Subject: [PATCH 6/6] PCI: sysfs: fix printk warnings Cast pci_resource_start() and pci_resource_len() to u64 for printk. drivers/pci/pci-sysfs.c:753: warning: format '%16Lx' expects type 'long long unsigned int', but argument 9 has type 'resource_size_t' drivers/pci/pci-sysfs.c:753: warning: format '%16Lx' expects type 'long long unsigned int', but argument 10 has type 'resource_size_t' Signed-off-by: Randy Dunlap Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 25accc9dda3b..95712a375cd5 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -754,7 +754,8 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, "at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n", current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff, pci_name(pdev), i, - pci_resource_start(pdev, i), pci_resource_len(pdev, i)); + (u64)pci_resource_start(pdev, i), + (u64)pci_resource_len(pdev, i)); return -EINVAL; }