From 16fbdce62d9c89b794e303f4a232e4749b77e9ac Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 10 May 2012 13:01:43 -0700 Subject: [PATCH 1/8] proc/pid/pagemap: correctly report non-present ptes and holes between vmas Reset the current pagemap-entry if the current pte isn't present, or if current vma is over. Otherwise pagemap reports last entry again and again. Non-present pte reporting was broken in commit 092b50bacd1c ("pagemap: introduce data structure for pagemap entry") Reporting for holes was broken in commit 5aaabe831eb5 ("pagemap: avoid splitting thp when reading /proc/pid/pagemap") Signed-off-by: Konstantin Khlebnikov Reported-by: Pavel Emelyanov Cc: Naoya Horiguchi Cc: KAMEZAWA Hiroyuki Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2d60492d6df8..1030a716d155 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -747,6 +747,8 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte) else if (pte_present(pte)) *pme = make_pme(PM_PFRAME(pte_pfn(pte)) | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + else + *pme = make_pme(PM_NOT_PRESENT); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -761,6 +763,8 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, if (pmd_present(pmd)) *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + else + *pme = make_pme(PM_NOT_PRESENT); } #else static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, @@ -801,8 +805,10 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, /* check to see if we've left 'vma' behind * and need a new, higher one */ - if (vma && (addr >= vma->vm_end)) + if (vma && (addr >= vma->vm_end)) { vma = find_vma(walk->mm, addr); + pme = make_pme(PM_NOT_PRESENT); + } /* check that 'vma' actually covers this address, * and that it isn't a huge page vma */ @@ -830,6 +836,8 @@ static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, if (pte_present(pte)) *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + else + *pme = make_pme(PM_NOT_PRESENT); } /* This function walks within one hugetlb entry in the single call */ @@ -839,7 +847,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, { struct pagemapread *pm = walk->private; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); + pagemap_entry_t pme; for (; addr != end; addr += PAGE_SIZE) { int offset = (addr & ~hmask) >> PAGE_SHIFT; From 93278814d3590eba0ee360b8d69a35c7f2203ea8 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 10 May 2012 13:01:44 -0700 Subject: [PATCH 2/8] mm: fix division by 0 in percpu_pagelist_fraction() percpu_pagelist_fraction_sysctl_handler() has only considered -EINVAL as a possible error from proc_dointvec_minmax(). If any other error is returned, it would proceed to divide by zero since percpu_pagelist_fraction wasn't getting initialized at any point. For example, writing 0 bytes into the proc file would trigger the issue. Signed-off-by: Sasha Levin Reviewed-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a712fb9e04ce..b21b3db15a7f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -105,7 +105,7 @@ unsigned long totalreserve_pages __read_mostly; */ unsigned long dirty_balance_reserve __read_mostly; -int percpu_pagelist_fraction; +int percpu_pagelist_fraction = 8; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; #ifdef CONFIG_PM_SLEEP @@ -5203,7 +5203,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, int ret; ret = proc_dointvec_minmax(table, write, buffer, length, ppos); - if (!write || (ret == -EINVAL)) + if (!write || (ret < 0)) return ret; for_each_populated_zone(zone) { for_each_possible_cpu(cpu) { From 4998a6c0edce7fae9c0a5463f6ec3fa585258ee7 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 10 May 2012 13:01:44 -0700 Subject: [PATCH 3/8] hugetlb: prevent BUG_ON in hugetlb_fault() -> hugetlb_cow() Commit 66aebce747eaf ("hugetlb: fix race condition in hugetlb_fault()") added code to avoid a race condition by elevating the page refcount in hugetlb_fault() while calling hugetlb_cow(). However, one code path in hugetlb_cow() includes an assertion that the page count is 1, whereas it may now also have the value 2 in this path. The consensus is that this BUG_ON has served its purpose, so rather than extending it to cover both cases, we just remove it. Signed-off-by: Chris Metcalf Acked-by: Mel Gorman Acked-by: Hillf Danton Acked-by: Hugh Dickins Cc: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: [3.0.29+, 3.2.16+, 3.3.3+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5a16423a512c..ae8f708e3d75 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2498,7 +2498,6 @@ retry_avoidcopy: if (outside_reserve) { BUG_ON(huge_pte_none(pte)); if (unmap_ref_private(mm, vma, old_page, address)) { - BUG_ON(page_count(old_page) != 1); BUG_ON(huge_pte_none(pte)); spin_lock(&mm->page_table_lock); ptep = huge_pte_offset(mm, address & huge_page_mask(h)); From 5e2bf0142231194d36fdc9596b36a261ed2b9fe7 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 10 May 2012 13:01:45 -0700 Subject: [PATCH 4/8] namespaces, pid_ns: fix leakage on fork() failure Fork() failure post namespace creation for a child cloned with CLONE_NEWPID leaks pid_namespace/mnt_cache due to proc being mounted during creation, but not unmounted during cleanup. Call pid_ns_release_proc() during cleanup. Signed-off-by: Mike Galbraith Acked-by: Oleg Nesterov Reviewed-by: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Cyrill Gorcunov Cc: Louis Rilling Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/fork.c b/kernel/fork.c index b9372a0bff18..687a15d56243 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1464,6 +1465,8 @@ bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: + if (unlikely(clone_flags & CLONE_NEWPID)) + pid_ns_release_proc(p->nsproxy->pid_ns); exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) From 8c7577637ca31385e92769a77e2ab5b428e8b99c Mon Sep 17 00:00:00 2001 From: Sha Zhengju Date: Thu, 10 May 2012 13:01:45 -0700 Subject: [PATCH 5/8] memcg: free spare array to avoid memory leak When the last event is unregistered, there is no need to keep the spare array anymore. So free it to avoid memory leak. Signed-off-by: Sha Zhengju Acked-by: KAMEZAWA Hiroyuki Reviewed-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 31ab9c3f0178..b659260c56ad 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4507,6 +4507,12 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp, swap_buffers: /* Swap primary and spare array */ thresholds->spare = thresholds->primary; + /* If all events are unregistered, free the spare array */ + if (!new) { + kfree(thresholds->spare); + thresholds->spare = NULL; + } + rcu_assign_pointer(thresholds->primary, new); /* To be sure that nobody uses thresholds */ From b8cd742acfd78a4689148fb80cf74bc26e7f1f3c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 May 2012 13:01:46 -0700 Subject: [PATCH 6/8] drivers/leds: correct __devexit annotations __devexit functions are discarded without CONFIG_HOTPLUG, so they need to be referenced carefully. A __devexit function may also not be called from a __devinit function. Signed-off-by: Arnd Bergmann Signed-off-by: Mathieu Poirier Cc: Bryan Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/leds/leds-netxbig.c | 4 ++-- drivers/leds/leds-ns2.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c index d8433f2d53bc..73973fdbd8be 100644 --- a/drivers/leds/leds-netxbig.c +++ b/drivers/leds/leds-netxbig.c @@ -112,7 +112,7 @@ err_free_addr: return err; } -static void __devexit gpio_ext_free(struct netxbig_gpio_ext *gpio_ext) +static void gpio_ext_free(struct netxbig_gpio_ext *gpio_ext) { int i; @@ -294,7 +294,7 @@ static ssize_t netxbig_led_sata_show(struct device *dev, static DEVICE_ATTR(sata, 0644, netxbig_led_sata_show, netxbig_led_sata_store); -static void __devexit delete_netxbig_led(struct netxbig_led_data *led_dat) +static void delete_netxbig_led(struct netxbig_led_data *led_dat) { if (led_dat->mode_val[NETXBIG_LED_SATA] != NETXBIG_LED_INVALID_MODE) device_remove_file(led_dat->cdev.dev, &dev_attr_sata); diff --git a/drivers/leds/leds-ns2.c b/drivers/leds/leds-ns2.c index 2f0a14421a73..01cf89ec6944 100644 --- a/drivers/leds/leds-ns2.c +++ b/drivers/leds/leds-ns2.c @@ -255,7 +255,7 @@ err_free_cmd: return ret; } -static void __devexit delete_ns2_led(struct ns2_led_data *led_dat) +static void delete_ns2_led(struct ns2_led_data *led_dat) { device_remove_file(led_dat->cdev.dev, &dev_attr_sata); led_classdev_unregister(&led_dat->cdev); From 6bc2e853c6b46a6041980d58200ad9b0a73a60ff Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Thu, 10 May 2012 13:01:46 -0700 Subject: [PATCH 7/8] mm: nobootmem: fix sign extend problem in __free_pages_memory() Systems with 8 TBytes of memory or greater can hit a problem where only the the first 8 TB of memory shows up. This is due to "int i" being smaller than "unsigned long start_aligned", causing the high bits to be dropped. The fix is to change `i' to unsigned long to match start_aligned and end_aligned. Thanks to Jack Steiner for assistance tracking this down. Signed-off-by: Russ Anderson Cc: Jack Steiner Cc: Johannes Weiner Cc: Tejun Heo Cc: David S. Miller Cc: Yinghai Lu Cc: Gavin Shan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/nobootmem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/nobootmem.c b/mm/nobootmem.c index e53bb8a256b1..1983fb1c7026 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -82,8 +82,7 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size) static void __init __free_pages_memory(unsigned long start, unsigned long end) { - int i; - unsigned long start_aligned, end_aligned; + unsigned long i, start_aligned, end_aligned; int order = ilog2(BITS_PER_LONG); start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); From 17ff3c1fa4c7bd0c38d751715033023ebf32fc96 Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Thu, 10 May 2012 13:01:47 -0700 Subject: [PATCH 8/8] MAINTAINERS: add maintainer for LED subsystem Add Bryan Wu as the primary maintainer for drivers/leds Signed-off-by: Bryan Wu Acked-by: Richard Purdie Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 707163365a93..de4e2806030f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4034,6 +4034,7 @@ F: Documentation/scsi/53c700.txt F: drivers/scsi/53c700* LED SUBSYSTEM +M: Bryan Wu M: Richard Purdie S: Maintained F: drivers/leds/