From 01945fa248ff6d34f5fdb8106118910b77b76f91 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 27 Feb 2015 15:51:40 -0800 Subject: [PATCH 01/13] ocfs2: update web page + git tree in documentation We (the Ocfs2 project) recently moved the location of our ocfs2-tools git tree and project web page. The pertinent discussion can be seen here: https://oss.oracle.com/pipermail/ocfs2-devel/2015-February/010579.html The following patch updates the Ocfs2 documentation in MAINTAINERS, ocfs2.txt, and dlmfs.txt. I added our new official web page, changed the location of our tools git tree and removed the link to Joel's ancient kernel git tree - Andrew has handled our patches for a while now. Signed-off-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/dlmfs.txt | 4 ++-- Documentation/filesystems/ocfs2.txt | 4 ++-- MAINTAINERS | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/dlmfs.txt b/Documentation/filesystems/dlmfs.txt index 1b528b2ad809..fcf4d509d118 100644 --- a/Documentation/filesystems/dlmfs.txt +++ b/Documentation/filesystems/dlmfs.txt @@ -5,8 +5,8 @@ system. dlmfs is built with OCFS2 as it requires most of its infrastructure. -Project web page: http://oss.oracle.com/projects/ocfs2 -Tools web page: http://oss.oracle.com/projects/ocfs2-tools +Project web page: http://ocfs2.wiki.kernel.org +Tools web page: https://github.com/markfasheh/ocfs2-tools OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ All code copyright 2005 Oracle except when otherwise noted. diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 28f8c08201e2..4c49e5410595 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -8,8 +8,8 @@ also make it attractive for non-clustered use. You'll want to install the ocfs2-tools package in order to at least get "mount.ocfs2" and "ocfs2_hb_ctl". -Project web page: http://oss.oracle.com/projects/ocfs2 -Tools web page: http://oss.oracle.com/projects/ocfs2-tools +Project web page: http://ocfs2.wiki.kernel.org +Tools git tree: https://github.com/markfasheh/ocfs2-tools OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ All code copyright 2005 Oracle except when otherwise noted. diff --git a/MAINTAINERS b/MAINTAINERS index ddc5a8cf9a8a..eaf999638a65 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7213,8 +7213,7 @@ ORACLE CLUSTER FILESYSTEM 2 (OCFS2) M: Mark Fasheh M: Joel Becker L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers) -W: http://oss.oracle.com/projects/ocfs2/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2.git +W: http://ocfs2.wiki.kernel.org S: Supported F: Documentation/filesystems/ocfs2.txt F: Documentation/filesystems/dlmfs.txt From da616534ed7f6e8ffaab699258b55c8d78d0b4ea Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 27 Feb 2015 15:51:43 -0800 Subject: [PATCH 02/13] mm/nommu: fix memory leak Maxime reported the following memory leak regression due to commit dbc8358c7237 ("mm/nommu: use alloc_pages_exact() rather than its own implementation"). On v3.19, I am facing a memory leak. Each time I run a command one page is lost. Here an example with busybox's free command: / # free total used free shared buffers cached Mem: 7928 1972 5956 0 0 492 -/+ buffers/cache: 1480 6448 / # free total used free shared buffers cached Mem: 7928 1976 5952 0 0 492 -/+ buffers/cache: 1484 6444 / # free total used free shared buffers cached Mem: 7928 1980 5948 0 0 492 -/+ buffers/cache: 1488 6440 / # free total used free shared buffers cached Mem: 7928 1984 5944 0 0 492 -/+ buffers/cache: 1492 6436 / # free total used free shared buffers cached Mem: 7928 1988 5940 0 0 492 -/+ buffers/cache: 1496 6432 At some point, the system fails to sastisfy 256KB allocations: free: page allocation failure: order:6, mode:0xd0 CPU: 0 PID: 67 Comm: free Not tainted 3.19.0-05389-gacf2cf1-dirty #64 Hardware name: STM32 (Device Tree Support) show_stack+0xb/0xc warn_alloc_failed+0x97/0xbc __alloc_pages_nodemask+0x295/0x35c __get_free_pages+0xb/0x24 alloc_pages_exact+0x19/0x24 do_mmap_pgoff+0x423/0x658 vm_mmap_pgoff+0x3f/0x4e load_flat_file+0x20d/0x4f8 load_flat_binary+0x3f/0x26c search_binary_handler+0x51/0xe4 do_execveat_common+0x271/0x35c do_execve+0x19/0x1c ret_fast_syscall+0x1/0x4a Mem-info: Normal per-cpu: CPU 0: hi: 0, btch: 1 usd: 0 active_anon:0 inactive_anon:0 isolated_anon:0 active_file:0 inactive_file:0 isolated_file:0 unevictable:123 dirty:0 writeback:0 unstable:0 free:1515 slab_reclaimable:17 slab_unreclaimable:139 mapped:0 shmem:0 pagetables:0 bounce:0 free_cma:0 Normal free:6060kB min:352kB low:440kB high:528kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:492kB isolated(anon):0ks lowmem_reserve[]: 0 0 Normal: 23*4kB (U) 22*8kB (U) 24*16kB (U) 23*32kB (U) 23*64kB (U) 23*128kB (U) 1*256kB (U) 0*512kB 0*1024kB 0*2048kB 0*4096kB = 6060kB 123 total pagecache pages 2048 pages of RAM 1538 free pages 66 reserved pages 109 slab pages -46 pages shared 0 pages swap cached nommu: Allocation of length 221184 from process 67 (free) failed Normal per-cpu: CPU 0: hi: 0, btch: 1 usd: 0 active_anon:0 inactive_anon:0 isolated_anon:0 active_file:0 inactive_file:0 isolated_file:0 unevictable:123 dirty:0 writeback:0 unstable:0 free:1515 slab_reclaimable:17 slab_unreclaimable:139 mapped:0 shmem:0 pagetables:0 bounce:0 free_cma:0 Normal free:6060kB min:352kB low:440kB high:528kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:492kB isolated(anon):0ks lowmem_reserve[]: 0 0 Normal: 23*4kB (U) 22*8kB (U) 24*16kB (U) 23*32kB (U) 23*64kB (U) 23*128kB (U) 1*256kB (U) 0*512kB 0*1024kB 0*2048kB 0*4096kB = 6060kB 123 total pagecache pages Unable to allocate RAM for process text/data, errno 12 SEGV This problem happens because we allocate ordered page through __get_free_pages() in do_mmap_private() in some cases and we try to free individual pages rather than ordered page in free_page_series(). In this case, freeing pages whose refcount is not 0 won't be freed to the page allocator so memory leak happens. To fix the problem, this patch changes __get_free_pages() to alloc_pages_exact() since alloc_pages_exact() returns physically-contiguous pages but each pages are refcounted. Fixes: dbc8358c7237 ("mm/nommu: use alloc_pages_exact() rather than its own implementation"). Reported-by: Maxime Coquelin Tested-by: Maxime Coquelin Signed-off-by: Joonsoo Kim Cc: [3.19] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/nommu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/nommu.c b/mm/nommu.c index 7296360fc057..3e67e7538ecf 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1213,11 +1213,9 @@ static int do_mmap_private(struct vm_area_struct *vma, if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) { total = point; kdebug("try to alloc exact %lu pages", total); - base = alloc_pages_exact(len, GFP_KERNEL); - } else { - base = (void *)__get_free_pages(GFP_KERNEL, order); } + base = alloc_pages_exact(total << PAGE_SHIFT, GFP_KERNEL); if (!base) goto enomem; From 4e54dede38b45052a941bcf709f7d29f2e18174d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 27 Feb 2015 15:51:46 -0800 Subject: [PATCH 03/13] memcg: fix low limit calculation A memcg is considered low limited even when the current usage is equal to the low limit. This leads to interesting side effects e.g. groups/hierarchies with no memory accounted are considered protected and so the reclaim will emit MEMCG_LOW event when encountering them. Another and much bigger issue was reported by Joonsoo Kim. He has hit a NULL ptr dereference with the legacy cgroup API which even doesn't have low limit exposed. The limit is 0 by default but the initial check fails for memcg with 0 consumption and parent_mem_cgroup() would return NULL if use_hierarchy is 0 and so page_counter_read would try to dereference NULL. I suppose that the current implementation is just an overlook because the documentation in Documentation/cgroups/unified-hierarchy.txt says: "The memory.low boundary on the other hand is a top-down allocated reserve. A cgroup enjoys reclaim protection when it and all its ancestors are below their low boundaries" Fix the usage and the low limit comparision in mem_cgroup_low accordingly. Fixes: 241994ed8649 (mm: memcontrol: default hierarchy interface for memory) Reported-by: Joonsoo Kim Signed-off-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d18d3a6e7337..76c5a1b1dd21 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5426,7 +5426,7 @@ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg) if (memcg == root_mem_cgroup) return false; - if (page_counter_read(&memcg->memory) > memcg->low) + if (page_counter_read(&memcg->memory) >= memcg->low) return false; while (memcg != root) { @@ -5435,7 +5435,7 @@ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg) if (memcg == root_mem_cgroup) break; - if (page_counter_read(&memcg->memory) > memcg->low) + if (page_counter_read(&memcg->memory) >= memcg->low) return false; } return true; From 682354d4e0bf9a28f11b52ef9be7fb38b901bd5c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 27 Feb 2015 15:51:48 -0800 Subject: [PATCH 04/13] rtc: ds1685: fix ds1685_rtc_alarm_irq_enable build error The newly added ds1685 driver causes a build error when enabled without CONFIG_RTC_INTF_DEV: drivers/rtc/rtc-ds1685.c:919:22: error: 'ds1685_rtc_alarm_irq_enable' undeclared here (not in a function) .alarm_irq_enable = ds1685_rtc_alarm_irq_enable, Apparently the driver was incorrectly changed to reflect the interface change from 16380c153a69c ("RTC: Convert rtc drivers to use the alarm_irq_enable method"), which removed the respective #ifdef from all other rtc drivers. This does the same change that was merged for the other drivers before and removes the #ifdef, allowing the interrupts to be enabled through the in-kernel rtc interface independent of the existence of /dev/rtc. Fixes: aaaf5fbf56f ("rtc: add driver for DS1685 family of real time clocks") Signed-off-by: Arnd Bergmann Acked-by: Joshua Kinard Cc: Ralf Baechle Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-ds1685.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index 8c3bfcb115b7..5dc1ef9d0008 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -528,7 +528,6 @@ ds1685_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) /* ----------------------------------------------------------------------- */ /* /dev/rtcX Interface functions */ -#ifdef CONFIG_RTC_INTF_DEV /** * ds1685_rtc_alarm_irq_enable - replaces ioctl() RTC_AIE on/off. * @dev: pointer to device structure. @@ -557,7 +556,6 @@ ds1685_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) return 0; } -#endif /* ----------------------------------------------------------------------- */ From 39ea34cc07fb1ca8059239290967e006bbdacceb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 27 Feb 2015 15:51:51 -0800 Subject: [PATCH 05/13] rtc: ds1685: remove superfluous checks for out-of-range u8 values drivers/rtc/rtc-ds1685.c: In function `ds1685_rtc_read_alarm': drivers/rtc/rtc-ds1685.c:402: warning: comparison is always true due to limited range of data type drivers/rtc/rtc-ds1685.c:409: warning: comparison is always true due to limited range of data type drivers/rtc/rtc-ds1685.c:416: warning: comparison is always true due to limited range of data type drivers/rtc/rtc-ds1685.c: In function `ds1685_rtc_set_alarm': drivers/rtc/rtc-ds1685.c:475: warning: comparison is always true due to limited range of data type drivers/rtc/rtc-ds1685.c:478: warning: comparison is always true due to limited range of data type drivers/rtc/rtc-ds1685.c:481: warning: comparison is always true due to limited range of data type u8 cannot contain a value larger than 0xff, hence drop the checks. Wrapping the checks in unlikely() indicated some sense of humor, though ;-) Signed-off-by: Geert Uytterhoeven Acked-by: Joshua Kinard Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-ds1685.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index 5dc1ef9d0008..29b461c76072 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -399,21 +399,21 @@ ds1685_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) * of this RTC chip. We check for it anyways in case support is * added in the future. */ - if (unlikely((seconds >= 0xc0) && (seconds <= 0xff))) + if (unlikely(seconds >= 0xc0)) alrm->time.tm_sec = -1; else alrm->time.tm_sec = ds1685_rtc_bcd2bin(rtc, seconds, RTC_SECS_BCD_MASK, RTC_SECS_BIN_MASK); - if (unlikely((minutes >= 0xc0) && (minutes <= 0xff))) + if (unlikely(minutes >= 0xc0)) alrm->time.tm_min = -1; else alrm->time.tm_min = ds1685_rtc_bcd2bin(rtc, minutes, RTC_MINS_BCD_MASK, RTC_MINS_BIN_MASK); - if (unlikely((hours >= 0xc0) && (hours <= 0xff))) + if (unlikely(hours >= 0xc0)) alrm->time.tm_hour = -1; else alrm->time.tm_hour = ds1685_rtc_bcd2bin(rtc, hours, @@ -472,13 +472,13 @@ ds1685_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) * field, and we only support four fields. We put the support * here anyways for the future. */ - if (unlikely((seconds >= 0xc0) && (seconds <= 0xff))) + if (unlikely(seconds >= 0xc0)) seconds = 0xff; - if (unlikely((minutes >= 0xc0) && (minutes <= 0xff))) + if (unlikely(minutes >= 0xc0)) minutes = 0xff; - if (unlikely((hours >= 0xc0) && (hours <= 0xff))) + if (unlikely(hours >= 0xc0)) hours = 0xff; alrm->time.tm_mon = -1; From 586a1a125eb157f0c2b9925ba878f24f4fe0c667 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 27 Feb 2015 15:51:53 -0800 Subject: [PATCH 06/13] scripts/gdb: add empty package initialization script This got lost during the initial merge process: Python requires an __init__.py script, even if empty, in order to accept a directory as package. Add it, this time as a non-empty file. Signed-off-by: Jan Kiszka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/gdb/linux/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 scripts/gdb/linux/__init__.py diff --git a/scripts/gdb/linux/__init__.py b/scripts/gdb/linux/__init__.py new file mode 100644 index 000000000000..4680fb176337 --- /dev/null +++ b/scripts/gdb/linux/__init__.py @@ -0,0 +1 @@ +# nothing to do for the initialization of this package From 957ed60b53b519064a54988c4e31e0087e47d091 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 27 Feb 2015 15:51:56 -0800 Subject: [PATCH 07/13] nilfs2: fix potential memory overrun on inode Each inode of nilfs2 stores a root node of a b-tree, and it turned out to have a memory overrun issue: Each b-tree node of nilfs2 stores a set of key-value pairs and the number of them (in "bn_nchildren" member of nilfs_btree_node struct), as well as a few other "bn_*" members. Since the value of "bn_nchildren" is used for operations on the key-values within the b-tree node, it can cause memory access overrun if a large number is incorrectly set to "bn_nchildren". For instance, nilfs_btree_node_lookup() function determines the range of binary search with it, and too large "bn_nchildren" leads nilfs_btree_node_get_key() in that function to overrun. As for intermediate b-tree nodes, this is prevented by a sanity check performed when each node is read from a drive, however, no sanity check has been done for root nodes stored in inodes. This patch fixes the issue by adding missing sanity check against b-tree root nodes so that it's called when on-memory inodes are read from ifile, inode metadata file. Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/btree.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index b2e3ff347620..ecdbae19a766 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -31,6 +31,8 @@ #include "alloc.h" #include "dat.h" +static void __nilfs_btree_init(struct nilfs_bmap *bmap); + static struct nilfs_btree_path *nilfs_btree_alloc_path(void) { struct nilfs_btree_path *path; @@ -368,6 +370,34 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, return ret; } +/** + * nilfs_btree_root_broken - verify consistency of btree root node + * @node: btree root node to be examined + * @ino: inode number + * + * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. + */ +static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, + unsigned long ino) +{ + int level, flags, nchildren; + int ret = 0; + + level = nilfs_btree_node_get_level(node); + flags = nilfs_btree_node_get_flags(node); + nchildren = nilfs_btree_node_get_nchildren(node); + + if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || + level > NILFS_BTREE_LEVEL_MAX || + nchildren < 0 || + nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { + pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", + ino, level, flags, nchildren); + ret = 1; + } + return ret; +} + int nilfs_btree_broken_node_block(struct buffer_head *bh) { int ret; @@ -1713,7 +1743,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, /* convert and insert */ dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; - nilfs_btree_init(btree); + __nilfs_btree_init(btree); if (nreq != NULL) { nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); @@ -2294,12 +2324,23 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_gather_data = NULL, }; -int nilfs_btree_init(struct nilfs_bmap *bmap) +static void __nilfs_btree_init(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops; bmap->b_nchildren_per_block = NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); - return 0; +} + +int nilfs_btree_init(struct nilfs_bmap *bmap) +{ + int ret = 0; + + __nilfs_btree_init(bmap); + + if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), + bmap->b_inode->i_ino)) + ret = -EIO; + return ret; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) From b00eeaedece2e8cb1607cb015f10e572e2607c49 Mon Sep 17 00:00:00 2001 From: Joshua Kinard Date: Fri, 27 Feb 2015 15:51:59 -0800 Subject: [PATCH 08/13] drivers/rtc/rtc-ds1685.c: fix conditional in ds1685_rtc_sysfs_time_regs_{show,store} Fix a conditional statement checking for NULL in both ds1685_rtc_sysfs_time_regs_show and ds1685_rtc_sysfs_time_regs_store that was using a logical AND when it should be using a logical OR so that we fail out of the function properly if the condition ever evaluates to true. Fixes: aaaf5fbf56f1 ("rtc: add driver for DS1685 family of real time clocks") Signed-off-by: Joshua Kinard Reported-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-ds1685.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index 29b461c76072..803869c7d7c2 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -1610,7 +1610,7 @@ ds1685_rtc_sysfs_time_regs_show(struct device *dev, ds1685_rtc_sysfs_time_regs_lookup(attr->attr.name, false); /* Make sure we actually matched something. */ - if (!bcd_reg_info && !bin_reg_info) + if (!bcd_reg_info || !bin_reg_info) return -EINVAL; /* bcd_reg_info->reg == bin_reg_info->reg. */ @@ -1648,7 +1648,7 @@ ds1685_rtc_sysfs_time_regs_store(struct device *dev, return -EINVAL; /* Make sure we actually matched something. */ - if (!bcd_reg_info && !bin_reg_info) + if (!bcd_reg_info || !bin_reg_info) return -EINVAL; /* Check for a valid range. */ From 2ea55a2caee016daee511431217861fb04767b27 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 27 Feb 2015 15:52:01 -0800 Subject: [PATCH 09/13] zram: use proper type to update max_used_pages max_used_pages is defined as atomic_long_t so we need to use unsigned long to keep temporary value for it rather than int which is smaller than unsigned long in a 64 bit system. Signed-off-by: Joonsoo Kim Cc: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8e233edd7a09..871bd3550cb0 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -528,7 +528,7 @@ out_cleanup: static inline void update_used_max(struct zram *zram, const unsigned long pages) { - int old_max, cur_max; + unsigned long old_max, cur_max; old_max = atomic_long_read(&zram->stats.max_used_pages); From d2973697b377e8b061e179c6057e94151759a73d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 27 Feb 2015 15:52:04 -0800 Subject: [PATCH 10/13] mm: memcontrol: use "max" instead of "infinity" in control knobs The memcg control knobs indicate the highest possible value using the symbolic name "infinity", which is long and awkward to type. Switch to the string "max", which is just as descriptive but shorter and sweeter. This changes a user interface, so do it before the release and before the development flag is dropped from the default hierarchy. Signed-off-by: Johannes Weiner Cc: Michal Hocko Cc: Tejun Heo Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/unified-hierarchy.txt | 4 ++-- mm/memcontrol.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index 71daa35ec2d9..eb102fb72213 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -404,8 +404,8 @@ supported and the interface files "release_agent" and be understood as an underflow into the highest possible value, -2 or -10M etc. do not work, so it's not consistent. - memory.low, memory.high, and memory.max will use the string - "infinity" to indicate and set the highest possible value. + memory.low, memory.high, and memory.max will use the string "max" to + indicate and set the highest possible value. 5. Planned Changes diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 76c5a1b1dd21..9fe07692eaad 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5247,7 +5247,7 @@ static int memory_low_show(struct seq_file *m, void *v) unsigned long low = ACCESS_ONCE(memcg->low); if (low == PAGE_COUNTER_MAX) - seq_puts(m, "infinity\n"); + seq_puts(m, "max\n"); else seq_printf(m, "%llu\n", (u64)low * PAGE_SIZE); @@ -5262,7 +5262,7 @@ static ssize_t memory_low_write(struct kernfs_open_file *of, int err; buf = strstrip(buf); - err = page_counter_memparse(buf, "infinity", &low); + err = page_counter_memparse(buf, "max", &low); if (err) return err; @@ -5277,7 +5277,7 @@ static int memory_high_show(struct seq_file *m, void *v) unsigned long high = ACCESS_ONCE(memcg->high); if (high == PAGE_COUNTER_MAX) - seq_puts(m, "infinity\n"); + seq_puts(m, "max\n"); else seq_printf(m, "%llu\n", (u64)high * PAGE_SIZE); @@ -5292,7 +5292,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, int err; buf = strstrip(buf); - err = page_counter_memparse(buf, "infinity", &high); + err = page_counter_memparse(buf, "max", &high); if (err) return err; @@ -5307,7 +5307,7 @@ static int memory_max_show(struct seq_file *m, void *v) unsigned long max = ACCESS_ONCE(memcg->memory.limit); if (max == PAGE_COUNTER_MAX) - seq_puts(m, "infinity\n"); + seq_puts(m, "max\n"); else seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE); @@ -5322,7 +5322,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, int err; buf = strstrip(buf); - err = page_counter_memparse(buf, "infinity", &max); + err = page_counter_memparse(buf, "max", &max); if (err) return err; From 39afb5ee4640b4ed2cdd9e12b2a67cf785cfced8 Mon Sep 17 00:00:00 2001 From: Jon DeVree Date: Fri, 27 Feb 2015 15:52:07 -0800 Subject: [PATCH 11/13] kernel/sys.c: fix UNAME26 for 4.0 There's a uname workaround for broken userspace which can't handle kernel versions of 3.x. Update it for 4.x. Signed-off-by: Jon DeVree Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sys.c b/kernel/sys.c index 667b2e62fad2..a03d9cd23ed7 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1108,6 +1108,7 @@ DECLARE_RWSEM(uts_sem); /* * Work around broken programs that cannot handle "Linux 3.0". * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 + * And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60. */ static int override_release(char __user *release, size_t len) { @@ -1127,7 +1128,7 @@ static int override_release(char __user *release, size_t len) break; rest++; } - v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; + v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 60; copy = clamp_t(size_t, len, 1, sizeof(buf)); copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); ret = copy_to_user(release, buf, copy + 1); From cc87317726f851531ae8422e0c2d3d6e2d7b1955 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 27 Feb 2015 15:52:09 -0800 Subject: [PATCH 12/13] mm: page_alloc: revert inadvertent !__GFP_FS retry behavior change Historically, !__GFP_FS allocations were not allowed to invoke the OOM killer once reclaim had failed, but nevertheless kept looping in the allocator. Commit 9879de7373fc ("mm: page_alloc: embed OOM killing naturally into allocation slowpath"), which should have been a simple cleanup patch, accidentally changed the behavior to aborting the allocation at that point. This creates problems with filesystem callers (?) that currently rely on the allocator waiting for other tasks to intervene. Revert the behavior as it shouldn't have been changed as part of a cleanup patch. Fixes: 9879de7373fc ("mm: page_alloc: embed OOM killing naturally into allocation slowpath") Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reported-by: Tetsuo Handa Cc: Theodore Ts'o Cc: Dave Chinner Acked-by: David Rientjes Cc: Oleg Nesterov Cc: Mel Gorman Cc: [3.19.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a47f0b229a1a..7abfa70cdc1a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2353,8 +2353,15 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, if (ac->high_zoneidx < ZONE_NORMAL) goto out; /* The OOM killer does not compensate for light reclaim */ - if (!(gfp_mask & __GFP_FS)) + if (!(gfp_mask & __GFP_FS)) { + /* + * XXX: Page reclaim didn't yield anything, + * and the OOM killer can't be invoked, but + * keep looping as per should_alloc_retry(). + */ + *did_some_progress = 1; goto out; + } /* * GFP_THISNODE contains __GFP_NORETRY and we never hit this. * Sanity check for bare calls of __GFP_THISNODE, not real OOM. From c07af4f1ce413d009ea76ee69099f06f73ce75b2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 27 Feb 2015 15:52:12 -0800 Subject: [PATCH 13/13] mm: add missing __PAGETABLE_{PUD,PMD}_FOLDED defines Core mm expects __PAGETABLE_{PUD,PMD}_FOLDED to be defined if these page table levels folded. Usually, these defines are provided by and . But some architectures fold page table levels in a custom way. They need to define these macros themself. This patch adds missing defines. The patch fixes mm->nr_pmds underflow and eliminates dead __pmd_alloc() and __pud_alloc() on architectures without these page table levels. Signed-off-by: Kirill A. Shutemov Cc: Aaro Koskinen Cc: David Howells Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Koichi Yasutake Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/include/asm/pgtable.h | 2 ++ arch/m32r/include/asm/pgtable-2level.h | 1 + arch/m68k/include/asm/pgtable_mm.h | 2 ++ arch/mn10300/include/asm/pgtable.h | 2 ++ arch/parisc/include/asm/pgtable.h | 1 + arch/s390/include/asm/pgtable.h | 2 ++ 6 files changed, 10 insertions(+) diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index 93bcf2abd1a1..07d7a7ef8bd5 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -123,12 +123,14 @@ extern unsigned long empty_zero_page; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) #define PTRS_PER_PGD 64 +#define __PAGETABLE_PUD_FOLDED #define PUD_SHIFT 26 #define PTRS_PER_PUD 1 #define PUD_SIZE (1UL << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE - 1)) #define PUE_SIZE 256 +#define __PAGETABLE_PMD_FOLDED #define PMD_SHIFT 26 #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE - 1)) diff --git a/arch/m32r/include/asm/pgtable-2level.h b/arch/m32r/include/asm/pgtable-2level.h index 8fd8ee70266a..421e6ba3a173 100644 --- a/arch/m32r/include/asm/pgtable-2level.h +++ b/arch/m32r/include/asm/pgtable-2level.h @@ -13,6 +13,7 @@ * the M32R is two-level, so we don't really have any * PMD directory physically. */ +#define __PAGETABLE_PMD_FOLDED #define PMD_SHIFT 22 #define PTRS_PER_PMD 1 diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index 28a145bfbb71..35ed4a9981ae 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -54,10 +54,12 @@ */ #ifdef CONFIG_SUN3 #define PTRS_PER_PTE 16 +#define __PAGETABLE_PMD_FOLDED #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 2048 #elif defined(CONFIG_COLDFIRE) #define PTRS_PER_PTE 512 +#define __PAGETABLE_PMD_FOLDED #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 1024 #else diff --git a/arch/mn10300/include/asm/pgtable.h b/arch/mn10300/include/asm/pgtable.h index afab728ab65e..96d3f9deb59c 100644 --- a/arch/mn10300/include/asm/pgtable.h +++ b/arch/mn10300/include/asm/pgtable.h @@ -56,7 +56,9 @@ extern void paging_init(void); #define PGDIR_SHIFT 22 #define PTRS_PER_PGD 1024 #define PTRS_PER_PUD 1 /* we don't really have any PUD physically */ +#define __PAGETABLE_PUD_FOLDED #define PTRS_PER_PMD 1 /* we don't really have any PMD physically */ +#define __PAGETABLE_PMD_FOLDED #define PTRS_PER_PTE 1024 #define PGD_SIZE PAGE_SIZE diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 8c966b2270aa..15207b9362bf 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -96,6 +96,7 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); #if PT_NLEVELS == 3 #define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) #else +#define __PAGETABLE_PMD_FOLDED #define BITS_PER_PMD 0 #endif #define PTRS_PER_PMD (1UL << BITS_PER_PMD) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fbb5ee3ae57c..e08ec38f8c6e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -91,7 +91,9 @@ extern unsigned long zero_page_mask; */ #define PTRS_PER_PTE 256 #ifndef CONFIG_64BIT +#define __PAGETABLE_PUD_FOLDED #define PTRS_PER_PMD 1 +#define __PAGETABLE_PMD_FOLDED #define PTRS_PER_PUD 1 #else /* CONFIG_64BIT */ #define PTRS_PER_PMD 2048