ae687e58b3
When we map pages in the buffer cache, we can do so in GFP_NOFS contexts. However, the vmap interfaces do not provide any method of communicating this information to memory reclaim, and hence we get lockdep complaining about it regularly and occassionally see hangs that may be vmap related reclaim deadlocks. We can also see these same problems from anywhere where we use vmalloc for a large buffer (e.g. attribute code) inside a transaction context. A typical lockdep report shows up as a reclaim state warning like so: [14046.101458] ================================= [14046.102850] [ INFO: inconsistent lock state ] [14046.102850] 3.14.0-rc4+ #2 Not tainted [14046.102850] --------------------------------- [14046.102850] inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. [14046.102850] kswapd0/14 [HC0[0]:SC0[0]:HE1:SE1] takes: [14046.102850] (&xfs_dir_ilock_class){++++?+}, at: [<791a04bb>] xfs_ilock+0xff/0x16a [14046.102850] {RECLAIM_FS-ON-W} state was registered at: [14046.102850] [<7904cdb1>] mark_held_locks+0x81/0xe7 [14046.102850] [<7904d390>] lockdep_trace_alloc+0x5c/0xb4 [14046.102850] [<790c2c28>] kmem_cache_alloc_trace+0x2b/0x11e [14046.102850] [<790ba7f4>] vm_map_ram+0x119/0x3e6 [14046.102850] [<7914e124>] _xfs_buf_map_pages+0x5b/0xcf [14046.102850] [<7914ed74>] xfs_buf_get_map+0x67/0x13f [14046.102850] [<7917506f>] xfs_attr_rmtval_set+0x396/0x4d5 [14046.102850] [<7916e8bb>] xfs_attr_leaf_addname+0x18f/0x37d [14046.102850] [<7916ed9e>] xfs_attr_set_int+0x2f5/0x3e8 [14046.102850] [<7916eefc>] xfs_attr_set+0x6b/0x74 [14046.102850] [<79168355>] xfs_xattr_set+0x61/0x81 [14046.102850] [<790e5b10>] generic_setxattr+0x59/0x68 [14046.102850] [<790e4c06>] __vfs_setxattr_noperm+0x58/0xce [14046.102850] [<790e4d0a>] vfs_setxattr+0x8e/0x92 [14046.102850] [<790e4ddd>] setxattr+0xcf/0x159 [14046.102850] [<790e5423>] SyS_lsetxattr+0x88/0xbb [14046.102850] [<79268438>] sysenter_do_call+0x12/0x36 Now, we can't completely remove these traces - mainly because vm_map_ram() will do GFP_KERNEL allocation and that generates the above warning before we get into the reclaim code, but we can turn them all into false positive warnings. To do that, use the method that DM and other IO context code uses to avoid this problem: there is a process flag to tell memory reclaim not to do IO that we can set appropriately. That prevents GFP_KERNEL context reclaim being done from deep inside the vmalloc code in places we can't directly pass a GFP_NOFS context to. That interface has a pair of wrapper functions: memalloc_noio_save() and memalloc_noio_restore(). Adding them around vm_map_ram and the vzalloc call in kmem_alloc_large() will prevent deadlocks and most lockdep reports for this issue. Also, convert the vzalloc() call in kmem_alloc_large() to use __vmalloc() so that we can pass the correct gfp context to the data page allocation routine inside __vmalloc() so that it is clear that GFP_NOFS context is important to this vmalloc call. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
139 lines
3.3 KiB
C
139 lines
3.3 KiB
C
/*
|
|
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it would be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
#include <linux/mm.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/backing-dev.h>
|
|
#include "time.h"
|
|
#include "kmem.h"
|
|
#include "xfs_message.h"
|
|
|
|
/*
|
|
* Greedy allocation. May fail and may return vmalloced memory.
|
|
*/
|
|
void *
|
|
kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
|
|
{
|
|
void *ptr;
|
|
size_t kmsize = maxsize;
|
|
|
|
while (!(ptr = vzalloc(kmsize))) {
|
|
if ((kmsize >>= 1) <= minsize)
|
|
kmsize = minsize;
|
|
}
|
|
if (ptr)
|
|
*size = kmsize;
|
|
return ptr;
|
|
}
|
|
|
|
void *
|
|
kmem_alloc(size_t size, xfs_km_flags_t flags)
|
|
{
|
|
int retries = 0;
|
|
gfp_t lflags = kmem_flags_convert(flags);
|
|
void *ptr;
|
|
|
|
do {
|
|
ptr = kmalloc(size, lflags);
|
|
if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
|
|
return ptr;
|
|
if (!(++retries % 100))
|
|
xfs_err(NULL,
|
|
"possible memory allocation deadlock in %s (mode:0x%x)",
|
|
__func__, lflags);
|
|
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
} while (1);
|
|
}
|
|
|
|
void *
|
|
kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
|
|
{
|
|
unsigned noio_flag = 0;
|
|
void *ptr;
|
|
gfp_t lflags;
|
|
|
|
ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
|
|
if (ptr)
|
|
return ptr;
|
|
|
|
/*
|
|
* __vmalloc() will allocate data pages and auxillary structures (e.g.
|
|
* pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context
|
|
* here. Hence we need to tell memory reclaim that we are in such a
|
|
* context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering
|
|
* the filesystem here and potentially deadlocking.
|
|
*/
|
|
if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
|
|
noio_flag = memalloc_noio_save();
|
|
|
|
lflags = kmem_flags_convert(flags);
|
|
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
|
|
|
|
if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
|
|
memalloc_noio_restore(noio_flag);
|
|
|
|
return ptr;
|
|
}
|
|
|
|
void
|
|
kmem_free(const void *ptr)
|
|
{
|
|
if (!is_vmalloc_addr(ptr)) {
|
|
kfree(ptr);
|
|
} else {
|
|
vfree(ptr);
|
|
}
|
|
}
|
|
|
|
void *
|
|
kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
|
|
xfs_km_flags_t flags)
|
|
{
|
|
void *new;
|
|
|
|
new = kmem_alloc(newsize, flags);
|
|
if (ptr) {
|
|
if (new)
|
|
memcpy(new, ptr,
|
|
((oldsize < newsize) ? oldsize : newsize));
|
|
kmem_free(ptr);
|
|
}
|
|
return new;
|
|
}
|
|
|
|
void *
|
|
kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags)
|
|
{
|
|
int retries = 0;
|
|
gfp_t lflags = kmem_flags_convert(flags);
|
|
void *ptr;
|
|
|
|
do {
|
|
ptr = kmem_cache_alloc(zone, lflags);
|
|
if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
|
|
return ptr;
|
|
if (!(++retries % 100))
|
|
xfs_err(NULL,
|
|
"possible memory allocation deadlock in %s (mode:0x%x)",
|
|
__func__, lflags);
|
|
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
} while (1);
|
|
}
|