tmpfs: convert shmem_getpage_gfp to radix-swap
Convert shmem_getpage_gfp(), the engine-room of shmem, to expect page or
swap entry returned from radix tree by find_lock_page().  Whereas the
repetitive old method proceeded mainly under info->lock, dropping and
repeating whenever one of the conditions needed was not met, now we can
proceed without it, leaving shmem_add_to_page_cache() to check for a
race.

This way there is no need to preallocate a page, no need for an early
radix_tree_preload(), no need for mem_cgroup_shmem_charge_fallback().

Move the error unwinding down to the bottom instead of repeating it
throughout.  ENOSPC handling is a little different from before: there is
no longer any race between find_lock_page() and finding swap, but we can
arrive at ENOSPC before calling shmem_recalc_inode(), which might
occasionally discover freed space.

Be stricter to check i_size before returning.  info->lock is used for
little but alloced, swapped, i_blocks updates.  Move i_blocks updates out
from under the max_blocks check, so even an unlimited size=0 mount can
show accurate du.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 46f65ec15c
commit 54af604218

mm/shmem.c (259 lines changed)
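
For context: "page or swap entry returned from radix tree" relies on radix tree exceptional entries. A slot whose value has the 0x2 bit set is not a struct page pointer but a swp_entry_t shifted up past the tag bits, so find_lock_page() can now hand back either a locked page or such a tagged value, which shmem_getpage_gfp() unpacks at the top of its repeat: loop. The helpers below come from include/linux/radix-tree.h and the parent commit's additions to include/linux/swapops.h; they are quoted here as a reference sketch, not as part of this diff:

	#define RADIX_TREE_EXCEPTIONAL_ENTRY	2
	#define RADIX_TREE_EXCEPTIONAL_SHIFT	2

	/* true if the slot holds a non-page (swap) value */
	static inline int radix_tree_exceptional_entry(void *arg)
	{
		return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
	}

	/* pack a swap entry into a radix tree slot, tagging it exceptional */
	static inline void *swp_to_radix_entry(swp_entry_t entry)
	{
		unsigned long value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;
		return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
	}

	/* unpack it again on lookup */
	static inline swp_entry_t radix_to_swp_entry(void *arg)
	{
		swp_entry_t entry;
		entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
		return entry;
	}
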
diff --git a/mm/shmem.c b/mm/shmem.c
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -166,15 +166,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
 static LIST_HEAD(shmem_swaplist);
 static DEFINE_MUTEX(shmem_swaplist_mutex);
 
-static void shmem_free_blocks(struct inode *inode, long pages)
-{
-	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-	if (sbinfo->max_blocks) {
-		percpu_counter_add(&sbinfo->used_blocks, -pages);
-		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
-	}
-}
-
 static int shmem_reserve_inode(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
@@ -219,9 +210,12 @@ static void shmem_recalc_inode(struct inode *inode)
 
 	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
 	if (freed > 0) {
+		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+		if (sbinfo->max_blocks)
+			percpu_counter_add(&sbinfo->used_blocks, -freed);
 		info->alloced -= freed;
+		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
 		shmem_unacct_blocks(info->flags, freed);
-		shmem_free_blocks(inode, freed);
 	}
 }
 
@@ -888,205 +882,180 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
 {
 	struct address_space *mapping = inode->i_mapping;
-	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_inode_info *info;
 	struct shmem_sb_info *sbinfo;
 	struct page *page;
-	struct page *prealloc_page = NULL;
 	swp_entry_t swap;
 	int error;
+	int once = 0;
 
 	if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
 		return -EFBIG;
 repeat:
+	swap.val = 0;
 	page = find_lock_page(mapping, index);
-	if (page) {
+	if (radix_tree_exceptional_entry(page)) {
+		swap = radix_to_swp_entry(page);
+		page = NULL;
+	}
+
+	if (sgp != SGP_WRITE &&
+	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+		error = -EINVAL;
+		goto failed;
+	}
+
+	if (page || (sgp == SGP_READ && !swap.val)) {
 		/*
 		 * Once we can get the page lock, it must be uptodate:
 		 * if there were an error in reading back from swap,
 		 * the page would not be inserted into the filecache.
 		 */
-		BUG_ON(!PageUptodate(page));
-		goto done;
+		BUG_ON(page && !PageUptodate(page));
+		*pagep = page;
+		return 0;
 	}
 
 	/*
-	 * Try to preload while we can wait, to not make a habit of
-	 * draining atomic reserves; but don't latch on to this cpu.
+	 * Fast cache lookup did not find it:
+	 * bring it back from swap or allocate.
 	 */
-	error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
-	if (error)
-		goto out;
-	radix_tree_preload_end();
-
-	if (sgp != SGP_READ && !prealloc_page) {
-		prealloc_page = shmem_alloc_page(gfp, info, index);
-		if (prealloc_page) {
-			SetPageSwapBacked(prealloc_page);
-			if (mem_cgroup_cache_charge(prealloc_page,
-					current->mm, GFP_KERNEL)) {
-				page_cache_release(prealloc_page);
-				prealloc_page = NULL;
-			}
-		}
-	}
+	info = SHMEM_I(inode);
+	sbinfo = SHMEM_SB(inode->i_sb);
 
-	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
-	swap = shmem_get_swap(info, index);
 	if (swap.val) {
 		/* Look it up and read it in.. */
 		page = lookup_swap_cache(swap);
 		if (!page) {
-			spin_unlock(&info->lock);
 			/* here we actually do the io */
 			if (fault_type)
 				*fault_type |= VM_FAULT_MAJOR;
 			page = shmem_swapin(swap, gfp, info, index);
 			if (!page) {
-				swp_entry_t nswap = shmem_get_swap(info, index);
-				if (nswap.val == swap.val) {
-					error = -ENOMEM;
-					goto out;
-				}
-				goto repeat;
+				error = -ENOMEM;
+				goto failed;
 			}
-			wait_on_page_locked(page);
-			page_cache_release(page);
-			goto repeat;
 		}
 
 		/* We have to do this with page locked to prevent races */
-		if (!trylock_page(page)) {
-			spin_unlock(&info->lock);
-			wait_on_page_locked(page);
-			page_cache_release(page);
-			goto repeat;
-		}
-		if (PageWriteback(page)) {
-			spin_unlock(&info->lock);
-			wait_on_page_writeback(page);
-			unlock_page(page);
-			page_cache_release(page);
-			goto repeat;
-		}
+		lock_page(page);
 		if (!PageUptodate(page)) {
-			spin_unlock(&info->lock);
-			unlock_page(page);
-			page_cache_release(page);
 			error = -EIO;
-			goto out;
+			goto failed;
+		}
+		wait_on_page_writeback(page);
+
+		/* Someone may have already done it for us */
+		if (page->mapping) {
+			if (page->mapping == mapping &&
+			    page->index == index)
+				goto done;
+			error = -EEXIST;
+			goto failed;
 		}
 
-		error = add_to_page_cache_locked(page, mapping,
-						index, GFP_NOWAIT);
-		if (error) {
-			spin_unlock(&info->lock);
-			if (error == -ENOMEM) {
-				/*
-				 * reclaim from proper memory cgroup and
-				 * call memcg's OOM if needed.
-				 */
-				error = mem_cgroup_shmem_charge_fallback(
-						page, current->mm, gfp);
-				if (error) {
-					unlock_page(page);
-					page_cache_release(page);
-					goto out;
-				}
-			}
-			unlock_page(page);
-			page_cache_release(page);
-			goto repeat;
-		}
+		error = shmem_add_to_page_cache(page, mapping, index,
+					gfp, swp_to_radix_entry(swap));
+		if (error)
+			goto failed;
+
+		spin_lock(&info->lock);
+		info->swapped--;
+		shmem_recalc_inode(inode);
+		spin_unlock(&info->lock);
 
 		delete_from_swap_cache(page);
-		shmem_put_swap(info, index, (swp_entry_t){0});
-		info->swapped--;
-		spin_unlock(&info->lock);
 		set_page_dirty(page);
 		swap_free(swap);
 
-	} else if (sgp == SGP_READ) {
-		page = find_get_page(mapping, index);
-		if (page && !trylock_page(page)) {
-			spin_unlock(&info->lock);
-			wait_on_page_locked(page);
-			page_cache_release(page);
-			goto repeat;
+	} else {
+		if (shmem_acct_block(info->flags)) {
+			error = -ENOSPC;
+			goto failed;
 		}
-		spin_unlock(&info->lock);
-
-	} else if (prealloc_page) {
-		sbinfo = SHMEM_SB(inode->i_sb);
 		if (sbinfo->max_blocks) {
 			if (percpu_counter_compare(&sbinfo->used_blocks,
-						sbinfo->max_blocks) >= 0 ||
-			    shmem_acct_block(info->flags))
-				goto nospace;
+						sbinfo->max_blocks) >= 0) {
+				error = -ENOSPC;
+				goto unacct;
+			}
 			percpu_counter_inc(&sbinfo->used_blocks);
-			inode->i_blocks += BLOCKS_PER_PAGE;
-		} else if (shmem_acct_block(info->flags))
-			goto nospace;
-
-		page = prealloc_page;
-		prealloc_page = NULL;
-
-		swap = shmem_get_swap(info, index);
-		if (swap.val)
-			mem_cgroup_uncharge_cache_page(page);
-		else
-			error = add_to_page_cache_lru(page, mapping,
-						index, GFP_NOWAIT);
-		/*
-		 * At add_to_page_cache_lru() failure,
-		 * uncharge will be done automatically.
-		 */
-		if (swap.val || error) {
-			shmem_unacct_blocks(info->flags, 1);
-			shmem_free_blocks(inode, 1);
-			spin_unlock(&info->lock);
-			page_cache_release(page);
-			goto repeat;
 		}
 
+		page = shmem_alloc_page(gfp, info, index);
+		if (!page) {
+			error = -ENOMEM;
+			goto decused;
+		}
+
+		SetPageSwapBacked(page);
+		__set_page_locked(page);
+		error = shmem_add_to_page_cache(page, mapping, index,
+					gfp, NULL);
+		if (error)
+			goto decused;
+		lru_cache_add_anon(page);
+
+		spin_lock(&info->lock);
 		info->alloced++;
+		inode->i_blocks += BLOCKS_PER_PAGE;
+		shmem_recalc_inode(inode);
 		spin_unlock(&info->lock);
 
 		clear_highpage(page);
 		flush_dcache_page(page);
 		SetPageUptodate(page);
 		if (sgp == SGP_DIRTY)
 			set_page_dirty(page);
-
-	} else {
-		spin_unlock(&info->lock);
-		error = -ENOMEM;
-		goto out;
 	}
 done:
-	*pagep = page;
-	error = 0;
-out:
-	if (prealloc_page) {
-		mem_cgroup_uncharge_cache_page(prealloc_page);
-		page_cache_release(prealloc_page);
+	/* Perhaps the file has been truncated since we checked */
+	if (sgp != SGP_WRITE &&
+	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+		error = -EINVAL;
+		goto trunc;
 	}
-	return error;
+	*pagep = page;
+	return 0;
 
-nospace:
 	/*
-	 * Perhaps the page was brought in from swap between find_lock_page
-	 * and taking info->lock? We allow for that at add_to_page_cache_lru,
-	 * but must also avoid reporting a spurious ENOSPC while working on a
-	 * full tmpfs.
+	 * Error recovery.
 	 */
-	page = find_get_page(mapping, index);
+trunc:
+	ClearPageDirty(page);
+	delete_from_page_cache(page);
+	spin_lock(&info->lock);
+	info->alloced--;
+	inode->i_blocks -= BLOCKS_PER_PAGE;
 	spin_unlock(&info->lock);
+decused:
+	if (sbinfo->max_blocks)
+		percpu_counter_add(&sbinfo->used_blocks, -1);
+unacct:
+	shmem_unacct_blocks(info->flags, 1);
+failed:
+	if (swap.val && error != -EINVAL) {
+		struct page *test = find_get_page(mapping, index);
+		if (test && !radix_tree_exceptional_entry(test))
+			page_cache_release(test);
+		/* Have another try if the entry has changed */
+		if (test != swp_to_radix_entry(swap))
+			error = -EEXIST;
+	}
 	if (page) {
+		unlock_page(page);
 		page_cache_release(page);
+	}
+	if (error == -ENOSPC && !once++) {
+		info = SHMEM_I(inode);
+		spin_lock(&info->lock);
+		shmem_recalc_inode(inode);
+		spin_unlock(&info->lock);
 		goto repeat;
 	}
-	error = -ENOSPC;
-	goto out;
+	if (error == -EEXIST)
+		goto repeat;
+	return error;
 }
 
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -1095,9 +1064,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int error;
 	int ret = VM_FAULT_LOCKED;
 
-	if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
-		return VM_FAULT_SIGBUS;
-
 	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
 	if (error)
 		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
@@ -2164,8 +2130,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 	if (config.max_inodes < inodes)
 		goto out;
 	/*
-	 * Those tests also disallow limited->unlimited while any are in
-	 * use, so i_blocks will always be zero when max_blocks is zero;
+	 * Those tests disallow limited->unlimited while any are in use;
 	 * but we must separately disallow unlimited->limited, because
 	 * in that case we have no record of how much is already in use.
 	 */