Merge branch 'akpm' (fixes from Andrew Morton)

Merge misc fixes from Andrew Morton:
 "15 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  MAINTAINERS: add IIO include files
  kernel/panic.c: update comments for print_tainted
  mem-hotplug: reset node present pages when hot-adding a new pgdat
  mem-hotplug: reset node managed pages when hot-adding a new pgdat
  mm/debug-pagealloc: correct freepage accounting and order resetting
  fanotify: fix notification of groups with inode & mount marks
  mm, compaction: prevent infinite loop in compact_zone
  mm: alloc_contig_range: demote pages busy message from warn to info
  mm/slab: fix unalignment problem on Malta with EVA due to slab merge
  mm/page_alloc: restrict max order of merging on isolated pageblock
  mm/page_alloc: move freepage counting logic to __free_one_page()
  mm/page_alloc: add freepage on isolate pageblock to correct buddy list
  mm/page_alloc: fix incorrect isolation behavior by rechecking migratetype
  mm/compaction: skip the range until proper target pageblock is met
  zram: avoid kunmap_atomic() of a NULL pointer
This commit is contained in:
Linus Torvalds 2014-11-13 16:57:25 -08:00
commit 971ad4e4d6
19 changed files with 240 additions and 76 deletions

View File

@ -4716,6 +4716,7 @@ L: linux-iio@vger.kernel.org
S: Maintained S: Maintained
F: drivers/iio/ F: drivers/iio/
F: drivers/staging/iio/ F: drivers/staging/iio/
F: include/linux/iio/
IKANOS/ADI EAGLE ADSL USB DRIVER IKANOS/ADI EAGLE ADSL USB DRIVER
M: Matthieu Castet <castet.matthieu@free.fr> M: Matthieu Castet <castet.matthieu@free.fr>

View File

@ -560,7 +560,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
} }
if (page_zero_filled(uncmem)) { if (page_zero_filled(uncmem)) {
kunmap_atomic(user_mem); if (user_mem)
kunmap_atomic(user_mem);
/* Free memory associated with this sector now. */ /* Free memory associated with this sector now. */
bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index); zram_free_page(zram, index);

View File

@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
&fsnotify_mark_srcu); &fsnotify_mark_srcu);
} }
/*
* We need to merge inode & vfsmount mark lists so that inode mark
* ignore masks are properly reflected for mount mark notifications.
* That's why this traversal is so complicated...
*/
while (inode_node || vfsmount_node) { while (inode_node || vfsmount_node) {
inode_group = vfsmount_group = NULL; inode_group = NULL;
inode_mark = NULL;
vfsmount_group = NULL;
vfsmount_mark = NULL;
if (inode_node) { if (inode_node) {
inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
vfsmount_group = vfsmount_mark->group; vfsmount_group = vfsmount_mark->group;
} }
if (inode_group > vfsmount_group) { if (inode_group && vfsmount_group) {
/* handle inode */ int cmp = fsnotify_compare_groups(inode_group,
ret = send_to_group(to_tell, inode_mark, NULL, mask, vfsmount_group);
data, data_is, cookie, file_name); if (cmp > 0) {
/* we didn't use the vfsmount_mark */ inode_group = NULL;
vfsmount_group = NULL; inode_mark = NULL;
} else if (vfsmount_group > inode_group) { } else if (cmp < 0) {
ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, vfsmount_group = NULL;
data, data_is, cookie, file_name); vfsmount_mark = NULL;
inode_group = NULL; }
} else {
ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
mask, data, data_is, cookie,
file_name);
} }
ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
data, data_is, cookie, file_name);
if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
goto out; goto out;

View File

@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group);
/* protects reads of inode and vfsmount marks list */ /* protects reads of inode and vfsmount marks list */
extern struct srcu_struct fsnotify_mark_srcu; extern struct srcu_struct fsnotify_mark_srcu;
/* compare two groups for sorting of marks lists */
extern int fsnotify_compare_groups(struct fsnotify_group *a,
struct fsnotify_group *b);
extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
__u32 mask); __u32 mask);
/* add a mark to an inode */ /* add a mark to an inode */

View File

@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
{ {
struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark *lmark, *last = NULL;
int ret = 0; int ret = 0;
int cmp;
mark->flags |= FSNOTIFY_MARK_FLAG_INODE; mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
goto out; goto out;
} }
if (mark->group->priority < lmark->group->priority) cmp = fsnotify_compare_groups(lmark->group, mark->group);
continue; if (cmp < 0)
if ((mark->group->priority == lmark->group->priority) &&
(mark->group < lmark->group))
continue; continue;
hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);

View File

@ -209,6 +209,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
mark->ignored_mask = mask; mark->ignored_mask = mask;
} }
/*
* Sorting function for lists of fsnotify marks.
*
* Fanotify supports different notification classes (reflected as priority of
* notification group). Events shall be passed to notification groups in
* decreasing priority order. To achieve this marks in notification lists for
* inodes and vfsmounts are sorted so that priorities of corresponding groups
* are descending.
*
* Furthermore correct handling of the ignore mask requires processing inode
* and vfsmount marks of each group together. Using the group address as
* further sort criterion provides a unique sorting order and thus we can
* merge inode and vfsmount lists of marks in linear time and find groups
* present in both lists.
*
* A return value of 1 signifies that b has priority over a.
* A return value of 0 signifies that the two marks have to be handled together.
* A return value of -1 signifies that a has priority over b.
*/
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
	/* Identical groups (including two NULLs) are handled together. */
	if (a == b)
		return 0;

	/* A missing group always loses against a present one. */
	if (!a)
		return 1;
	if (!b)
		return -1;

	/* The group with the numerically larger priority wins. */
	if (a->priority != b->priority)
		return (a->priority < b->priority) ? 1 : -1;

	/* Equal priority: fall back to the group address as tie-breaker. */
	return (a < b) ? 1 : -1;
}
/* /*
* Attach an initialized mark to a given group and fs object. * Attach an initialized mark to a given group and fs object.
* These marks may be used for the fsnotify backend to determine which * These marks may be used for the fsnotify backend to determine which

View File

@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
struct mount *m = real_mount(mnt); struct mount *m = real_mount(mnt);
struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark *lmark, *last = NULL;
int ret = 0; int ret = 0;
int cmp;
mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
goto out; goto out;
} }
if (mark->group->priority < lmark->group->priority) cmp = fsnotify_compare_groups(lmark->group, mark->group);
continue; if (cmp < 0)
if ((mark->group->priority == lmark->group->priority) &&
(mark->group < lmark->group))
continue; continue;
hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);

View File

@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
extern unsigned long free_all_bootmem(void); extern unsigned long free_all_bootmem(void);
extern void reset_node_managed_pages(pg_data_t *pgdat);
extern void reset_all_zones_managed_pages(void); extern void reset_all_zones_managed_pages(void);
extern void free_bootmem_node(pg_data_t *pgdat, extern void free_bootmem_node(pg_data_t *pgdat,

View File

@ -431,6 +431,15 @@ struct zone {
*/ */
int nr_migrate_reserve_block; int nr_migrate_reserve_block;
#ifdef CONFIG_MEMORY_ISOLATION
/*
* Number of isolated pageblocks. It is used to solve the incorrect
* freepage counting problem caused by racy retrieval of the pageblock
* migratetype. Protected by zone->lock.
*/
unsigned long nr_isolate_pageblock;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_MEMORY_HOTPLUG
/* see spanned/present_pages for more description */ /* see spanned/present_pages for more description */
seqlock_t span_seqlock; seqlock_t span_seqlock;

View File

@ -2,6 +2,10 @@
#define __LINUX_PAGEISOLATION_H #define __LINUX_PAGEISOLATION_H
#ifdef CONFIG_MEMORY_ISOLATION #ifdef CONFIG_MEMORY_ISOLATION
/* Tell whether @zone currently has at least one isolated pageblock. */
static inline bool has_isolate_pageblock(struct zone *zone)
{
	return zone->nr_isolate_pageblock != 0;
}
static inline bool is_migrate_isolate_page(struct page *page) static inline bool is_migrate_isolate_page(struct page *page)
{ {
return get_pageblock_migratetype(page) == MIGRATE_ISOLATE; return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype)
return migratetype == MIGRATE_ISOLATE; return migratetype == MIGRATE_ISOLATE;
} }
#else #else
/* Stub for the #else branch: always reports no isolated pageblock. */
static inline bool has_isolate_pageblock(struct zone *zone)
{
return false;
}
static inline bool is_migrate_isolate_page(struct page *page) static inline bool is_migrate_isolate_page(struct page *page)
{ {
return false; return false;

View File

@ -244,6 +244,7 @@ static const struct tnt tnts[] = {
* 'I' - Working around severe firmware bug. * 'I' - Working around severe firmware bug.
* 'O' - Out-of-tree module has been loaded. * 'O' - Out-of-tree module has been loaded.
* 'E' - Unsigned module has been loaded. * 'E' - Unsigned module has been loaded.
* 'L' - A soft lockup has previously occurred.
* *
* The string is overwritten by the next call to print_tainted(). * The string is overwritten by the next call to print_tainted().
*/ */

View File

@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
static int reset_managed_pages_done __initdata; static int reset_managed_pages_done __initdata;
static inline void __init reset_node_managed_pages(pg_data_t *pgdat) void reset_node_managed_pages(pg_data_t *pgdat)
{ {
struct zone *z; struct zone *z;
if (reset_managed_pages_done)
return;
for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
z->managed_pages = 0; z->managed_pages = 0;
} }
@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void)
{ {
struct pglist_data *pgdat; struct pglist_data *pgdat;
if (reset_managed_pages_done)
return;
for_each_online_pgdat(pgdat) for_each_online_pgdat(pgdat)
reset_node_managed_pages(pgdat); reset_node_managed_pages(pgdat);
reset_managed_pages_done = 1; reset_managed_pages_done = 1;
} }

View File

@ -479,6 +479,16 @@ isolate_freepages_range(struct compact_control *cc,
block_end_pfn = min(block_end_pfn, end_pfn); block_end_pfn = min(block_end_pfn, end_pfn);
/*
* pfn could pass block_end_pfn if the isolated freepage is
* larger than pageblock order. In this case, we adjust the
* scanning range to the pageblock that actually contains pfn.
*/
if (pfn >= block_end_pfn) {
block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
block_end_pfn = min(block_end_pfn, end_pfn);
}
if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
break; break;
@ -1029,8 +1039,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
} }
acct_isolated(zone, cc); acct_isolated(zone, cc);
/* Record where migration scanner will be restarted */ /*
cc->migrate_pfn = low_pfn; * Record where migration scanner will be restarted. If we end up in
* the same pageblock as the free scanner, make the scanners fully
* meet so that compact_finished() terminates compaction.
*/
cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn;
return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE; return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
} }

View File

@ -108,6 +108,31 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
/* /*
* in mm/page_alloc.c * in mm/page_alloc.c
*/ */
/*
* Locate the struct page for both the matching buddy in our
* pair (buddy1) and the combined O(n+1) page they form (page).
*
* 1) Any buddy B1 will have an order O twin B2 which satisfies
* the following equation:
* B2 = B1 ^ (1 << O)
* For example, if the starting buddy (buddy2) is #8 its order
* 1 buddy is #10:
* B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
*
* 2) Any buddy B will have an order O+1 parent P which
* satisfies the following equation:
* P = B & ~(1 << O)
*
* Assumption: *_mem_map is contiguous at least up to MAX_ORDER
*/
static inline unsigned long
__find_buddy_index(unsigned long page_idx, unsigned int order)
{
	/* Flipping bit 'order' of the index yields the order-'order' twin. */
	unsigned long order_bit = 1 << order;

	return page_idx ^ order_bit;
}
extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __free_pages_bootmem(struct page *page, unsigned int order); extern void __free_pages_bootmem(struct page *page, unsigned int order);
extern void prep_compound_page(struct page *page, unsigned long order); extern void prep_compound_page(struct page *page, unsigned long order);
#ifdef CONFIG_MEMORY_FAILURE #ifdef CONFIG_MEMORY_FAILURE

View File

@ -31,6 +31,7 @@
#include <linux/stop_machine.h> #include <linux/stop_machine.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/bootmem.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
@ -1066,6 +1067,16 @@ out:
} }
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
/* Zero present_pages in every zone of @pgdat and the node-wide total. */
static void reset_node_present_pages(pg_data_t *pgdat)
{
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++)
		pgdat->node_zones[zid].present_pages = 0;

	pgdat->node_present_pages = 0;
}
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
{ {
@ -1096,6 +1107,21 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
build_all_zonelists(pgdat, NULL); build_all_zonelists(pgdat, NULL);
mutex_unlock(&zonelists_mutex); mutex_unlock(&zonelists_mutex);
/*
* zone->managed_pages is set to an approximate value in
* free_area_init_core(), which will cause
* /sys/devices/system/node/nodeX/meminfo to report wrong data.
* So reset it to 0 before any memory is onlined.
*/
reset_node_managed_pages(pgdat);
/*
* When memory is hot-added, all the memory is in offline state. So
* clear all zones' present_pages because they will be updated in
* online_pages() and offline_pages().
*/
reset_node_present_pages(pgdat);
return pgdat; return pgdat;
} }

View File

@ -145,12 +145,10 @@ static unsigned long __init free_low_memory_core_early(void)
static int reset_managed_pages_done __initdata; static int reset_managed_pages_done __initdata;
static inline void __init reset_node_managed_pages(pg_data_t *pgdat) void reset_node_managed_pages(pg_data_t *pgdat)
{ {
struct zone *z; struct zone *z;
if (reset_managed_pages_done)
return;
for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
z->managed_pages = 0; z->managed_pages = 0;
} }
@ -159,8 +157,12 @@ void __init reset_all_zones_managed_pages(void)
{ {
struct pglist_data *pgdat; struct pglist_data *pgdat;
if (reset_managed_pages_done)
return;
for_each_online_pgdat(pgdat) for_each_online_pgdat(pgdat)
reset_node_managed_pages(pgdat); reset_node_managed_pages(pgdat);
reset_managed_pages_done = 1; reset_managed_pages_done = 1;
} }

View File

@ -466,29 +466,6 @@ static inline void rmv_page_order(struct page *page)
set_page_private(page, 0); set_page_private(page, 0);
} }
/*
* Locate the struct page for both the matching buddy in our
* pair (buddy1) and the combined O(n+1) page they form (page).
*
* 1) Any buddy B1 will have an order O twin B2 which satisfies
* the following equation:
* B2 = B1 ^ (1 << O)
* For example, if the starting buddy (buddy2) is #8 its order
* 1 buddy is #10:
* B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
*
* 2) Any buddy B will have an order O+1 parent P which
* satisfies the following equation:
* P = B & ~(1 << O)
*
* Assumption: *_mem_map is contiguous at least up to MAX_ORDER
*/
static inline unsigned long
__find_buddy_index(unsigned long page_idx, unsigned int order)
{
	/* Flipping bit 'order' of the index yields the order-'order' twin. */
	unsigned long order_bit = 1 << order;

	return page_idx ^ order_bit;
}
/* /*
* This function checks whether a page is free && is the buddy * This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if * we can do coalesce a page and its buddy if
@ -569,6 +546,7 @@ static inline void __free_one_page(struct page *page,
unsigned long combined_idx; unsigned long combined_idx;
unsigned long uninitialized_var(buddy_idx); unsigned long uninitialized_var(buddy_idx);
struct page *buddy; struct page *buddy;
int max_order = MAX_ORDER;
VM_BUG_ON(!zone_is_initialized(zone)); VM_BUG_ON(!zone_is_initialized(zone));
@ -577,13 +555,24 @@ static inline void __free_one_page(struct page *page,
return; return;
VM_BUG_ON(migratetype == -1); VM_BUG_ON(migratetype == -1);
if (is_migrate_isolate(migratetype)) {
/*
* We restrict max order of merging to prevent merge
* between freepages on isolate pageblock and normal
* pageblock. Without this, pageblock isolation
* could cause incorrect freepage accounting.
*/
max_order = min(MAX_ORDER, pageblock_order + 1);
} else {
__mod_zone_freepage_state(zone, 1 << order, migratetype);
}
page_idx = pfn & ((1 << MAX_ORDER) - 1); page_idx = pfn & ((1 << max_order) - 1);
VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page); VM_BUG_ON_PAGE(bad_range(zone, page), page);
while (order < MAX_ORDER-1) { while (order < max_order - 1) {
buddy_idx = __find_buddy_index(page_idx, order); buddy_idx = __find_buddy_index(page_idx, order);
buddy = page + (buddy_idx - page_idx); buddy = page + (buddy_idx - page_idx);
if (!page_is_buddy(page, buddy, order)) if (!page_is_buddy(page, buddy, order))
@ -594,9 +583,11 @@ static inline void __free_one_page(struct page *page,
*/ */
if (page_is_guard(buddy)) { if (page_is_guard(buddy)) {
clear_page_guard_flag(buddy); clear_page_guard_flag(buddy);
set_page_private(page, 0); set_page_private(buddy, 0);
__mod_zone_freepage_state(zone, 1 << order, if (!is_migrate_isolate(migratetype)) {
migratetype); __mod_zone_freepage_state(zone, 1 << order,
migratetype);
}
} else { } else {
list_del(&buddy->lru); list_del(&buddy->lru);
zone->free_area[order].nr_free--; zone->free_area[order].nr_free--;
@ -715,14 +706,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
/* must delete as __free_one_page list manipulates */ /* must delete as __free_one_page list manipulates */
list_del(&page->lru); list_del(&page->lru);
mt = get_freepage_migratetype(page); mt = get_freepage_migratetype(page);
if (unlikely(has_isolate_pageblock(zone)))
mt = get_pageblock_migratetype(page);
/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
__free_one_page(page, page_to_pfn(page), zone, 0, mt); __free_one_page(page, page_to_pfn(page), zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt); trace_mm_page_pcpu_drain(page, 0, mt);
if (likely(!is_migrate_isolate_page(page))) {
__mod_zone_page_state(zone, NR_FREE_PAGES, 1);
if (is_migrate_cma(mt))
__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
}
} while (--to_free && --batch_free && !list_empty(list)); } while (--to_free && --batch_free && !list_empty(list));
} }
spin_unlock(&zone->lock); spin_unlock(&zone->lock);
@ -739,9 +728,11 @@ static void free_one_page(struct zone *zone,
if (nr_scanned) if (nr_scanned)
__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
if (unlikely(has_isolate_pageblock(zone) ||
is_migrate_isolate(migratetype))) {
migratetype = get_pfnblock_migratetype(page, pfn);
}
__free_one_page(page, pfn, zone, order, migratetype); __free_one_page(page, pfn, zone, order, migratetype);
if (unlikely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);
spin_unlock(&zone->lock); spin_unlock(&zone->lock);
} }
@ -1484,7 +1475,7 @@ void split_page(struct page *page, unsigned int order)
} }
EXPORT_SYMBOL_GPL(split_page); EXPORT_SYMBOL_GPL(split_page);
static int __isolate_free_page(struct page *page, unsigned int order) int __isolate_free_page(struct page *page, unsigned int order)
{ {
unsigned long watermark; unsigned long watermark;
struct zone *zone; struct zone *zone;
@ -6408,13 +6399,12 @@ int alloc_contig_range(unsigned long start, unsigned long end,
/* Make sure the range is really isolated. */ /* Make sure the range is really isolated. */
if (test_pages_isolated(outer_start, end, false)) { if (test_pages_isolated(outer_start, end, false)) {
pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", pr_info("%s: [%lx, %lx) PFNs busy\n",
outer_start, end); __func__, outer_start, end);
ret = -EBUSY; ret = -EBUSY;
goto done; goto done;
} }
/* Grab isolated pages from freelists. */ /* Grab isolated pages from freelists. */
outer_end = isolate_freepages_range(&cc, outer_start, end); outer_end = isolate_freepages_range(&cc, outer_start, end);
if (!outer_end) { if (!outer_end) {

View File

@ -60,6 +60,7 @@ out:
int migratetype = get_pageblock_migratetype(page); int migratetype = get_pageblock_migratetype(page);
set_pageblock_migratetype(page, MIGRATE_ISOLATE); set_pageblock_migratetype(page, MIGRATE_ISOLATE);
zone->nr_isolate_pageblock++;
nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE); nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
__mod_zone_freepage_state(zone, -nr_pages, migratetype); __mod_zone_freepage_state(zone, -nr_pages, migratetype);
@ -75,16 +76,54 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
{ {
struct zone *zone; struct zone *zone;
unsigned long flags, nr_pages; unsigned long flags, nr_pages;
struct page *isolated_page = NULL;
unsigned int order;
unsigned long page_idx, buddy_idx;
struct page *buddy;
zone = page_zone(page); zone = page_zone(page);
spin_lock_irqsave(&zone->lock, flags); spin_lock_irqsave(&zone->lock, flags);
if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
goto out; goto out;
nr_pages = move_freepages_block(zone, page, migratetype);
__mod_zone_freepage_state(zone, nr_pages, migratetype); /*
* Because merging of freepages beyond pageblock_order is restricted
* on an isolated pageblock (to avoid the freepage counting problem),
* it is possible that a free buddy page has been left unmerged.
* move_freepages_block() doesn't take care of merging, so we need
* another approach to merge them: isolating the page and freeing it
* again will cause these pages to be merged.
*/
if (PageBuddy(page)) {
order = page_order(page);
if (order >= pageblock_order) {
page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
buddy_idx = __find_buddy_index(page_idx, order);
buddy = page + (buddy_idx - page_idx);
if (!is_migrate_isolate_page(buddy)) {
__isolate_free_page(page, order);
set_page_refcounted(page);
isolated_page = page;
}
}
}
/*
* If we isolate freepage with more than pageblock_order, there
* should be no freepage in the range, so we could avoid costly
* pageblock scanning for freepage moving.
*/
if (!isolated_page) {
nr_pages = move_freepages_block(zone, page, migratetype);
__mod_zone_freepage_state(zone, nr_pages, migratetype);
}
set_pageblock_migratetype(page, migratetype); set_pageblock_migratetype(page, migratetype);
zone->nr_isolate_pageblock--;
out: out:
spin_unlock_irqrestore(&zone->lock, flags); spin_unlock_irqrestore(&zone->lock, flags);
if (isolated_page)
__free_pages(isolated_page, order);
} }
static inline struct page * static inline struct page *

View File

@ -259,6 +259,10 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
if (s->size - size >= sizeof(void *)) if (s->size - size >= sizeof(void *))
continue; continue;
if (IS_ENABLED(CONFIG_SLAB) && align &&
(align > s->align || s->align % align))
continue;
return s; return s;
} }
return NULL; return NULL;