diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a367ed5bb3fe..0fa491326c4a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -650,6 +650,7 @@ typedef struct pglist_data { #include +extern struct mutex zonelists_mutex; void get_zone_counts(unsigned long *active, unsigned long *inactive, unsigned long *free); void build_all_zonelists(void *data); diff --git a/kernel/cpu.c b/kernel/cpu.c index 3e8b3ba27175..124ad9d6be16 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -357,8 +357,11 @@ int __cpuinit cpu_up(unsigned int cpu) return -ENOMEM; } - if (pgdat->node_zonelists->_zonerefs->zone == NULL) + if (pgdat->node_zonelists->_zonerefs->zone == NULL) { + mutex_lock(&zonelists_mutex); build_all_zonelists(NULL); + mutex_unlock(&zonelists_mutex); + } #endif cpu_maps_update_begin(); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 089cc97aed3c..a4cfcdc00455 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -389,11 +389,6 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) int nid; int ret; struct memory_notify arg; - /* - * mutex to protect zone->pageset when it's still shared - * in onlined_pages() - */ - static DEFINE_MUTEX(zone_pageset_mutex); arg.start_pfn = pfn; arg.nr_pages = nr_pages; @@ -420,14 +415,14 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) * This means the page allocator ignores this zone. * So, zonelist must be updated after online. */ - mutex_lock(&zone_pageset_mutex); + mutex_lock(&zonelists_mutex); if (!populated_zone(zone)) need_zonelists_rebuild = 1; ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages, online_pages_range); if (ret) { - mutex_unlock(&zone_pageset_mutex); + mutex_unlock(&zonelists_mutex); printk(KERN_DEBUG "online_pages %lx at %lx failed\n", nr_pages, pfn); memory_notify(MEM_CANCEL_ONLINE, &arg); @@ -441,7 +436,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) else zone_pcp_update(zone); - mutex_unlock(&zone_pageset_mutex); + mutex_unlock(&zonelists_mutex); setup_per_zone_wmarks(); calculate_zone_inactive_ratio(zone); if (onlined_pages) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 21c52d2d8624..08b349931ebc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2571,8 +2571,11 @@ int numa_zonelist_order_handler(ctl_table *table, int write, strncpy((char*)table->data, saved_string, NUMA_ZONELIST_ORDER_LEN); user_zonelist_order = oldval; - } else if (oldval != user_zonelist_order) + } else if (oldval != user_zonelist_order) { + mutex_lock(&zonelists_mutex); build_all_zonelists(NULL); + mutex_unlock(&zonelists_mutex); + } } out: mutex_unlock(&zl_order_mutex); @@ -2924,6 +2927,12 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch); static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); static void setup_zone_pageset(struct zone *zone); +/* + * Global mutex to protect against size modification of zonelists + * as well as to serialize pageset setup for the new populated zone. + */ +DEFINE_MUTEX(zonelists_mutex); + /* return values int ....just for stop_machine() */ static __init_refok int __build_all_zonelists(void *data) { @@ -2967,6 +2976,10 @@ static __init_refok int __build_all_zonelists(void *data) return 0; } +/* + * Called with zonelists_mutex held always + * unless system_state == SYSTEM_BOOTING. + */ void build_all_zonelists(void *data) { set_zonelist_order();