Apply memory policies to top two highest zones when highest zone is ZONE_MOVABLE

The NUMA layer only supports NUMA policies for the highest zone.  When
ZONE_MOVABLE is configured with kernelcore=, the highest zone becomes
ZONE_MOVABLE.  The result is that policies are only applied to allocations
like anonymous pages and page cache allocated from ZONE_MOVABLE when the
zone is used.

This patch applies policies to the two highest zones when the highest zone
is ZONE_MOVABLE.  As ZONE_MOVABLE consists of pages from the highest "real"
zone, it's always functionally equivalent.

The patch has been tested on a variety of machines both NUMA and non-NUMA
covering x86, x86_64 and ppc64.  No abnormal results were seen in
kernbench, tbench, dbench or hackbench.  It passes regression tests from
the numactl package with and without kernelcore= once numactl tests are
patched to wait for vmstat counters to update.

akpm: this is the nasty hack to fix NUMA mempolicies in the presence of
ZONE_MOVABLE and kernelcore= in 2.6.23.  Christoph says "For .24 either merge
the mobility or get the other solution that Mel is working on.  That solution
would only use a single zonelist per node and filter on the fly.  That may
help performance and also help to make memory policies work better."

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by:  Lee Schermerhorn <lee.schermerhorn@hp.com>
Tested-by:  Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Mel Gorman 2007-08-22 14:02:05 -07:00 committed by Linus Torvalds
parent 8e92f21ba3
commit b377fd3982
4 changed files with 33 additions and 2 deletions

View File

@ -166,7 +166,7 @@ extern enum zone_type policy_zone;
static inline void check_highest_zone(enum zone_type k) static inline void check_highest_zone(enum zone_type k)
{ {
if (k > policy_zone) if (k > policy_zone && k != ZONE_MOVABLE)
policy_zone = k; policy_zone = k;
} }

View File

@ -410,6 +410,24 @@ struct zonelist {
#endif #endif
}; };
#ifdef CONFIG_NUMA
/*
 * Decide whether the allocator has to filter @zonelist.
 *
 * Filtering is only needed for custom, policy-built zonelists such as the
 * ones used for MPOL_BIND.  As the comment for struct zonelist_cache
 * describes, such zonelists have no zlcache, so their zlcache_ptr is never
 * set; an unset zlcache_ptr is therefore used as the marker.
 *
 * Returns 1 when zlcache_ptr is unset (filter), 0 otherwise.
 */
static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
{
	return zonelist->zlcache_ptr ? 0 : 1;
}
#else
/* Without CONFIG_NUMA this always returns 0, so no zonelist is filtered. */
static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
{
	return 0;
}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
struct node_active_region { struct node_active_region {
unsigned long start_pfn; unsigned long start_pfn;

View File

@ -149,7 +149,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
lower zones etc. Avoid empty zones because the memory allocator lower zones etc. Avoid empty zones because the memory allocator
doesn't like them. If you implement node hot removal you doesn't like them. If you implement node hot removal you
have to fix that. */ have to fix that. */
k = policy_zone; k = MAX_NR_ZONES - 1;
while (1) { while (1) {
for_each_node_mask(nd, *nodes) { for_each_node_mask(nd, *nodes) {
struct zone *z = &NODE_DATA(nd)->node_zones[k]; struct zone *z = &NODE_DATA(nd)->node_zones[k];

View File

@ -1157,6 +1157,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
int zlc_active = 0; /* set if using zonelist_cache */ int zlc_active = 0; /* set if using zonelist_cache */
int did_zlc_setup = 0; /* just call zlc_setup() one time */ int did_zlc_setup = 0; /* just call zlc_setup() one time */
enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
zonelist_scan: zonelist_scan:
/* /*
@ -1166,6 +1167,18 @@ zonelist_scan:
z = zonelist->zones; z = zonelist->zones;
do { do {
/*
* In NUMA, this could be a policy zonelist which contains
* zones that may not be allowed by the current gfp_mask.
* Check the zone is allowed by the current flags
*/
if (unlikely(alloc_should_filter_zonelist(zonelist))) {
if (highest_zoneidx == -1)
highest_zoneidx = gfp_zone(gfp_mask);
if (zone_idx(*z) > highest_zoneidx)
continue;
}
if (NUMA_BUILD && zlc_active && if (NUMA_BUILD && zlc_active &&
!zlc_zone_worth_trying(zonelist, z, allowednodes)) !zlc_zone_worth_trying(zonelist, z, allowednodes))
continue; continue;