From 072c56c13e1302fcdc39961dc64e76485731ad67 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 7 Feb 2008 00:14:39 -0800 Subject: [PATCH] per-zone and reclaim enhancements for memory controller: per-zone-lock for cgroup Now, lru is per-zone. Then, lru_lock can be (should be) per-zone, too. This patch implements per-zone lru lock. lru_lock is placed into mem_cgroup_per_zone struct. lock can be accessed by mz = mem_cgroup_zoneinfo(mem_cgroup, node, zone); &mz->lru_lock or mz = page_cgroup_zoneinfo(page_cgroup); &mz->lru_lock Signed-off-by: KAMEZAWA hiroyuki Cc: "Eric W. Biederman" Cc: Balbir Singh Cc: David Rientjes Cc: Herbert Poetzl Cc: Kirill Korotaev Cc: Nick Piggin Cc: Paul Menage Cc: Pavel Emelianov Cc: Peter Zijlstra Cc: Vaidyanathan Srinivasan Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 71 ++++++++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f728d67a3267..315dee180129 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -89,6 +89,10 @@ enum mem_cgroup_zstat_index { }; struct mem_cgroup_per_zone { + /* + * spin_lock to protect the per cgroup LRU + */ + spinlock_t lru_lock; struct list_head active_list; struct list_head inactive_list; unsigned long count[NR_MEM_CGROUP_ZSTAT]; @@ -126,10 +130,7 @@ struct mem_cgroup { * per zone LRU lists. 
*/ struct mem_cgroup_lru_info info; - /* - * spin_lock to protect the per cgroup LRU - */ - spinlock_t lru_lock; + unsigned long control_type; /* control RSS or RSS+Pagecache */ int prev_priority; /* for recording reclaim priority */ /* @@ -409,15 +410,16 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) */ void mem_cgroup_move_lists(struct page_cgroup *pc, bool active) { - struct mem_cgroup *mem; + struct mem_cgroup_per_zone *mz; + unsigned long flags; + if (!pc) return; - mem = pc->mem_cgroup; - - spin_lock(&mem->lru_lock); + mz = page_cgroup_zoneinfo(pc); + spin_lock_irqsave(&mz->lru_lock, flags); __mem_cgroup_move_lists(pc, active); - spin_unlock(&mem->lru_lock); + spin_unlock_irqrestore(&mz->lru_lock, flags); } /* @@ -527,7 +529,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, src = &mz->inactive_list; - spin_lock(&mem_cont->lru_lock); + spin_lock(&mz->lru_lock); scan = 0; list_for_each_entry_safe_reverse(pc, tmp, src, lru) { if (scan >= nr_to_scan) @@ -557,7 +559,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, } list_splice(&pc_list, src); - spin_unlock(&mem_cont->lru_lock); + spin_unlock(&mz->lru_lock); *scanned = scan; return nr_taken; @@ -576,6 +578,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, struct page_cgroup *pc; unsigned long flags; unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; + struct mem_cgroup_per_zone *mz; /* * Should page_cgroup's go to their own slab? 
@@ -677,10 +680,11 @@ retry: goto retry; } - spin_lock_irqsave(&mem->lru_lock, flags); + mz = page_cgroup_zoneinfo(pc); + spin_lock_irqsave(&mz->lru_lock, flags); /* Update statistics vector */ __mem_cgroup_add_list(pc); - spin_unlock_irqrestore(&mem->lru_lock, flags); + spin_unlock_irqrestore(&mz->lru_lock, flags); done: return 0; @@ -727,6 +731,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, void mem_cgroup_uncharge(struct page_cgroup *pc) { struct mem_cgroup *mem; + struct mem_cgroup_per_zone *mz; struct page *page; unsigned long flags; @@ -739,6 +744,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc) if (atomic_dec_and_test(&pc->ref_cnt)) { page = pc->page; + mz = page_cgroup_zoneinfo(pc); /* * get page->cgroup and clear it under lock. * force_empty can drop page->cgroup without checking refcnt. @@ -747,9 +753,9 @@ void mem_cgroup_uncharge(struct page_cgroup *pc) mem = pc->mem_cgroup; css_put(&mem->css); res_counter_uncharge(&mem->res, PAGE_SIZE); - spin_lock_irqsave(&mem->lru_lock, flags); + spin_lock_irqsave(&mz->lru_lock, flags); __mem_cgroup_remove_list(pc); - spin_unlock_irqrestore(&mem->lru_lock, flags); + spin_unlock_irqrestore(&mz->lru_lock, flags); kfree(pc); } } @@ -788,24 +794,29 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage) struct page_cgroup *pc; struct mem_cgroup *mem; unsigned long flags; + struct mem_cgroup_per_zone *mz; retry: pc = page_get_page_cgroup(page); if (!pc) return; mem = pc->mem_cgroup; + mz = page_cgroup_zoneinfo(pc); if (clear_page_cgroup(page, pc) != pc) goto retry; - - spin_lock_irqsave(&mem->lru_lock, flags); + spin_lock_irqsave(&mz->lru_lock, flags); __mem_cgroup_remove_list(pc); + spin_unlock_irqrestore(&mz->lru_lock, flags); + pc->page = newpage; lock_page_cgroup(newpage); page_assign_page_cgroup(newpage, pc); unlock_page_cgroup(newpage); - __mem_cgroup_add_list(pc); - spin_unlock_irqrestore(&mem->lru_lock, flags); + mz = page_cgroup_zoneinfo(pc); + 
spin_lock_irqsave(&mz->lru_lock, flags); + __mem_cgroup_add_list(pc); + spin_unlock_irqrestore(&mz->lru_lock, flags); return; } @@ -816,18 +827,26 @@ retry: */ #define FORCE_UNCHARGE_BATCH (128) static void -mem_cgroup_force_empty_list(struct mem_cgroup *mem, struct list_head *list) +mem_cgroup_force_empty_list(struct mem_cgroup *mem, + struct mem_cgroup_per_zone *mz, + int active) { struct page_cgroup *pc; struct page *page; int count; unsigned long flags; + struct list_head *list; + + if (active) + list = &mz->active_list; + else + list = &mz->inactive_list; if (list_empty(list)) return; retry: count = FORCE_UNCHARGE_BATCH; - spin_lock_irqsave(&mem->lru_lock, flags); + spin_lock_irqsave(&mz->lru_lock, flags); while (--count && !list_empty(list)) { pc = list_entry(list->prev, struct page_cgroup, lru); @@ -842,7 +861,7 @@ retry: } else /* being uncharged ? ...do relax */ break; } - spin_unlock_irqrestore(&mem->lru_lock, flags); + spin_unlock_irqrestore(&mz->lru_lock, flags); if (!list_empty(list)) { cond_resched(); goto retry; @@ -873,11 +892,9 @@ int mem_cgroup_force_empty(struct mem_cgroup *mem) struct mem_cgroup_per_zone *mz; mz = mem_cgroup_zoneinfo(mem, node, zid); /* drop all page_cgroup in active_list */ - mem_cgroup_force_empty_list(mem, - &mz->active_list); + mem_cgroup_force_empty_list(mem, mz, 1); /* drop all page_cgroup in inactive_list */ - mem_cgroup_force_empty_list(mem, - &mz->inactive_list); + mem_cgroup_force_empty_list(mem, mz, 0); } } ret = 0; @@ -1114,6 +1131,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) mz = &pn->zoneinfo[zone]; INIT_LIST_HEAD(&mz->active_list); INIT_LIST_HEAD(&mz->inactive_list); + spin_lock_init(&mz->lru_lock); } return 0; } @@ -1143,7 +1161,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) res_counter_init(&mem->res); - spin_lock_init(&mem->lru_lock); mem->control_type = MEM_CGROUP_TYPE_ALL; memset(&mem->info, 0, sizeof(mem->info));