cpuset: drop async_rebuild_sched_domains()

In general, we want to make cgroup_mutex one of the outermost locks and be able to use get_online_cpus() and friends from cgroup methods. With cpuset hotplug made async, get_online_cpus() can now be nested inside cgroup_mutex. Currently, cpuset avoids nesting get_online_cpus() inside cgroup_mutex by bouncing sched_domain rebuilding to a work item. As such nesting is allowed now, remove the workqueue bouncing code and always rebuild sched_domains synchronously. This also nests sched_domains_mutex inside cgroup_mutex, which is intended and should be okay. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Li Zefan <lizefan@huawei.com>
2013-01-07 08:51:07 -08:00 · 2013-01-07 08:51:07 -08:00 · 699140ba83
parent 3a5a6d0c2b
commit 699140ba83
1 changed files with 16 additions and 60 deletions
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@ -60,14 +60,6 @@
 #include <linux/workqueue.h>
 #include <linux/cgroup.h>

-/*
- * Workqueue for cpuset related tasks.
- *
- * Using kevent workqueue may cause deadlock when memory_migrate
- * is set. So we create a separate workqueue thread for cpuset.
- */
-static struct workqueue_struct *cpuset_wq;
-
 /*
 * Tracks how many cpusets are currently defined in system.
 * When there is only one cpuset (the root cpuset) we can
@ -753,25 +745,25 @@ done:
 /*
 * Rebuild scheduler domains.
 *
- * Call with neither cgroup_mutex held nor within get_online_cpus().
- * Takes both cgroup_mutex and get_online_cpus().
+ * If the flag 'sched_load_balance' of any cpuset with non-empty
+ * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
+ * which has that flag enabled, or if any cpuset with a non-empty
+ * 'cpus' is removed, then call this routine to rebuild the
+ * scheduler's dynamic sched domains.
 *
- * Cannot be directly called from cpuset code handling changes
- * to the cpuset pseudo-filesystem, because it cannot be called
- * from code that already holds cgroup_mutex.
+ * Call with cgroup_mutex held.  Takes get_online_cpus().
 */
-static void do_rebuild_sched_domains(struct work_struct *unused)
+static void rebuild_sched_domains_locked(void)
 {
 	struct sched_domain_attr *attr;
 	cpumask_var_t *doms;
 	int ndoms;

+	WARN_ON_ONCE(!cgroup_lock_is_held());
 	get_online_cpus();

 	/* Generate domain masks and attrs */
-	cgroup_lock();
 	ndoms = generate_sched_domains(&doms, &attr);
-	cgroup_unlock();

 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
@ -779,7 +771,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused)
 	put_online_cpus();
 }
 #else /* !CONFIG_SMP */
-static void do_rebuild_sched_domains(struct work_struct *unused)
+static void rebuild_sched_domains_locked(void)
 {
 }

@ -791,44 +783,11 @@ static int generate_sched_domains(cpumask_var_t **domains,
 }
 #endif /* CONFIG_SMP */

-static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
-
-/*
- * Rebuild scheduler domains, asynchronously via workqueue.
- *
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
- *
- * The rebuild_sched_domains() and partition_sched_domains()
- * routines must nest cgroup_lock() inside get_online_cpus(),
- * but such cpuset changes as these must nest that locking the
- * other way, holding cgroup_lock() for much of the code.
- *
- * So in order to avoid an ABBA deadlock, the cpuset code handling
- * these user changes delegates the actual sched domain rebuilding
- * to a separate workqueue thread, which ends up processing the
- * above do_rebuild_sched_domains() function.
- */
-static void async_rebuild_sched_domains(void)
-{
-	queue_work(cpuset_wq, &rebuild_sched_domains_work);
-}
-
-/*
- * Accomplishes the same scheduler domain rebuild as the above
- * async_rebuild_sched_domains(), however it directly calls the
- * rebuild routine synchronously rather than calling it via an
- * asynchronous work thread.
- *
- * This can only be called from code that is not holding
- * cgroup_mutex (not nested in a cgroup_lock() call.)
- */
 void rebuild_sched_domains(void)
 {
-	do_rebuild_sched_domains(NULL);
+	cgroup_lock();
+	rebuild_sched_domains_locked();
+	cgroup_unlock();
 }

 /**
@ -948,7 +907,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	heap_free(&heap);

 	if (is_load_balanced)
-		async_rebuild_sched_domains();
+		rebuild_sched_domains_locked();
 	return 0;
 }

@ -1196,7 +1155,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 		cs->relax_domain_level = val;
 		if (!cpumask_empty(cs->cpus_allowed) &&
 		    is_sched_load_balance(cs))
-			async_rebuild_sched_domains();
+			rebuild_sched_domains_locked();
 	}

 	return 0;
@ -1288,7 +1247,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	mutex_unlock(&callback_mutex);

 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
-		async_rebuild_sched_domains();
+		rebuild_sched_domains_locked();

 	if (spread_flag_changed)
 		update_tasks_flags(cs, &heap);
@ -1925,7 +1884,7 @@ static void cpuset_css_offline(struct cgroup *cgrp)
 /*
 * If the cpuset being removed has its flag 'sched_load_balance'
 * enabled, then simulate turning sched_load_balance off, which
- * will call async_rebuild_sched_domains().
+ * will call rebuild_sched_domains_locked().
 */

 static void cpuset_css_free(struct cgroup *cont)
@ -2237,9 +2196,6 @@ void __init cpuset_init_smp(void)
 	top_cpuset.mems_allowed = node_states[N_MEMORY];

 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
-
-	cpuset_wq = create_singlethread_workqueue("cpuset");
-	BUG_ON(!cpuset_wq);
 }

 /**