From 3e84050c81ffb4961ef43d20e1fb1d7607167d83 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Sun, 13 Jul 2008 02:10:29 +0200 Subject: [PATCH 1/5] cpusets, hotplug, scheduler: fix scheduler domain breakage Commit f18f982ab ("sched: CPU hotplug events must not destroy scheduler domains created by the cpusets") introduced a hotplug-related problem as described below: Upon CPU_DOWN_PREPARE, update_sched_domains() -> detach_destroy_domains(&cpu_online_map) does the following: /* * Force a reinitialization of the sched domains hierarchy. The domains * and groups cannot be updated in place without racing with the balancing * code, so we temporarily attach all running cpus to the NULL domain * which will prevent rebalancing while the sched domains are recalculated. */ The sched-domains should be rebuilt when a CPU_DOWN ops. has been completed, effectively either upon CPU_DEAD{_FROZEN} (upon success) or CPU_DOWN_FAILED{_FROZEN} (upon failure -- restore the things to their initial state). That's what update_sched_domains() also does but only for !CPUSETS case. With f18f982ab, sched-domains' reinitialization is delegated to CPUSETS code: cpuset_handle_cpuhp() -> common_cpu_mem_hotplug_unplug() -> rebuild_sched_domains() Being called for CPU_UP_PREPARE and if its callback is called after update_sched_domains()), it just negates all the work done by update_sched_domains() -- i.e. a soon-to-be-offline cpu is included in the sched-domains and that makes it visible for the load-balancer while the CPU_DOWN ops. is in progress. __migrate_live_tasks() moves the tasks off a 'dead' cpu (it's already "offline" when this function is called). try_to_wake_up() is called for one of these tasks from another CPU -> the load-balancer (wake_idle()) picks up a "dead" CPU and places the task on it. Then e.g. BUG_ON(rq->nr_running) detects this a bit later -> oops. Signed-off-by: Dmitry Adamushko Tested-by: Vegard Nossum Cc: Paul Menage Cc: Max Krasnyansky Cc: Paul Jackson Cc: Peter Zijlstra Cc: miaox@cn.fujitsu.com Cc: rostedt@goodmis.org Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- kernel/cpuset.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9fceb97e989c..798b3ab054eb 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1882,7 +1882,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root) * in order to minimize text size. */ -static void common_cpu_mem_hotplug_unplug(void) +static void common_cpu_mem_hotplug_unplug(int rebuild_sd) { cgroup_lock(); @@ -1894,7 +1894,8 @@ static void common_cpu_mem_hotplug_unplug(void) * Scheduler destroys domains on hotplug events. * Rebuild them based on the current settings. */ - rebuild_sched_domains(); + if (rebuild_sd) + rebuild_sched_domains(); cgroup_unlock(); } @@ -1912,11 +1913,22 @@ static void common_cpu_mem_hotplug_unplug(void) static int cpuset_handle_cpuhp(struct notifier_block *unused_nb, unsigned long phase, void *unused_cpu) { - if (phase == CPU_DYING || phase == CPU_DYING_FROZEN) + switch (phase) { + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + common_cpu_mem_hotplug_unplug(1); + break; + default: return NOTIFY_DONE; + } - common_cpu_mem_hotplug_unplug(); - return 0; + return NOTIFY_OK; } #ifdef CONFIG_MEMORY_HOTPLUG @@ -1929,7 +1941,7 @@ static int cpuset_handle_cpuhp(struct notifier_block *unused_nb, void cpuset_track_online_nodes(void) { - common_cpu_mem_hotplug_unplug(); + common_cpu_mem_hotplug_unplug(0); } #endif From 0302c01b4b793cfbc5c7bf8723f6d14bf9bd7cf4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 13 Jul 2008 12:13:59 -0700 Subject: [PATCH 2/5] Documentation/HOWTO: correct wrong kernel bugzilla FAQ URL Signed-off-by: Jiri Pirko Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/HOWTO | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 0291ade44c17..619e8caf30db 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -377,7 +377,7 @@ Bug Reporting bugzilla.kernel.org is where the Linux kernel developers track kernel bugs. Users are encouraged to report all bugs that they find in this tool. For details on how to use the kernel bugzilla, please see: - http://test.kernel.org/bugzilla/faq.html + http://bugzilla.kernel.org/page.cgi?id=faq.html The file REPORTING-BUGS in the main kernel source directory has a good template for how to report a possible kernel bug, and details what kind From 17d213f806dad629e9af36fc45f082b87ed7bceb Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 13 Jul 2008 12:14:02 -0700 Subject: [PATCH 3/5] devcgroup: always show positive major/minor num # echo "b $((0x7fffffff)):$((0x80000000)) rwm" > devices.allow # cat devices.list b 214748364:-21474836 rwm though a major/minor number of 0x800000000 is meaningless, we should not cast it to a negative value. Signed-off-by: Li Zefan Acked-by: Serge Hallyn Cc: Serge Hallyn Cc: Paul Menage Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- security/device_cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index fd764a0858d0..1e2e28afba45 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -222,7 +222,7 @@ static void devcgroup_destroy(struct cgroup_subsys *ss, #define DEVCG_DENY 2 #define DEVCG_LIST 3 -#define MAJMINLEN 10 +#define MAJMINLEN 13 #define ACCLEN 4 static void set_access(char *acc, short access) @@ -254,7 +254,7 @@ static void set_majmin(char *str, unsigned m) if (m == ~0) sprintf(str, "*"); else - snprintf(str, MAJMINLEN, "%d", m); + snprintf(str, MAJMINLEN, "%u", m); } static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, From ec229e830060091b9be63c8f873c1b2407a82821 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 13 Jul 2008 12:14:04 -0700 Subject: [PATCH 4/5] devcgroup: fix permission check when adding entry to child cgroup # cat devices.list c 1:3 r # echo 'c 1:3 w' > sub/devices.allow # cat sub/devices.list c 1:3 w As illustrated, the parent group has no write permission to /dev/null, so it's child should not be allowed to add this write permission. Signed-off-by: Li Zefan Acked-by: Serge Hallyn Cc: Serge Hallyn Cc: Paul Menage Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- security/device_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 1e2e28afba45..ddd92cec78ed 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -300,7 +300,7 @@ static int may_access_whitelist(struct dev_cgroup *c, continue; if (whitem->minor != ~0 && whitem->minor != refwh->minor) continue; - if (refwh->access & (~(whitem->access | ACC_MASK))) + if (refwh->access & (~whitem->access)) continue; return 1; } From bce7f793daec3e65ec5c5705d2457b81fe7b5725 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Jul 2008 14:51:29 -0700 Subject: [PATCH 5/5] Linux 2.6.26 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6315424a00b9..e3c5eb66ec52 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 26 -EXTRAVERSION = -rc9 +EXTRAVERSION = NAME = Rotary Wombat # *DOCUMENTATION*