From e37b6a7b27b400c3aa488db8c6629a05095bc79c Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Fri, 21 Jan 2011 20:44:59 -0800
Subject: [PATCH 1/3] sched: Fix sign under-flows in wake_affine

While care is taken around the zero-point in effective_load to not exceed
the instantaneous rq->weight, it's still possible (e.g. using wake_idx != 0)
for (load + effective_load) to underflow.

In this case, comparing the unsigned values can result in incorrect balance
decisions.

Signed-off-by: Paul Turner
Signed-off-by: Peter Zijlstra
LKML-Reference: <20110122044851.734245014@google.com>
Signed-off-by: Ingo Molnar
---
 kernel/sched_fair.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 354769979c02..ccecfec02a70 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1432,7 +1432,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
-	unsigned long this_load, load;
+	s64 this_load, load;
 	int idx, this_cpu, prev_cpu;
 	unsigned long tl_per_task;
 	struct task_group *tg;
@@ -1471,8 +1471,8 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	 * Otherwise check if either cpus are near enough in load to allow this
 	 * task to be woken on this_cpu.
 	 */
-	if (this_load) {
-		unsigned long this_eff_load, prev_eff_load;
+	if (this_load > 0) {
+		s64 this_eff_load, prev_eff_load;
 
 		this_eff_load = 100;
 		this_eff_load *= power_of(prev_cpu);

From b815f1963e47b9b69bb17e0588bd5af5b1114ae0 Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Fri, 21 Jan 2011 20:45:00 -0800
Subject: [PATCH 2/3] sched: Fix/remove redundant cfs_rq checks

Since updates are against an entity's queuing cfs_rq, it's not possible to
enter update_cfs_{shares,load} with a NULL cfs_rq.  (Indeed, update_cfs_load
would crash prior to the check if we did anyway, since the load is examined
during the initializers.)

Also, in the update_cfs_load case there's no point in maintaining averages
for rq->cfs_rq since we don't perform shares distribution at that level --
the NULL check is replaced accordingly.

Thanks to Dan Carpenter for pointing out the dereference before the NULL
check.

Signed-off-by: Paul Turner
Signed-off-by: Peter Zijlstra
LKML-Reference: <20110122044851.825284940@google.com>
Signed-off-by: Ingo Molnar
---
 kernel/sched_fair.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ccecfec02a70..1997383ba4d6 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -722,7 +722,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 	u64 now, delta;
 	unsigned long load = cfs_rq->load.weight;
 
-	if (!cfs_rq)
+	if (cfs_rq->tg == &root_task_group)
 		return;
 
 	now = rq_of(cfs_rq)->clock;
@@ -830,9 +830,6 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
 	struct sched_entity *se;
 	long shares;
 
-	if (!cfs_rq)
-		return;
-
 	tg = cfs_rq->tg;
 	se = tg->se[cpu_of(rq_of(cfs_rq))];
 	if (!se)

From 05ca62c6ca17f39b88fa956d5ebc1fa6e93ad5e3 Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Fri, 21 Jan 2011 20:45:02 -0800
Subject: [PATCH 3/3] sched: Use rq->clock_task instead of rq->clock for
 correctly maintaining load averages

The delta in clock_task is a more fair attribution of how much time a tg has
been contributing load to the current cpu.

While not really important, it also means we're more in sync (by magnitude)
with respect to periodic updates (since __update_curr deltas are clock_task
based).

Signed-off-by: Paul Turner
Signed-off-by: Peter Zijlstra
LKML-Reference: <20110122044852.007092349@google.com>
Signed-off-by: Ingo Molnar
---
 kernel/sched_fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 1997383ba4d6..0c26e2df450e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -725,7 +725,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 	if (cfs_rq->tg == &root_task_group)
 		return;
 
-	now = rq_of(cfs_rq)->clock;
+	now = rq_of(cfs_rq)->clock_task;
 	delta = now - cfs_rq->load_stamp;
 
 	/* truncate load history at 4 idle periods */
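As a side illustration of the first patch in this series, the sketch below is a
minimal standalone userspace C program, not kernel code; the loads, the negative
effective_load() stand-in value, and the variable names are made up for the demo.
It shows how an unsigned sum that underflows wraps to a huge value and flips a
wake_affine-style load comparison, while the same comparison done in signed
64-bit arithmetic stays correct.

#include <stdio.h>

int main(void)
{
	long eff = -2048;		/* stand-in for a negative effective_load() result */
	unsigned long u_load = 1024;	/* prev_cpu load held unsigned, as before the fix */
	long long s_load = 1024;	/* same load held in a signed 64-bit, as after the fix */
	unsigned long this_load = 512;	/* hypothetical this_cpu load */

	unsigned long u_sum = u_load + eff;	/* underflows and wraps to a huge value */
	long long s_sum = s_load + eff;		/* stays negative: -1024 */

	/* wake_affine-style "is prev_cpu at least as loaded as this_cpu?" test */
	printf("unsigned: %d (wrong: compares against the wrapped value)\n",
	       this_load <= u_sum);
	printf("signed:   %d (correct)\n", (long long)this_load <= s_sum);
	return 0;
}

Compiled with any C compiler, the unsigned comparison reports 1 (prev_cpu looks
heavily loaded) while the signed comparison reports 0, which is why the patch
switches this_load, load and the effective-load accumulators to s64.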