sched/numa: Update migrate_improves/degrades_locality()

Update the migrate_improves/degrades_locality() functions with knowledge of pseudo-interleaving. Do not consider moving tasks around within the set of group's active nodes as improving or degrading locality. Instead, leave the load balancer free to balance the load between a numa_group's active nodes. Also, switch from the group/task_weight functions to the group/task_fault functions. The "weight" functions involve a division, but both calls use the same divisor, so there's no point in doing that from these functions. On a 4 node (x10 core) system, performance of SPECjbb2005 seems unaffected, though the number of migrations with 2 8-warehouse wide instances seems to have almost halved, due to the scheduler running each instance on a single node. Signed-off-by: Rik van Riel <riel@redhat.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Cc: mgorman@suse.de Cc: chegu_vinod@hp.com Link: http://lkml.kernel.org/r/20140515130306.61aae7db@cuia.bos.redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-05-15 13:03:06 -04:00 · 2014-05-15 13:03:06 -04:00 · b1ad065e65
parent e63da03639
commit b1ad065e65
1 changed files with 29 additions and 13 deletions
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@ -5123,6 +5123,7 @@ task_hot(struct task_struct *p, u64 now)
 /* Returns true if the destination node has incurred more faults */
 static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 {
+	struct numa_group *numa_group = rcu_dereference(p->numa_group);
 	int src_nid, dst_nid;

 	if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults_memory ||
@ -5136,21 +5137,29 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 	if (src_nid == dst_nid)
 		return false;

-	/* Always encourage migration to the preferred node. */
+	if (numa_group) {
+		/* Task is already in the group's interleave set. */
+		if (node_isset(src_nid, numa_group->active_nodes))
+			return false;
+
+		/* Task is moving into the group's interleave set. */
+		if (node_isset(dst_nid, numa_group->active_nodes))
+			return true;
+
+		return group_faults(p, dst_nid) > group_faults(p, src_nid);
+	}
+
+	/* Encourage migration to the preferred node. */
 	if (dst_nid == p->numa_preferred_nid)
 		return true;

-	/* If both task and group weight improve, this move is a winner. */
-	if (task_weight(p, dst_nid) > task_weight(p, src_nid) &&
-	    group_weight(p, dst_nid) > group_weight(p, src_nid))
-		return true;
-
-	return false;
+	return task_faults(p, dst_nid) > task_faults(p, src_nid);
 }


 static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 {
+	struct numa_group *numa_group = rcu_dereference(p->numa_group);
 	int src_nid, dst_nid;

 	if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER))
@ -5165,16 +5174,23 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 	if (src_nid == dst_nid)
 		return false;

+	if (numa_group) {
+		/* Task is moving within/into the group's interleave set. */
+		if (node_isset(dst_nid, numa_group->active_nodes))
+			return false;
+
+		/* Task is moving out of the group's interleave set. */
+		if (node_isset(src_nid, numa_group->active_nodes))
+			return true;
+
+		return group_faults(p, dst_nid) < group_faults(p, src_nid);
+	}
+
 	/* Migrating away from the preferred node is always bad. */
 	if (src_nid == p->numa_preferred_nid)
 		return true;

-	/* If either task or group weight get worse, don't do it. */
-	if (task_weight(p, dst_nid) < task_weight(p, src_nid) ||
-	    group_weight(p, dst_nid) < group_weight(p, src_nid))
-		return true;
-
-	return false;
+	return task_faults(p, dst_nid) < task_faults(p, src_nid);
 }

 #else