perf/core: Fix event schedule order

Scheduling in events with cpu=-1 before events with cpu=# changes
semantics and is undesirable in that it would prioritize the cpu=-1
(any-CPU) events over the per-CPU ones.

Given that groups->index is across all groups, we actually have an
inter-group ordering, meaning we can merge-sort the two subtrees
(the cpu == -1 subtree and the current CPU's subtree), which is just
what we need to preserve semantics.
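
[Editor's illustration, not part of the commit: a minimal user-space sketch of
that merge idea, assuming two lists that are each already ordered by a global,
monotonically increasing insertion index standing in for group_index. The
struct ev type and the visit_merged()/show() helpers are hypothetical names
used only for this example; the in-kernel counterpart is visit_groups_merge()
in the diff below.]

#include <stdio.h>

struct ev {
	int cpu;
	unsigned long group_index;
};

/* Walk both arrays in ascending group_index order (two-finger merge). */
static void visit_merged(const struct ev *a, int na,
			 const struct ev *b, int nb,
			 void (*func)(const struct ev *))
{
	int i = 0, j = 0;

	while (i < na || j < nb) {
		if (i < na && (j >= nb || a[i].group_index < b[j].group_index))
			func(&a[i++]);	/* the cpu == -1 event is older */
		else
			func(&b[j++]);	/* the per-CPU event is older */
	}
}

static void show(const struct ev *e)
{
	printf("cpu=%2d group_index=%lu\n", e->cpu, e->group_index);
}

int main(void)
{
	/* Hypothetical events; group_index reflects creation order. */
	const struct ev any_cpu[]  = { { -1, 1 }, { -1, 4 } };
	const struct ev this_cpu[] = { {  2, 2 }, {  2, 3 }, {  2, 5 } };

	visit_merged(any_cpu, 2, this_cpu, 3, show);
	return 0;
}

[Scheduling all cpu=-1 events strictly first, as the earlier code did with two
separate loops, would instead give them priority regardless of creation order;
that is the semantic change this patch avoids.]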

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Carrillo-Cisneros <davidcc@google.com>
Cc: Dmitri Prokhorov <Dmitry.Prohorov@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Valery Cherepennikov <valery.cherepennikov@intel.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Peter Zijlstra, 2017-11-13 14:28:30 +01:00, committed by Ingo Molnar
parent 161c85fab7
commit 1cac7b1ae3
1 changed file with 109 additions and 50 deletions


@@ -1608,6 +1608,21 @@ perf_event_groups_first(struct perf_event_groups *groups, int cpu)
	return match;
}

/*
 * Like rb_entry_next_safe() for the @cpu subtree.
 */
static struct perf_event *
perf_event_groups_next(struct perf_event *event)
{
	struct perf_event *next;

	next = rb_entry_safe(rb_next(&event->group_node), typeof(*event), group_node);
	if (next && next->cpu == event->cpu)
		return next;

	return NULL;
}

/*
 * Rotate the @cpu subtree.
 *
@@ -2354,22 +2369,6 @@ static int group_can_go_on(struct perf_event *event,
	return can_add_hw;
}

static int
flexible_group_sched_in(struct perf_event *event,
			struct perf_event_context *ctx,
			struct perf_cpu_context *cpuctx,
			int *can_add_hw)
{
	if (event->state <= PERF_EVENT_STATE_OFF || !event_filter_match(event))
		return 0;

	if (group_can_go_on(event, cpuctx, *can_add_hw))
		if (group_sched_in(event, cpuctx, ctx))
			*can_add_hw = 0;

	return 1;
}

static void add_event_to_ctx(struct perf_event *event,
			     struct perf_event_context *ctx)
{
@@ -3185,52 +3184,112 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
	ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
}

static int visit_groups_merge(struct perf_event_groups *groups, int cpu,
			      int (*func)(struct perf_event *, void *), void *data)
{
	struct perf_event **evt, *evt1, *evt2;
	int ret;

	evt1 = perf_event_groups_first(groups, -1);
	evt2 = perf_event_groups_first(groups, cpu);

	while (evt1 || evt2) {
		if (evt1 && evt2) {
			if (evt1->group_index < evt2->group_index)
				evt = &evt1;
			else
				evt = &evt2;
		} else if (evt1) {
			evt = &evt1;
		} else {
			evt = &evt2;
		}

		ret = func(*evt, data);
		if (ret)
			return ret;

		*evt = perf_event_groups_next(*evt);
	}

	return 0;
}

struct sched_in_data {
	struct perf_event_context *ctx;
	struct perf_cpu_context *cpuctx;
	int can_add_hw;
};

static int pinned_sched_in(struct perf_event *event, void *data)
{
	struct sched_in_data *sid = data;

	if (event->state <= PERF_EVENT_STATE_OFF)
		return 0;

	if (!event_filter_match(event))
		return 0;

	if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw))
		group_sched_in(event, sid->cpuctx, sid->ctx);

	/*
	 * If this pinned group hasn't been scheduled,
	 * put it in error state.
	 */
	if (event->state == PERF_EVENT_STATE_INACTIVE)
		perf_event_set_state(event, PERF_EVENT_STATE_ERROR);

	return 0;
}

static int flexible_sched_in(struct perf_event *event, void *data)
{
	struct sched_in_data *sid = data;

	if (event->state <= PERF_EVENT_STATE_OFF)
		return 0;

	if (!event_filter_match(event))
		return 0;

	if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) {
		if (group_sched_in(event, sid->cpuctx, sid->ctx))
			sid->can_add_hw = 0;
	}

	return 0;
}

static void
ctx_pinned_sched_in(struct perf_event_context *ctx,
		    struct perf_cpu_context *cpuctx)
{
	int sw = -1, cpu = smp_processor_id();
	struct perf_event *event;
	int can_add_hw;
	struct sched_in_data sid = {
		.ctx = ctx,
		.cpuctx = cpuctx,
		.can_add_hw = 1,
	};

	perf_event_groups_for_each_cpu(event, sw,
				       &ctx->pinned_groups, group_node) {
		can_add_hw = 1;
		if (flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw)) {
			if (event->state == PERF_EVENT_STATE_INACTIVE)
				perf_event_set_state(event,
						     PERF_EVENT_STATE_ERROR);
		}
	}

	perf_event_groups_for_each_cpu(event, cpu,
				       &ctx->pinned_groups, group_node) {
		can_add_hw = 1;
		if (flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw)) {
			if (event->state == PERF_EVENT_STATE_INACTIVE)
				perf_event_set_state(event,
						     PERF_EVENT_STATE_ERROR);
		}
	}

	visit_groups_merge(&ctx->pinned_groups,
			   smp_processor_id(),
			   pinned_sched_in, &sid);
}

static void
ctx_flexible_sched_in(struct perf_event_context *ctx,
		      struct perf_cpu_context *cpuctx)
{
	int sw = -1, cpu = smp_processor_id();
	struct perf_event *event;
	int can_add_hw = 1;
	struct sched_in_data sid = {
		.ctx = ctx,
		.cpuctx = cpuctx,
		.can_add_hw = 1,
	};

	perf_event_groups_for_each_cpu(event, sw,
				       &ctx->flexible_groups, group_node)
		flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw);

	can_add_hw = 1;
	perf_event_groups_for_each_cpu(event, cpu,
				       &ctx->flexible_groups, group_node)
		flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw);

	visit_groups_merge(&ctx->flexible_groups,
			   smp_processor_id(),
			   flexible_sched_in, &sid);
}

static void