| From 9b85e7a779e0771c90758f2a6fd61a0ab7a9ba36 Mon Sep 17 00:00:00 2001 |
| From: Song Liu <songliubraving@fb.com> |
| Date: Tue, 8 Oct 2019 09:59:49 -0700 |
| Subject: [PATCH] perf/core: Fix corner case in perf_rotate_context() |
| |
| commit 7fa343b7fdc4f351de4e3f28d5c285937dd1f42f upstream. |
| |
| In perf_rotate_context(), when the first cpu flexible event fails to
| schedule, cpu_rotate is 1 while cpu_event is NULL. Since cpu_event is
| NULL, perf_rotate_context() will _NOT_ call cpu_ctx_sched_out(), so
| cpuctx->ctx.is_active still has EVENT_FLEXIBLE set. The next
| perf_event_sched_in() will then skip all cpu flexible events because of
| the EVENT_FLEXIBLE bit.
| |
| In the next call to perf_rotate_context(), cpu_rotate stays 1 and
| cpu_event stays NULL, so this process repeats. The end result is that
| flexible events on this cpu are never scheduled (until another event is
| added to the cpuctx).
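| 
| A simplified sketch of the problematic path before this patch (abridged
| pseudo-code modelled on perf_rotate_context(); locking and the
| task-rotation side are omitted):
| 
|     if (cpu_rotate)
|         /* flexible_active is empty, so this stays NULL */
|         cpu_event = ctx_first_active(&cpuctx->ctx);
| 
|     if (cpu_event)
|         /* never runs: EVENT_FLEXIBLE is not cleared from is_active */
|         cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
| 
|     if (cpu_event)
|         /* never runs: flexible_groups is not rotated */
|         rotate_ctx(&cpuctx->ctx, cpu_event);
| 
|     /* EVENT_FLEXIBLE already set, so cpu flexible events are skipped */
|     perf_event_sched_in(cpuctx, cpuctx->task_ctx, current);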
| |
| Here is an easy repro of the issue. On Intel CPUs, where ref-cycles
| can only use one counter, run one pinned event for ref-cycles, one
| flexible event for ref-cycles, and one flexible event for cycles. The
| flexible ref-cycles event is never scheduled, which is expected. However,
| because of this issue, the cycles event is never scheduled either.
| |
| $ perf stat -e ref-cycles:D,ref-cycles,cycles -C 5 -I 1000 |
| |
| time counts unit events |
| 1.000152973 15,412,480 ref-cycles:D |
| 1.000152973 <not counted> ref-cycles (0.00%) |
| 1.000152973 <not counted> cycles (0.00%) |
| 2.000486957 18,263,120 ref-cycles:D |
| 2.000486957 <not counted> ref-cycles (0.00%) |
| 2.000486957 <not counted> cycles (0.00%) |
| |
| To fix this, when the flexible_active list is empty, try to rotate the
| first event in flexible_groups. Also, rename ctx_first_active() to
| ctx_event_to_rotate(), which describes its purpose more accurately.
| |
| Signed-off-by: Song Liu <songliubraving@fb.com> |
| Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> |
| Cc: <kernel-team@fb.com> |
| Cc: Arnaldo Carvalho de Melo <acme@kernel.org> |
| Cc: Linus Torvalds <torvalds@linux-foundation.org> |
| Cc: Peter Zijlstra <peterz@infradead.org> |
| Cc: Sasha Levin <sashal@kernel.org> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Fixes: 8d5bce0c37fa ("perf/core: Optimize perf_rotate_context() event scheduling") |
| Link: https://lkml.kernel.org/r/20191008165949.920548-1-songliubraving@fb.com |
| Signed-off-by: Ingo Molnar <mingo@kernel.org> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/kernel/events/core.c b/kernel/events/core.c |
| index 836dbbb8bfed..f8c223c70c32 100644 |
| --- a/kernel/events/core.c |
| +++ b/kernel/events/core.c |
| @@ -3685,11 +3685,23 @@ static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event) |
| perf_event_groups_insert(&ctx->flexible_groups, event); |
| } |
| |
| +/* pick an event from the flexible_groups to rotate */ |
| static inline struct perf_event * |
| -ctx_first_active(struct perf_event_context *ctx) |
| +ctx_event_to_rotate(struct perf_event_context *ctx) |
| { |
| - return list_first_entry_or_null(&ctx->flexible_active, |
| - struct perf_event, active_list); |
| + struct perf_event *event; |
| + |
| + /* pick the first active flexible event */ |
| + event = list_first_entry_or_null(&ctx->flexible_active, |
| + struct perf_event, active_list); |
| + |
| + /* if no active flexible event, pick the first event */ |
| + if (!event) { |
| + event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree), |
| + typeof(*event), group_node); |
| + } |
| + |
| + return event; |
| } |
| |
| static bool perf_rotate_context(struct perf_cpu_context *cpuctx) |
| @@ -3721,9 +3733,9 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx) |
| perf_pmu_disable(cpuctx->ctx.pmu); |
| |
| if (task_rotate) |
| - task_event = ctx_first_active(ctx); |
| + task_event = ctx_event_to_rotate(ctx); |
| if (cpu_rotate) |
| - cpu_event = ctx_first_active(&cpuctx->ctx); |
| + cpu_event = ctx_event_to_rotate(&cpuctx->ctx); |
| |
| /* |
| * As per the order given at ctx_resched() first 'pop' task flexible |
| -- |
| 2.7.4 |
| |