From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 24 May 2017 10:15:34 +0200
Subject: [PATCH 23/32] perf/tracing/cpuhotplug: Fix locking order

perf, tracing, kprobes and jump_labels have a gazillion ways to create
dependency lock chains. Some of those involve nested invocations of
get_online_cpus().

The conversion of the hotplug locking to a percpu rwsem requires avoiding
such nested calls. sys_perf_event_open() protects most of the syscall logic
against CPU hotplug, which causes nested calls and lock inversions versus
ftrace and kprobes in various interesting ways.

It's impossible to move the hotplug locking to the outer end of all call
chains in the involved facilities, so the hotplug protection in
sys_perf_event_open() needs to be solved differently.

Introduce a perf-private online cpumask, protected by the pre-existing
'pmus_lock' mutex. The mutex is taken when the mask is updated in the CPU
hotplug callbacks, and can be taken in sys_perf_event_open() to protect the
swhash setup/teardown code and when the final judgement about the validity
of the event has to be made. Illustrative sketches of both the problem and
the scheme follow the diffstat below.

[ tglx: Produced changelog and fixed the swhash interaction ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Link: http://lkml.kernel.org/r/20170524081548.930941109@linutronix.de
---
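
Illustration only, not part of the commit; the demo_* names below are
hypothetical. The inversion in a nutshell: one path takes the hotplug lock
first and a subsystem mutex inside it, another path nests the two the other
way around. With the hotplug lock converted to a percpu rwsem, a hotplug
writer pending between the two read-side acquisitions turns that into a
hard deadlock:

  /* Illustration only -- hypothetical example, not from this patch */
  static DEFINE_MUTEX(demo_mutex);        /* stand-in for a subsystem lock */

  static void demo_open_path(void)        /* think sys_perf_event_open() */
  {
          cpus_read_lock();               /* hotplug lock taken first ...      */
          mutex_lock(&demo_mutex);        /* ... subsystem mutex nested inside */
          mutex_unlock(&demo_mutex);
          cpus_read_unlock();
  }

  static void demo_patch_path(void)       /* think ftrace/kprobes/jump_label */
  {
          mutex_lock(&demo_mutex);        /* subsystem mutex taken first ... */
          cpus_read_lock();               /* ... hotplug lock nested inside  */
          cpus_read_unlock();
          mutex_unlock(&demo_mutex);
  }
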
 include/linux/perf_event.h |    2 
 kernel/events/core.c       |  106 ++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 78 insertions(+), 30 deletions(-)

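And the replacement scheme in a nutshell, again as a hypothetical,
self-contained sketch (demo_* names invented for illustration): the hotplug
callbacks maintain a private online mask under a mutex, and the open path
consults that mask instead of taking the hotplug lock itself, so the
nesting never happens:

  /* Illustration only -- hypothetical example, not from this patch */
  static DEFINE_MUTEX(demo_lock);
  static cpumask_var_t demo_online_mask;

  static int demo_cpu_up(unsigned int cpu)    /* hotplug startup callback */
  {
          mutex_lock(&demo_lock);
          cpumask_set_cpu(cpu, demo_online_mask);
          mutex_unlock(&demo_lock);
          return 0;
  }

  static int demo_cpu_down(unsigned int cpu)  /* hotplug teardown callback */
  {
          mutex_lock(&demo_lock);
          cpumask_clear_cpu(cpu, demo_online_mask);
          mutex_unlock(&demo_lock);
          return 0;
  }

  /* open path: checks the mask, takes no hotplug lock, hence no nesting */
  static int demo_open(int cpu)
  {
          int ret = 0;

          mutex_lock(&demo_lock);
          if (!cpumask_test_cpu(cpu, demo_online_mask))
                  ret = -ENODEV;
          mutex_unlock(&demo_lock);
          return ret;
  }

In the patch itself the mask is perf_online_mask, the mutex is the
pre-existing pmus_lock, and each perf_cpu_context additionally caches the
state in cpuctx->online under ctx::mutex, which is what serializes the
online check in sys_perf_event_open() against perf_event_{init,exit}_cpu().
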
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -794,6 +794,8 @@ struct perf_cpu_context {
 
 	struct list_head		sched_cb_entry;
 	int				sched_cb_usage;
+
+	int				online;
 };
 
 struct perf_output_handle {
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -386,6 +386,7 @@ static atomic_t nr_switch_events __read_
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
 static struct srcu_struct pmus_srcu;
+static cpumask_var_t perf_online_mask;
 
 /*
  * perf event paranoia level:
@@ -3809,14 +3810,6 @@ find_get_context(struct pmu *pmu, struct
 	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EACCES);
 
-	/*
-	 * We could be clever and allow to attach a event to an
-	 * offline CPU and activate it when the CPU comes up, but
-	 * that's for later.
-	 */
-	if (!cpu_online(cpu))
-		return ERR_PTR(-ENODEV);
-
 	cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 	ctx = &cpuctx->ctx;
 	get_ctx(ctx);
@@ -7592,7 +7585,8 @@ static int swevent_hlist_get_cpu(int cpu
 	int err = 0;
 
 	mutex_lock(&swhash->hlist_mutex);
-	if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
+	if (!swevent_hlist_deref(swhash) &&
+	    cpumask_test_cpu(cpu, perf_online_mask)) {
 		struct swevent_hlist *hlist;
 
 		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -7613,7 +7607,7 @@ static int swevent_hlist_get(void)
 {
 	int err, cpu, failed_cpu;
 
-	get_online_cpus();
+	mutex_lock(&pmus_lock);
 	for_each_possible_cpu(cpu) {
 		err = swevent_hlist_get_cpu(cpu);
 		if (err) {
@@ -7621,8 +7615,7 @@ static int swevent_hlist_get(void)
 			goto fail;
 		}
 	}
-	put_online_cpus();
-
+	mutex_unlock(&pmus_lock);
 	return 0;
 fail:
 	for_each_possible_cpu(cpu) {
@@ -7630,8 +7623,7 @@ static int swevent_hlist_get(void)
 			break;
 		swevent_hlist_put_cpu(cpu);
 	}
-
-	put_online_cpus();
+	mutex_unlock(&pmus_lock);
 	return err;
 }
 
@@ -8809,7 +8801,7 @@ perf_event_mux_interval_ms_store(struct
 	pmu->hrtimer_interval_ms = timer;
 
 	/* update all cpuctx for this PMU */
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		struct perf_cpu_context *cpuctx;
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
@@ -8818,7 +8810,7 @@ perf_event_mux_interval_ms_store(struct
 		cpu_function_call(cpu,
 			(remote_function_f)perf_mux_hrtimer_restart, cpuctx);
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 	mutex_unlock(&mux_interval_mutex);
 
 	return count;
@@ -8948,6 +8940,7 @@ int perf_pmu_register(struct pmu *pmu, c
 		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
 		lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
 		cpuctx->ctx.pmu = pmu;
+		cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
 
 		__perf_mux_hrtimer_init(cpuctx, cpu);
 	}
@@ -9764,12 +9757,10 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_task;
 	}
 
-	get_online_cpus();
-
 	if (task) {
 		err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
 		if (err)
-			goto err_cpus;
+			goto err_cred;
 
 		/*
 		 * Reuse ptrace permission checks for now.
@@ -9955,6 +9946,23 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_locked;
 	}
 
+	if (!task) {
+		/*
+		 * Check if the @cpu we're creating an event for is online.
+		 *
+		 * We use the perf_cpu_context::ctx::mutex to serialize against
+		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+		 */
+		struct perf_cpu_context *cpuctx =
+			container_of(ctx, struct perf_cpu_context, ctx);
+
+		if (!cpuctx->online) {
+			err = -ENODEV;
+			goto err_locked;
+		}
+	}
+
+
 	/*
 	 * Must be under the same ctx::mutex as perf_install_in_context(),
 	 * because we need to serialize with concurrent event creation.
@@ -10044,8 +10052,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		put_task_struct(task);
 	}
 
-	put_online_cpus();
-
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
@@ -10079,8 +10085,6 @@ SYSCALL_DEFINE5(perf_event_open,
 err_cred:
 	if (task)
 		mutex_unlock(&task->signal->cred_guard_mutex);
-err_cpus:
-	put_online_cpus();
 err_task:
 	if (task)
 		put_task_struct(task);
@@ -10135,6 +10139,21 @@ perf_event_create_kernel_counter(struct
 		goto err_unlock;
 	}
 
+	if (!task) {
+		/*
+		 * Check if the @cpu we're creating an event for is online.
+		 *
+		 * We use the perf_cpu_context::ctx::mutex to serialize against
+		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+		 */
+		struct perf_cpu_context *cpuctx =
+			container_of(ctx, struct perf_cpu_context, ctx);
+		if (!cpuctx->online) {
+			err = -ENODEV;
+			goto err_unlock;
+		}
+	}
+
 	if (!exclusive_event_installable(event, ctx)) {
 		err = -EBUSY;
 		goto err_unlock;
@@ -10802,6 +10821,8 @@ static void __init perf_event_init_all_c
 	struct swevent_htable *swhash;
 	int cpu;
 
+	zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+
 	for_each_possible_cpu(cpu) {
 		swhash = &per_cpu(swevent_htable, cpu);
 		mutex_init(&swhash->hlist_mutex);
@@ -10817,7 +10838,7 @@ static void __init perf_event_init_all_c
 	}
 }
 
-int perf_event_init_cpu(unsigned int cpu)
+static void perf_swevent_init_cpu(unsigned int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
@@ -10830,7 +10851,6 @@ int perf_event_init_cpu(unsigned int cpu
 		rcu_assign_pointer(swhash->swevent_hlist, hlist);
 	}
 	mutex_unlock(&swhash->hlist_mutex);
-	return 0;
 }
 
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@ -10848,19 +10868,22 @@ static void __perf_event_exit_context(vo
 
 static void perf_event_exit_cpu_context(int cpu)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
 	struct pmu *pmu;
-	int idx;
 
-	idx = srcu_read_lock(&pmus_srcu);
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+	mutex_lock(&pmus_lock);
+	list_for_each_entry(pmu, &pmus, entry) {
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
 
 		mutex_lock(&ctx->mutex);
 		smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+		cpuctx->online = 0;
 		mutex_unlock(&ctx->mutex);
 	}
-	srcu_read_unlock(&pmus_srcu, idx);
+	cpumask_clear_cpu(cpu, perf_online_mask);
+	mutex_unlock(&pmus_lock);
 }
 #else
 
@@ -10868,6 +10891,29 @@ static void perf_event_exit_cpu_context(
 
 #endif
 
+int perf_event_init_cpu(unsigned int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_event_context *ctx;
+	struct pmu *pmu;
+
+	perf_swevent_init_cpu(cpu);
+
+	mutex_lock(&pmus_lock);
+	cpumask_set_cpu(cpu, perf_online_mask);
+	list_for_each_entry(pmu, &pmus, entry) {
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
+
+		mutex_lock(&ctx->mutex);
+		cpuctx->online = 1;
+		mutex_unlock(&ctx->mutex);
+	}
+	mutex_unlock(&pmus_lock);
+
+	return 0;
+}
+
 int perf_event_exit_cpu(unsigned int cpu)
 {
 	perf_event_exit_cpu_context(cpu);