| From: Thomas Gleixner <tglx@linutronix.de> |
| Date: Wed, 24 May 2017 10:15:36 +0200 |
| Subject: [PATCH 25/32] kprobes: Cure hotplug lock ordering issues |
| |
| Converting the cpu hotplug locking to a percpu rwsem unearthed hidden lock |
| ordering problems. |
| |
| There is a wide range of locks involved in this: kprobe_mutex, |
| jump_label_mutex, ftrace_lock, text_mutex, event_mutex, module_mutex, |
| func_hash->regex_lock and a gazillion of lock order permutations with |
| nested get_online_cpus() calls. |
| |
| Some of those permutations are potential deadlocks even with the current |
| nesting hotplug locking scheme, but they can't be discovered by lockdep. |
| |
| The conversion of the hotplug locking to a percpu rwsem requires to prevent |
| nested locking, so it's required to take the hotplug rwsem early in the |
| call chain and establish a proper lock order. |
| |
| After quite some analysis and going down the wrong road severa times the |
| following lock order has been chosen: |
| |
| kprobe_mutex -> cpus_rwsem -> jump_label_mutex -> text_mutex |
| |
| For kprobes which hook on an ftrace function trace point, it's required to |
| drop cpus_rwsem before calling into the ftrace code to avoid a deadlock on |
| the func_hash->regex_lock. |
| |
| [ Steven: Ftrace interaction fixes ] |
| |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Steven Rostedt <rostedt@goodmis.org> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Acked-by: Ingo Molnar <mingo@kernel.org> |
| Acked-by: Masami Hiramatsu <mhiramat@kernel.org> |
| Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
| Cc: Peter Zijlstra <peterz@infradead.org> |
| Cc: Sebastian Siewior <bigeasy@linutronix.de> |
| Link: http://lkml.kernel.org/r/20170524081549.104864779@linutronix.de |
| --- |
| kernel/kprobes.c | 59 +++++++++++++++++++++++++++++-------------------------- |
| 1 file changed, 32 insertions(+), 27 deletions(-) |
| |
| --- a/kernel/kprobes.c |
| +++ b/kernel/kprobes.c |
| @@ -486,11 +486,6 @@ static DECLARE_DELAYED_WORK(optimizing_w |
| */ |
| static void do_optimize_kprobes(void) |
| { |
| - /* Optimization never be done when disarmed */ |
| - if (kprobes_all_disarmed || !kprobes_allow_optimization || |
| - list_empty(&optimizing_list)) |
| - return; |
| - |
| /* |
| * The optimization/unoptimization refers online_cpus via |
| * stop_machine() and cpu-hotplug modifies online_cpus. |
| @@ -498,14 +493,19 @@ static void do_optimize_kprobes(void) |
| * This combination can cause a deadlock (cpu-hotplug try to lock |
| * text_mutex but stop_machine can not be done because online_cpus |
| * has been changed) |
| - * To avoid this deadlock, we need to call get_online_cpus() |
| + * To avoid this deadlock, caller must have locked cpu hotplug |
| * for preventing cpu-hotplug outside of text_mutex locking. |
| */ |
| - get_online_cpus(); |
| + lockdep_assert_cpus_held(); |
| + |
| + /* Optimization never be done when disarmed */ |
| + if (kprobes_all_disarmed || !kprobes_allow_optimization || |
| + list_empty(&optimizing_list)) |
| + return; |
| + |
| mutex_lock(&text_mutex); |
| arch_optimize_kprobes(&optimizing_list); |
| mutex_unlock(&text_mutex); |
| - put_online_cpus(); |
| } |
| |
| /* |
| @@ -516,12 +516,13 @@ static void do_unoptimize_kprobes(void) |
| { |
| struct optimized_kprobe *op, *tmp; |
| |
| + /* See comment in do_optimize_kprobes() */ |
| + lockdep_assert_cpus_held(); |
| + |
| /* Unoptimization must be done anytime */ |
| if (list_empty(&unoptimizing_list)) |
| return; |
| |
| - /* Ditto to do_optimize_kprobes */ |
| - get_online_cpus(); |
| mutex_lock(&text_mutex); |
| arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); |
| /* Loop free_list for disarming */ |
| @@ -540,7 +541,6 @@ static void do_unoptimize_kprobes(void) |
| list_del_init(&op->list); |
| } |
| mutex_unlock(&text_mutex); |
| - put_online_cpus(); |
| } |
| |
| /* Reclaim all kprobes on the free_list */ |
| @@ -565,6 +565,7 @@ static void kick_kprobe_optimizer(void) |
| static void kprobe_optimizer(struct work_struct *work) |
| { |
| mutex_lock(&kprobe_mutex); |
| + cpus_read_lock(); |
| /* Lock modules while optimizing kprobes */ |
| mutex_lock(&module_mutex); |
| |
| @@ -590,6 +591,7 @@ static void kprobe_optimizer(struct work |
| do_free_cleaned_kprobes(); |
| |
| mutex_unlock(&module_mutex); |
| + cpus_read_unlock(); |
| mutex_unlock(&kprobe_mutex); |
| |
| /* Step 5: Kick optimizer again if needed */ |
| @@ -653,9 +655,8 @@ static void optimize_kprobe(struct kprob |
| /* Short cut to direct unoptimizing */ |
| static void force_unoptimize_kprobe(struct optimized_kprobe *op) |
| { |
| - get_online_cpus(); |
| + lockdep_assert_cpus_held(); |
| arch_unoptimize_kprobe(op); |
| - put_online_cpus(); |
| if (kprobe_disabled(&op->kp)) |
| arch_disarm_kprobe(&op->kp); |
| } |
| @@ -787,6 +788,7 @@ static void try_to_optimize_kprobe(struc |
| return; |
| |
| /* For preparing optimization, jump_label_text_reserved() is called */ |
| + cpus_read_lock(); |
| jump_label_lock(); |
| mutex_lock(&text_mutex); |
| |
| @@ -808,6 +810,7 @@ static void try_to_optimize_kprobe(struc |
| out: |
| mutex_unlock(&text_mutex); |
| jump_label_unlock(); |
| + cpus_read_unlock(); |
| } |
| |
| #ifdef CONFIG_SYSCTL |
| @@ -822,6 +825,7 @@ static void optimize_all_kprobes(void) |
| if (kprobes_allow_optimization) |
| goto out; |
| |
| + cpus_read_lock(); |
| kprobes_allow_optimization = true; |
| for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
| head = &kprobe_table[i]; |
| @@ -829,6 +833,7 @@ static void optimize_all_kprobes(void) |
| if (!kprobe_disabled(p)) |
| optimize_kprobe(p); |
| } |
| + cpus_read_unlock(); |
| printk(KERN_INFO "Kprobes globally optimized\n"); |
| out: |
| mutex_unlock(&kprobe_mutex); |
| @@ -847,6 +852,7 @@ static void unoptimize_all_kprobes(void) |
| return; |
| } |
| |
| + cpus_read_lock(); |
| kprobes_allow_optimization = false; |
| for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
| head = &kprobe_table[i]; |
| @@ -855,6 +861,7 @@ static void unoptimize_all_kprobes(void) |
| unoptimize_kprobe(p, false); |
| } |
| } |
| + cpus_read_unlock(); |
| mutex_unlock(&kprobe_mutex); |
| |
| /* Wait for unoptimizing completion */ |
| @@ -1006,14 +1013,11 @@ static void arm_kprobe(struct kprobe *kp |
| arm_kprobe_ftrace(kp); |
| return; |
| } |
| - /* |
| - * Here, since __arm_kprobe() doesn't use stop_machine(), |
| - * this doesn't cause deadlock on text_mutex. So, we don't |
| - * need get_online_cpus(). |
| - */ |
| + cpus_read_lock(); |
| mutex_lock(&text_mutex); |
| __arm_kprobe(kp); |
| mutex_unlock(&text_mutex); |
| + cpus_read_unlock(); |
| } |
| |
| /* Disarm a kprobe with text_mutex */ |
| @@ -1023,10 +1027,12 @@ static void disarm_kprobe(struct kprobe |
| disarm_kprobe_ftrace(kp); |
| return; |
| } |
| - /* Ditto */ |
| + |
| + cpus_read_lock(); |
| mutex_lock(&text_mutex); |
| __disarm_kprobe(kp, reopt); |
| mutex_unlock(&text_mutex); |
| + cpus_read_unlock(); |
| } |
| |
| /* |
| @@ -1294,13 +1300,10 @@ static int register_aggr_kprobe(struct k |
| int ret = 0; |
| struct kprobe *ap = orig_p; |
| |
| + cpus_read_lock(); |
| + |
| /* For preparing optimization, jump_label_text_reserved() is called */ |
| jump_label_lock(); |
| - /* |
| - * Get online CPUs to avoid text_mutex deadlock.with stop machine, |
| - * which is invoked by unoptimize_kprobe() in add_new_kprobe() |
| - */ |
| - get_online_cpus(); |
| mutex_lock(&text_mutex); |
| |
| if (!kprobe_aggrprobe(orig_p)) { |
| @@ -1348,8 +1351,8 @@ static int register_aggr_kprobe(struct k |
| |
| out: |
| mutex_unlock(&text_mutex); |
| - put_online_cpus(); |
| jump_label_unlock(); |
| + cpus_read_unlock(); |
| |
| if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) { |
| ap->flags &= ~KPROBE_FLAG_DISABLED; |
| @@ -1548,9 +1551,12 @@ int register_kprobe(struct kprobe *p) |
| goto out; |
| } |
| |
| - mutex_lock(&text_mutex); /* Avoiding text modification */ |
| + cpus_read_lock(); |
| + /* Prevent text modification */ |
| + mutex_lock(&text_mutex); |
| ret = prepare_kprobe(p); |
| mutex_unlock(&text_mutex); |
| + cpus_read_unlock(); |
| if (ret) |
| goto out; |
| |
| @@ -1563,7 +1569,6 @@ int register_kprobe(struct kprobe *p) |
| |
| /* Try to optimize kprobe */ |
| try_to_optimize_kprobe(p); |
| - |
| out: |
| mutex_unlock(&kprobe_mutex); |
| |