| From: Alex Shi <alex.shi@linaro.org> |
| Date: Fri, 28 Jul 2017 15:09:25 +0800 |
| Subject: PM / CPU: replace raw_notifier with atomic_notifier |
| |
| This patch replaces an rwlock and raw notifier by an atomic notifier |
| protected by a spin_lock and RCU. |
| |
| The main reason for this change is due to a 'scheduling while atomic' |
| bug with RT kernels on ARM/ARM64. On ARM/ARM64, the rwlock |
| cpu_pm_notifier_lock in cpu_pm_enter/exit() causes a potential |
| schedule after IRQ disable in the idle call chain: |
| |
| cpu_startup_entry |
| cpu_idle_loop |
| local_irq_disable() |
| cpuidle_idle_call |
| call_cpuidle |
| cpuidle_enter |
| cpuidle_enter_state |
| ->enter :arm_enter_idle_state |
| cpu_pm_enter/exit |
| CPU_PM_CPU_IDLE_ENTER |
| read_lock(&cpu_pm_notifier_lock); <-- sleep in idle |
| __rt_spin_lock(); |
| schedule(); |
| |
| The kernel panic is here: |
| [ 4.609601] BUG: scheduling while atomic: swapper/1/0/0x00000002 |
| [ 4.609608] [<ffff0000086fae70>] arm_enter_idle_state+0x18/0x70 |
| [ 4.609614] Modules linked in: |
| [ 4.609615] [<ffff0000086f9298>] cpuidle_enter_state+0xf0/0x218 |
| [ 4.609620] [<ffff0000086f93f8>] cpuidle_enter+0x18/0x20 |
| [ 4.609626] Preemption disabled at: |
| [ 4.609627] [<ffff0000080fa234>] call_cpuidle+0x24/0x40 |
| [ 4.609635] [<ffff000008882fa4>] schedule_preempt_disabled+0x1c/0x28 |
| [ 4.609639] [<ffff0000080fa49c>] cpu_startup_entry+0x154/0x1f8 |
| [ 4.609645] [<ffff00000808e004>] secondary_start_kernel+0x15c/0x1a0 |
| |
| Daniel Lezcano said this notification is needed on arm/arm64 platforms. |
| Sebastian suggested using atomic_notifier instead of rwlock, which is not |
| only removing the sleeping in idle, but also improving latency. |
| |
| Tony Lindgren found a miss use that rcu_read_lock used after rcu_idle_enter |
| Paul McKenney suggested trying RCU_NONIDLE. |
| |
| Signed-off-by: Alex Shi <alex.shi@linaro.org> |
| Tested-by: Tony Lindgren <tony@atomide.com> |
| Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| [ rjw: Subject & changelog ] |
| Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| --- |
| kernel/cpu_pm.c | 50 +++++++++++++------------------------------------- |
| 1 file changed, 13 insertions(+), 37 deletions(-) |
| |
| --- a/kernel/cpu_pm.c |
| +++ b/kernel/cpu_pm.c |
| @@ -22,15 +22,21 @@ |
| #include <linux/spinlock.h> |
| #include <linux/syscore_ops.h> |
| |
| -static DEFINE_RWLOCK(cpu_pm_notifier_lock); |
| -static RAW_NOTIFIER_HEAD(cpu_pm_notifier_chain); |
| +static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain); |
| |
| static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls) |
| { |
| int ret; |
| |
| - ret = __raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL, |
| + /* |
| + * __atomic_notifier_call_chain has a RCU read critical section, which |
| + * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let |
| + * RCU know this. |
| + */ |
| + rcu_irq_enter_irqson(); |
| + ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL, |
| nr_to_call, nr_calls); |
| + rcu_irq_exit_irqson(); |
| |
| return notifier_to_errno(ret); |
| } |
| @@ -47,14 +53,7 @@ static int cpu_pm_notify(enum cpu_pm_eve |
| */ |
| int cpu_pm_register_notifier(struct notifier_block *nb) |
| { |
| - unsigned long flags; |
| - int ret; |
| - |
| - write_lock_irqsave(&cpu_pm_notifier_lock, flags); |
| - ret = raw_notifier_chain_register(&cpu_pm_notifier_chain, nb); |
| - write_unlock_irqrestore(&cpu_pm_notifier_lock, flags); |
| - |
| - return ret; |
| + return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb); |
| } |
| EXPORT_SYMBOL_GPL(cpu_pm_register_notifier); |
| |
| @@ -69,14 +68,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifi |
| */ |
| int cpu_pm_unregister_notifier(struct notifier_block *nb) |
| { |
| - unsigned long flags; |
| - int ret; |
| - |
| - write_lock_irqsave(&cpu_pm_notifier_lock, flags); |
| - ret = raw_notifier_chain_unregister(&cpu_pm_notifier_chain, nb); |
| - write_unlock_irqrestore(&cpu_pm_notifier_lock, flags); |
| - |
| - return ret; |
| + return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb); |
| } |
| EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); |
| |
| @@ -100,7 +92,6 @@ int cpu_pm_enter(void) |
| int nr_calls; |
| int ret = 0; |
| |
| - read_lock(&cpu_pm_notifier_lock); |
| ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls); |
| if (ret) |
| /* |
| @@ -108,7 +99,6 @@ int cpu_pm_enter(void) |
| * PM entry who are notified earlier to prepare for it. |
| */ |
| cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL); |
| - read_unlock(&cpu_pm_notifier_lock); |
| |
| return ret; |
| } |
| @@ -128,13 +118,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter); |
| */ |
| int cpu_pm_exit(void) |
| { |
| - int ret; |
| - |
| - read_lock(&cpu_pm_notifier_lock); |
| - ret = cpu_pm_notify(CPU_PM_EXIT, -1, NULL); |
| - read_unlock(&cpu_pm_notifier_lock); |
| - |
| - return ret; |
| + return cpu_pm_notify(CPU_PM_EXIT, -1, NULL); |
| } |
| EXPORT_SYMBOL_GPL(cpu_pm_exit); |
| |
| @@ -159,7 +143,6 @@ int cpu_cluster_pm_enter(void) |
| int nr_calls; |
| int ret = 0; |
| |
| - read_lock(&cpu_pm_notifier_lock); |
| ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls); |
| if (ret) |
| /* |
| @@ -167,7 +150,6 @@ int cpu_cluster_pm_enter(void) |
| * PM entry who are notified earlier to prepare for it. |
| */ |
| cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL); |
| - read_unlock(&cpu_pm_notifier_lock); |
| |
| return ret; |
| } |
| @@ -190,13 +172,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); |
| */ |
| int cpu_cluster_pm_exit(void) |
| { |
| - int ret; |
| - |
| - read_lock(&cpu_pm_notifier_lock); |
| - ret = cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL); |
| - read_unlock(&cpu_pm_notifier_lock); |
| - |
| - return ret; |
| + return cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL); |
| } |
| EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit); |
| |