| From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| Date: Mon, 20 May 2019 13:09:08 +0200 |
| Subject: [PATCH] softirq: Add preemptible softirq |
| |
| Add a preemptible softirq implementation for RT's needs. By removing the |
| softirq count from the preempt counter, softirq processing becomes |
| preemptible. A per-CPU lock ensures that there is no parallel softirq |
| processing and that per-CPU variables are not accessed in parallel by |
| multiple threads. |
| |
| local_bh_enable() will process all softirq work that has been raised in |
| its BH-disabled section once the BH counter drops to zero. |
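| |
| As an illustration only (not part of the patch), a minimal user-space model |
| of that behaviour could look like the sketch below. The names model_lock, |
| model_bh_count and model_pending are stand-ins for the per-CPU lock, the BH |
| counter and local_softirq_pending(); they are not the kernel symbols: |
| |
| #include <pthread.h> |
| #include <stdio.h> |
| |
| /* models the per-CPU lock that serializes BH/softirq sections */ |
| static pthread_mutex_t model_lock = PTHREAD_MUTEX_INITIALIZER; |
| static __thread int model_bh_count;         /* BH disable nesting depth */ |
| static __thread unsigned int model_pending; /* raised-but-unprocessed work */ |
| |
| static void model_bh_disable(void) |
| { |
| 	if (model_bh_count++ == 0)          /* outermost disable takes the lock */ |
| 		pthread_mutex_lock(&model_lock); |
| } |
| |
| static void model_bh_enable(void) |
| { |
| 	if (model_bh_count == 1 && model_pending) { |
| 		/* final enable processes what was raised in the section */ |
| 		printf("processing pending softirq work %02x\n", model_pending); |
| 		model_pending = 0; |
| 	} |
| 	if (--model_bh_count == 0)          /* outermost enable drops the lock */ |
| 		pthread_mutex_unlock(&model_lock); |
| } |
| |
| int main(void) |
| { |
| 	model_bh_disable(); |
| 	model_bh_disable();                 /* nesting is allowed */ |
| 	model_pending |= 1 << 3;            /* a softirq gets raised meanwhile */ |
| 	model_bh_enable();                  /* inner enable: work stays pending */ |
| 	model_bh_enable();                  /* outermost enable: work runs here */ |
| 	return 0; |
| } |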
| |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| --- |
| include/linux/bottom_half.h | 5 |
| include/linux/interrupt.h | 1 |
| include/linux/preempt.h | 17 ++- |
| include/linux/sched.h | 3 |
| kernel/softirq.c | 222 +++++++++++++++++++++++++++++++++++++++++++- |
| kernel/time/tick-sched.c | 9 - |
| 6 files changed, 244 insertions(+), 13 deletions(-) |
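| |
| As a further illustration only (again not part of the patch), the sketch |
| below models the RT rule that raising a softirq outside a BH-disabled |
| section has to wake ksoftirqd, because no local_bh_enable() is coming that |
| would process the work. The model_* names are stand-ins, not kernel symbols: |
| |
| #include <stdbool.h> |
| #include <stdio.h> |
| |
| static int model_bh_count;          /* models the per-CPU softirq_counter */ |
| static unsigned int model_pending;  /* models local_softirq_pending() */ |
| |
| static void model_wake_ksoftirqd(void) |
| { |
| 	printf("wake ksoftirqd for pending work %02x\n", model_pending); |
| } |
| |
| static void model_raise_softirq(unsigned int nr, bool in_hardirq) |
| { |
| 	model_pending |= 1u << nr; |
| 	if (in_hardirq) |
| 		return;                 /* irq-exit path decides about the wakeup */ |
| 	if (model_bh_count == 0) |
| 		model_wake_ksoftirqd(); /* nobody else will run the work */ |
| 	/* else: the outermost local_bh_enable() will process it */ |
| } |
| |
| int main(void) |
| { |
| 	model_raise_softirq(3, false);  /* outside a BH section: wakeup */ |
| 	model_bh_count = 1;             /* as if inside local_bh_disable() */ |
| 	model_raise_softirq(6, false);  /* deferred to the final enable */ |
| 	return 0; |
| } |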
| |
| --- a/include/linux/bottom_half.h |
| +++ b/include/linux/bottom_half.h |
| @@ -4,6 +4,10 @@ |
| |
| #include <linux/preempt.h> |
| |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| +extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); |
| +#else |
| + |
| #ifdef CONFIG_TRACE_IRQFLAGS |
| extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); |
| #else |
| @@ -13,6 +17,7 @@ static __always_inline void __local_bh_d |
| barrier(); |
| } |
| #endif |
| +#endif |
| |
| static inline void local_bh_disable(void) |
| { |
| --- a/include/linux/interrupt.h |
| +++ b/include/linux/interrupt.h |
| @@ -527,6 +527,7 @@ extern void __raise_softirq_irqoff(unsig |
| |
| extern void raise_softirq_irqoff(unsigned int nr); |
| extern void raise_softirq(unsigned int nr); |
| +extern void softirq_check_pending_idle(void); |
| |
| DECLARE_PER_CPU(struct task_struct *, ksoftirqd); |
| |
| --- a/include/linux/preempt.h |
| +++ b/include/linux/preempt.h |
| @@ -78,10 +78,8 @@ |
| #include <asm/preempt.h> |
| |
| #define hardirq_count() (preempt_count() & HARDIRQ_MASK) |
| -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) |
| #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ |
| | NMI_MASK)) |
| - |
| /* |
| * Are we doing bottom half or hardware interrupt processing? |
| * |
| @@ -96,12 +94,23 @@ |
| * should not be used in new code. |
| */ |
| #define in_irq() (hardirq_count()) |
| -#define in_softirq() (softirq_count()) |
| #define in_interrupt() (irq_count()) |
| -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) |
| #define in_nmi() (preempt_count() & NMI_MASK) |
| #define in_task() (!(preempt_count() & \ |
| (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + |
| +#define softirq_count() ((long)get_current()->softirq_count) |
| +#define in_softirq() (softirq_count()) |
| +#define in_serving_softirq() (get_current()->softirq_count & SOFTIRQ_OFFSET) |
| + |
| +#else |
| + |
| +#define softirq_count() (preempt_count() & SOFTIRQ_MASK) |
| +#define in_softirq() (softirq_count()) |
| +#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) |
| + |
| +#endif |
| |
| /* |
| * The preempt_count offset after preempt_disable(); |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -927,6 +927,9 @@ struct task_struct { |
| int softirqs_enabled; |
| int softirq_context; |
| #endif |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + int softirq_count; |
| +#endif |
| |
| #ifdef CONFIG_LOCKDEP |
| # define MAX_LOCK_DEPTH 48UL |
| --- a/kernel/softirq.c |
| +++ b/kernel/softirq.c |
| @@ -26,6 +26,7 @@ |
| #include <linux/smpboot.h> |
| #include <linux/tick.h> |
| #include <linux/irq.h> |
| +#include <linux/locallock.h> |
| |
| #define CREATE_TRACE_POINTS |
| #include <trace/events/irq.h> |
| @@ -77,6 +78,7 @@ static void wakeup_softirqd(void) |
| wake_up_process(tsk); |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT_FULL |
| /* |
| * If ksoftirqd is scheduled, we do not want to process pending softirqs |
| * right now. Let ksoftirqd handle this at its own rate, to get fairness, |
| @@ -91,6 +93,7 @@ static bool ksoftirqd_running(unsigned l |
| return false; |
| return tsk && (tsk->state == TASK_RUNNING); |
| } |
| +#endif |
| |
| /* |
| * preempt_count and SOFTIRQ_OFFSET usage: |
| @@ -102,6 +105,98 @@ static bool ksoftirqd_running(unsigned l |
| * softirq and whether we just have bh disabled. |
| */ |
| |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| +static DEFINE_LOCAL_IRQ_LOCK(bh_lock); |
| +static DEFINE_PER_CPU(long, softirq_counter); |
| + |
| +void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) |
| +{ |
| + unsigned long __maybe_unused flags; |
| + long soft_cnt; |
| + |
| + WARN_ON_ONCE(in_irq()); |
| + if (!in_atomic()) |
| + local_lock(bh_lock); |
| + soft_cnt = this_cpu_inc_return(softirq_counter); |
| + WARN_ON_ONCE(soft_cnt == 0); |
| + current->softirq_count += SOFTIRQ_DISABLE_OFFSET; |
| + |
| +#ifdef CONFIG_TRACE_IRQFLAGS |
| + local_irq_save(flags); |
| + if (soft_cnt == 1) |
| + trace_softirqs_off(ip); |
| + local_irq_restore(flags); |
| +#endif |
| +} |
| +EXPORT_SYMBOL(__local_bh_disable_ip); |
| + |
| +static void local_bh_disable_rt(void) |
| +{ |
| + local_bh_disable(); |
| +} |
| + |
| +void _local_bh_enable(void) |
| +{ |
| + unsigned long __maybe_unused flags; |
| + long soft_cnt; |
| + |
| + soft_cnt = this_cpu_dec_return(softirq_counter); |
| + WARN_ON_ONCE(soft_cnt < 0); |
| + |
| +#ifdef CONFIG_TRACE_IRQFLAGS |
| + local_irq_save(flags); |
| + if (soft_cnt == 0) |
| + trace_softirqs_on(_RET_IP_); |
| + local_irq_restore(flags); |
| +#endif |
| + |
| + current->softirq_count -= SOFTIRQ_DISABLE_OFFSET; |
| + if (!in_atomic()) |
| + local_unlock(bh_lock); |
| +} |
| + |
| +void _local_bh_enable_rt(void) |
| +{ |
| + _local_bh_enable(); |
| +} |
| + |
| +void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) |
| +{ |
| + u32 pending; |
| + long count; |
| + |
| + WARN_ON_ONCE(in_irq()); |
| + lockdep_assert_irqs_enabled(); |
| + |
| + local_irq_disable(); |
| + count = this_cpu_read(softirq_counter); |
| + |
| + if (unlikely(count == 1)) { |
| + pending = local_softirq_pending(); |
| + if (pending) { |
| + if (!in_atomic()) |
| + __do_softirq(); |
| + else |
| + wakeup_softirqd(); |
| + } |
| + trace_softirqs_on(ip); |
| + } |
| + count = this_cpu_dec_return(softirq_counter); |
| + WARN_ON_ONCE(count < 0); |
| + local_irq_enable(); |
| + |
| + if (!in_atomic()) |
| + local_unlock(bh_lock); |
| + |
| + current->softirq_count -= SOFTIRQ_DISABLE_OFFSET; |
| + preempt_check_resched(); |
| +} |
| +EXPORT_SYMBOL(__local_bh_enable_ip); |
| + |
| +#else |
| +static void local_bh_disable_rt(void) { } |
| +static void _local_bh_enable_rt(void) { } |
| + |
| /* |
| * This one is for softirq.c-internal use, |
| * where hardirqs are disabled legitimately: |
| @@ -196,6 +291,7 @@ void __local_bh_enable_ip(unsigned long |
| preempt_check_resched(); |
| } |
| EXPORT_SYMBOL(__local_bh_enable_ip); |
| +#endif |
| |
| /* |
| * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times, |
| @@ -266,7 +362,11 @@ asmlinkage __visible void __softirq_entr |
| pending = local_softirq_pending(); |
| account_irq_enter_time(current); |
| |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + current->softirq_count |= SOFTIRQ_OFFSET; |
| +#else |
| __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); |
| +#endif |
| in_hardirq = lockdep_softirq_start(); |
| |
| restart: |
| @@ -300,9 +400,10 @@ asmlinkage __visible void __softirq_entr |
| h++; |
| pending >>= softirq_bit; |
| } |
| - |
| +#ifndef CONFIG_PREEMPT_RT_FULL |
| if (__this_cpu_read(ksoftirqd) == current) |
| rcu_softirq_qs(); |
| +#endif |
| local_irq_disable(); |
| |
| pending = local_softirq_pending(); |
| @@ -316,11 +417,16 @@ asmlinkage __visible void __softirq_entr |
| |
| lockdep_softirq_end(in_hardirq); |
| account_irq_exit_time(current); |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + current->softirq_count &= ~SOFTIRQ_OFFSET; |
| +#else |
| __local_bh_enable(SOFTIRQ_OFFSET); |
| +#endif |
| WARN_ON_ONCE(in_interrupt()); |
| current_restore_flags(old_flags, PF_MEMALLOC); |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT_FULL |
| asmlinkage __visible void do_softirq(void) |
| { |
| __u32 pending; |
| @@ -338,6 +444,7 @@ asmlinkage __visible void do_softirq(voi |
| |
| local_irq_restore(flags); |
| } |
| +#endif |
| |
| /* |
| * Enter an interrupt context. |
| @@ -358,6 +465,16 @@ void irq_enter(void) |
| __irq_enter(); |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + |
| +static inline void invoke_softirq(void) |
| +{ |
| + if (this_cpu_read(softirq_counter) == 0) |
| + wakeup_softirqd(); |
| +} |
| + |
| +#else |
| + |
| static inline void invoke_softirq(void) |
| { |
| if (ksoftirqd_running(local_softirq_pending())) |
| @@ -383,6 +500,7 @@ static inline void invoke_softirq(void) |
| wakeup_softirqd(); |
| } |
| } |
| +#endif |
| |
| static inline void tick_irq_exit(void) |
| { |
| @@ -420,6 +538,27 @@ void irq_exit(void) |
| /* |
| * This function must run with irqs disabled! |
| */ |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| +void raise_softirq_irqoff(unsigned int nr) |
| +{ |
| + __raise_softirq_irqoff(nr); |
| + |
| + /* |
| + * If we're in a hard interrupt we let the irq return code deal |
| + * with the wakeup of ksoftirqd. |
| + */ |
| + if (in_irq()) |
| + return; |
| + /* |
| + * If we are not in a BH-disabled section then we have to wake |
| + * ksoftirqd. |
| + */ |
| + if (this_cpu_read(softirq_counter) == 0) |
| + wakeup_softirqd(); |
| +} |
| + |
| +#else |
| + |
| inline void raise_softirq_irqoff(unsigned int nr) |
| { |
| __raise_softirq_irqoff(nr); |
| @@ -437,6 +576,8 @@ inline void raise_softirq_irqoff(unsigne |
| wakeup_softirqd(); |
| } |
| |
| +#endif |
| + |
| void raise_softirq(unsigned int nr) |
| { |
| unsigned long flags; |
| @@ -645,6 +786,7 @@ static int ksoftirqd_should_run(unsigned |
| |
| static void run_ksoftirqd(unsigned int cpu) |
| { |
| + local_bh_disable_rt(); |
| local_irq_disable(); |
| if (local_softirq_pending()) { |
| /* |
| @@ -653,10 +795,12 @@ static void run_ksoftirqd(unsigned int c |
| */ |
| __do_softirq(); |
| local_irq_enable(); |
| + _local_bh_enable_rt(); |
| cond_resched(); |
| return; |
| } |
| local_irq_enable(); |
| + _local_bh_enable_rt(); |
| } |
| |
| #ifdef CONFIG_HOTPLUG_CPU |
| @@ -730,6 +874,13 @@ static struct smp_hotplug_thread softirq |
| |
| static __init int spawn_ksoftirqd(void) |
| { |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + int cpu; |
| + |
| + for_each_possible_cpu(cpu) |
| + lockdep_set_novalidate_class(per_cpu_ptr(&bh_lock.lock, cpu)); |
| +#endif |
| + |
| cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, |
| takeover_tasklets); |
| BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); |
| @@ -738,6 +889,75 @@ static __init int spawn_ksoftirqd(void) |
| } |
| early_initcall(spawn_ksoftirqd); |
| |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + |
| +/* |
| + * On preempt-rt a context processing softirqs might be blocked on a |
| + * lock. There might be no other runnable task on this CPU because the |
| + * lock owner runs on some other CPU. So we have to go into idle with |
| + * the pending bit set. Therefore we need to check this, otherwise we |
| + * warn about false positives, which confuses users and defeats the |
| + * whole purpose of this test. |
| + * |
| + * This code is called with interrupts disabled. |
| + */ |
| +void softirq_check_pending_idle(void) |
| +{ |
| + struct task_struct *tsk = __this_cpu_read(ksoftirqd); |
| + static int rate_limit; |
| + bool okay = false; |
| + u32 warnpending; |
| + |
| + if (rate_limit >= 10) |
| + return; |
| + |
| + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK; |
| + if (!warnpending) |
| + return; |
| + |
| + if (!tsk) |
| + return; |
| + /* |
| + * If ksoftirqd is blocked on a lock then we may go idle with pending |
| + * softirq. |
| + */ |
| + raw_spin_lock(&tsk->pi_lock); |
| + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING || |
| + (tsk->state == TASK_UNINTERRUPTIBLE && tsk->sleeping_lock)) { |
| + okay = true; |
| + } |
| + raw_spin_unlock(&tsk->pi_lock); |
| + if (okay) |
| + return; |
| + /* |
| + * The softirq lock is held in non-atomic context and the owner is |
| + * blocking on a lock. It will schedule softirqs once the counter goes |
| + * back to zero. |
| + */ |
| + if (this_cpu_read(softirq_counter) > 0) |
| + return; |
| + |
| + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", |
| + warnpending); |
| + rate_limit++; |
| +} |
| + |
| +#else |
| + |
| +void softirq_check_pending_idle(void) |
| +{ |
| + static int ratelimit; |
| + |
| + if (ratelimit < 10 && |
| + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { |
| + pr_warn("NOHZ: local_softirq_pending %02x\n", |
| + (unsigned int) local_softirq_pending()); |
| + ratelimit++; |
| + } |
| +} |
| + |
| +#endif |
| + |
| /* |
| * [ These __weak aliases are kept in a separate compilation unit, so that |
| * GCC does not inline them incorrectly. ] |
| --- a/kernel/time/tick-sched.c |
| +++ b/kernel/time/tick-sched.c |
| @@ -883,14 +883,7 @@ static bool can_stop_idle_tick(int cpu, |
| return false; |
| |
| if (unlikely(local_softirq_pending())) { |
| - static int ratelimit; |
| - |
| - if (ratelimit < 10 && |
| - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { |
| - pr_warn("NOHZ: local_softirq_pending %02x\n", |
| - (unsigned int) local_softirq_pending()); |
| - ratelimit++; |
| - } |
| + softirq_check_pending_idle(); |
| return false; |
| } |
| |