| From 8acdcc8d9d0a296b89756eb9b76289ff311f6962 Mon Sep 17 00:00:00 2001 |
| From: Ingo Molnar <mingo@elte.hu> |
| Date: Fri, 3 Jul 2009 08:29:31 -0500 |
| Subject: [PATCH] softirqs: forced threading of softirqs |
| |
| commit cecf393e7eca1950e4299f21c072617b8f7b568c in tip. |
| |
| Split softirq processing into separate kernel threads, one per softirq vector. |
| |
| [PG: the original didn't need extra_flags back in 2.6.31; also |
| add entries like BLOCK_IOPOLL_SOFTIRQ, which appeared in 2.6.33-rt] |
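| |
| Each softirq vector gets its own per-CPU SCHED_FIFO thread, named |
| softirq-<name>/<cpu> and running at priority MAX_USER_RT_PRIO/2 - 1, |
| so e.g. network receive processing runs in "softirq-net-rx/0" on |
| CPU 0.  A rough sketch of the raise path with threading enabled |
| (pseudocode, not the literal code from the diff below): |
| |
| 	raise_softirq_irqoff(nr): |
| 		or_softirq_pending(1UL << nr);	/* mark vector pending */ |
| 		wakeup_softirqd(nr);		/* wake softirq-<name>/<cpu> */ |
| |
| Each thread clears and handles only its own pending bit, so one |
| long-running vector (e.g. net-rx) no longer delays the others. |
| Threading defaults to on; it can be disabled with the |
| softirq-preempt=off boot option or, per the Kconfig help text, the |
| /proc/sys/kernel/softirq_preemption runtime flag. |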
| |
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h |
| index 54c9394..f89e357 100644 |
| --- a/include/linux/interrupt.h |
| +++ b/include/linux/interrupt.h |
| @@ -354,7 +354,6 @@ enum |
| SCHED_SOFTIRQ, |
| HRTIMER_SOFTIRQ, |
| RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ |
| - |
| NR_SOFTIRQS |
| }; |
| |
| @@ -372,11 +371,14 @@ struct softirq_action |
| void (*action)(struct softirq_action *); |
| }; |
| |
| +#define __raise_softirq_irqoff(nr) \ |
| + do { or_softirq_pending(1UL << (nr)); } while (0) |
| +#define __do_raise_softirq_irqoff(nr) __raise_softirq_irqoff(nr) |
| + |
| asmlinkage void do_softirq(void); |
| asmlinkage void __do_softirq(void); |
| extern void open_softirq(int nr, void (*action)(struct softirq_action *)); |
| extern void softirq_init(void); |
| -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) |
| extern void raise_softirq_irqoff(unsigned int nr); |
| extern void raise_softirq(unsigned int nr); |
| |
| @@ -524,6 +526,7 @@ extern void tasklet_kill(struct tasklet_struct *t); |
| extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); |
| extern void tasklet_init(struct tasklet_struct *t, |
| void (*func)(unsigned long), unsigned long data); |
| +extern void takeover_tasklets(unsigned int cpu); |
| |
| struct tasklet_hrtimer { |
| struct hrtimer timer; |
| diff --git a/include/linux/sched.h b/include/linux/sched.h |
| index c916409..4807851 100644 |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -102,6 +102,12 @@ struct fs_struct; |
| struct bts_context; |
| struct perf_event_context; |
| |
| +#ifdef CONFIG_PREEMPT_SOFTIRQS |
| +extern int softirq_preemption; |
| +#else |
| +# define softirq_preemption 0 |
| +#endif |
| + |
| /* |
| * List of flags we want to share for kernel threads, |
| * if only because they are not used by them anyway. |
| @@ -1234,6 +1240,7 @@ struct task_struct { |
| void *stack; |
| atomic_t usage; |
| unsigned int flags; /* per process flags, defined below */ |
| + unsigned int extra_flags; |
| unsigned int ptrace; |
| |
| int lock_depth; /* BKL lock depth */ |
| @@ -1788,6 +1795,9 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * |
| #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ |
| #define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ |
| |
| +/* Flags in the extra_flags field */ |
| +#define PFE_SOFTIRQ 0x00000001 /* softirq context */ |
| + |
| /* |
| * Only the _current_ task can read/write to tsk->flags, but other |
| * tasks can access tsk->flags in readonly mode for example |
| @@ -2440,6 +2450,8 @@ extern int __cond_resched_softirq(void); |
| __cond_resched_softirq(); \ |
| }) |
| |
| +extern int cond_resched_softirq_context(void); |
| + |
| /* |
| * Does a critical section need to be broken due to another |
| * task waiting?: (technically does not depend on CONFIG_PREEMPT, |
| @@ -2471,6 +2483,13 @@ static inline void thread_group_cputime_free(struct signal_struct *sig) |
| { |
| } |
| |
| +static inline int softirq_need_resched(void) |
| +{ |
| + if (softirq_preemption && (current->extra_flags & PFE_SOFTIRQ)) |
| + return need_resched(); |
| + return 0; |
| +} |
| + |
| /* |
| * Reevaluate whether the task has signals pending delivery. |
| * Wake the task if so. |
| diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt |
| index bf987b9..2ff1834 100644 |
| --- a/kernel/Kconfig.preempt |
| +++ b/kernel/Kconfig.preempt |
| @@ -52,3 +52,18 @@ config PREEMPT |
| |
| endchoice |
| |
| +config PREEMPT_SOFTIRQS |
| + bool "Thread Softirqs" |
| + default n |
| +# depends on PREEMPT |
| + help |
| + This option reduces the latency of the kernel by 'threading' |
| + soft interrupts. This means that all softirqs will execute |
| +	  in the context of per-softirq kernel threads. While this |
| +	  helps latency, it can also reduce performance. |
| + |
| +	  The threading of softirqs can also be controlled via the |
| +	  /proc/sys/kernel/softirq_preemption runtime flag and the |
| +	  softirq-preempt=0/1 boot-time option. |
| + |
| + Say N if you are unsure. |
| diff --git a/kernel/sched.c b/kernel/sched.c |
| index 1c5f49d..b059d2f 100644 |
| --- a/kernel/sched.c |
| +++ b/kernel/sched.c |
| @@ -5125,7 +5125,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, |
| tmp = cputime_to_cputime64(cputime); |
| if (hardirq_count() - hardirq_offset) |
| cpustat->irq = cputime64_add(cpustat->irq, tmp); |
| - else if (softirq_count()) |
| + else if (softirq_count() || (p->extra_flags & PFE_SOFTIRQ)) |
| cpustat->softirq = cputime64_add(cpustat->softirq, tmp); |
| else |
| cpustat->system = cputime64_add(cpustat->system, tmp); |
| @@ -5689,7 +5689,7 @@ asmlinkage void __sched preempt_schedule_irq(void) |
| int saved_lock_depth; |
| |
| /* Catch callers which need to be fixed */ |
| - BUG_ON(ti->preempt_count || !irqs_disabled()); |
| + WARN_ON_ONCE(ti->preempt_count || !irqs_disabled()); |
| |
| do { |
| add_preempt_count(PREEMPT_ACTIVE); |
| @@ -6830,9 +6830,12 @@ int __cond_resched_lock(spinlock_t *lock) |
| } |
| EXPORT_SYMBOL(__cond_resched_lock); |
| |
| +/* |
| + * Voluntarily preempt a process context that has softirqs disabled: |
| + */ |
| int __sched __cond_resched_softirq(void) |
| { |
| - BUG_ON(!in_softirq()); |
| + WARN_ON_ONCE(!in_softirq()); |
| |
| if (should_resched()) { |
| local_bh_enable(); |
| @@ -6844,6 +6847,25 @@ int __sched __cond_resched_softirq(void) |
| } |
| EXPORT_SYMBOL(__cond_resched_softirq); |
| |
| +/* |
| + * Voluntarily preempt a softirq context (possible with softirq threading): |
| + */ |
| +int __sched cond_resched_softirq_context(void) |
| +{ |
| + WARN_ON_ONCE(!in_softirq()); |
| + |
| + if (softirq_need_resched() && system_state == SYSTEM_RUNNING) { |
| + raw_local_irq_disable(); |
| + _local_bh_enable(); |
| + raw_local_irq_enable(); |
| + __cond_resched(); |
| + local_bh_disable(); |
| + return 1; |
| + } |
| + return 0; |
| +} |
| +EXPORT_SYMBOL(cond_resched_softirq_context); |
| + |
| /** |
| * yield - yield the current processor to other threads. |
| * |
| diff --git a/kernel/softirq.c b/kernel/softirq.c |
| index 19ef218..eda9c66 100644 |
| --- a/kernel/softirq.c |
| +++ b/kernel/softirq.c |
| @@ -8,9 +8,15 @@ |
| * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) |
| * |
| * Remote softirq infrastructure is by Jens Axboe. |
| + * |
| + * Softirq-split implementation by |
| + * Copyright (C) 2005 Thomas Gleixner, Ingo Molnar |
| */ |
| |
| #include <linux/module.h> |
| +#include <linux/kallsyms.h> |
| +#include <linux/syscalls.h> |
| +#include <linux/wait.h> |
| #include <linux/kernel_stat.h> |
| #include <linux/interrupt.h> |
| #include <linux/init.h> |
| @@ -54,7 +60,14 @@ EXPORT_SYMBOL(irq_stat); |
| |
| static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; |
| |
| -static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); |
| +struct softirqdata { |
| + int nr; |
| + unsigned long cpu; |
| + struct task_struct *tsk; |
| + int running; |
| +}; |
| + |
| +static DEFINE_PER_CPU(struct softirqdata [NR_SOFTIRQS], ksoftirqd); |
| |
| char *softirq_to_name[NR_SOFTIRQS] = { |
| "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", |
| @@ -67,16 +80,32 @@ char *softirq_to_name[NR_SOFTIRQS] = { |
| * to the pending events, so lets the scheduler to balance |
| * the softirq load for us. |
| */ |
| -static void wakeup_softirqd(void) |
| +static void wakeup_softirqd(int softirq) |
| { |
| /* Interrupts are disabled: no need to stop preemption */ |
| - struct task_struct *tsk = __get_cpu_var(ksoftirqd); |
| + struct task_struct *tsk = __get_cpu_var(ksoftirqd)[softirq].tsk; |
| |
| if (tsk && tsk->state != TASK_RUNNING) |
| wake_up_process(tsk); |
| } |
| |
| /* |
| + * Wake up the softirq threads which have work |
| + */ |
| +static void trigger_softirqs(void) |
| +{ |
| + u32 pending = local_softirq_pending(); |
| + int curr = 0; |
| + |
| + while (pending) { |
| + if (pending & 1) |
| + wakeup_softirqd(curr); |
| + pending >>= 1; |
| + curr++; |
| + } |
| +} |
| + |
| +/* |
| * This one is for softirq.c-internal use, |
| * where hardirqs are disabled legitimately: |
| */ |
| @@ -188,7 +217,7 @@ EXPORT_SYMBOL(local_bh_enable_ip); |
| */ |
| #define MAX_SOFTIRQ_RESTART 10 |
| |
| -asmlinkage void __do_softirq(void) |
| +static void ___do_softirq(void) |
| { |
| struct softirq_action *h; |
| __u32 pending; |
| @@ -198,9 +227,6 @@ asmlinkage void __do_softirq(void) |
| pending = local_softirq_pending(); |
| account_system_vtime(current); |
| |
| - __local_bh_disable((unsigned long)__builtin_return_address(0)); |
| - lockdep_softirq_enter(); |
| - |
| cpu = smp_processor_id(); |
| restart: |
| /* Reset the pending bitmask before enabling irqs */ |
| @@ -228,6 +254,7 @@ restart: |
| } |
| |
| rcu_bh_qs(cpu); |
| + cond_resched_softirq_context(); |
| } |
| h++; |
| pending >>= 1; |
| @@ -240,12 +267,33 @@ restart: |
| goto restart; |
| |
| if (pending) |
| - wakeup_softirqd(); |
| + trigger_softirqs(); |
| +} |
| + |
| +asmlinkage void __do_softirq(void) |
| +{ |
| +#ifdef CONFIG_PREEMPT_SOFTIRQS |
| + /* |
| + * 'preempt harder'. Push all softirq processing off to ksoftirqd. |
| + */ |
| + if (softirq_preemption) { |
| + if (local_softirq_pending()) |
| + trigger_softirqs(); |
| + return; |
| + } |
| +#endif |
| + /* |
| + * 'immediate' softirq execution: |
| + */ |
| + __local_bh_disable((unsigned long)__builtin_return_address(0)); |
| + lockdep_softirq_enter(); |
| + |
| + ___do_softirq(); |
| |
| lockdep_softirq_exit(); |
| |
| account_system_vtime(current); |
| _local_bh_enable(); |
| } |
| |
| #ifndef __ARCH_HAS_DO_SOFTIRQ |
| @@ -316,19 +365,11 @@ void irq_exit(void) |
| */ |
| inline void raise_softirq_irqoff(unsigned int nr) |
| { |
| - __raise_softirq_irqoff(nr); |
| + __do_raise_softirq_irqoff(nr); |
| |
| - /* |
| - * If we're in an interrupt or softirq, we're done |
| - * (this also catches softirq-disabled code). We will |
| - * actually run the softirq once we return from |
| - * the irq or softirq. |
| - * |
| - * Otherwise we wake up ksoftirqd to make sure we |
| - * schedule the softirq soon. |
| - */ |
| - if (!in_interrupt()) |
| - wakeup_softirqd(); |
| +#ifdef CONFIG_PREEMPT_SOFTIRQS |
| + wakeup_softirqd(nr); |
| +#endif |
| } |
| |
| void raise_softirq(unsigned int nr) |
| @@ -426,7 +467,7 @@ static void tasklet_action(struct softirq_action *a) |
| t->next = NULL; |
| *__get_cpu_var(tasklet_vec).tail = t; |
| __get_cpu_var(tasklet_vec).tail = &(t->next); |
| - __raise_softirq_irqoff(TASKLET_SOFTIRQ); |
| + __do_raise_softirq_irqoff(TASKLET_SOFTIRQ); |
| local_irq_enable(); |
| } |
| } |
| @@ -461,7 +502,7 @@ static void tasklet_hi_action(struct softirq_action *a) |
| t->next = NULL; |
| *__get_cpu_var(tasklet_hi_vec).tail = t; |
| __get_cpu_var(tasklet_hi_vec).tail = &(t->next); |
| - __raise_softirq_irqoff(HI_SOFTIRQ); |
| + __do_raise_softirq_irqoff(HI_SOFTIRQ); |
| local_irq_enable(); |
| } |
| } |
| @@ -692,33 +733,56 @@ void __init softirq_init(void) |
| open_softirq(HI_SOFTIRQ, tasklet_hi_action); |
| } |
| |
| -static int run_ksoftirqd(void * __bind_cpu) |
| +static int run_ksoftirqd(void * __data) |
| { |
| + /* Priority needs to be below hardirqs */ |
| + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 - 1}; |
| + struct softirqdata *data = __data; |
| + u32 mask = (1 << data->nr); |
| + struct softirq_action *h; |
| + |
| + sys_sched_setscheduler(current->pid, SCHED_FIFO, ¶m); |
| + current->extra_flags |= PFE_SOFTIRQ; |
| set_current_state(TASK_INTERRUPTIBLE); |
| |
| while (!kthread_should_stop()) { |
| preempt_disable(); |
| - if (!local_softirq_pending()) { |
| + if (!(local_softirq_pending() & mask)) { |
| preempt_enable_and_schedule(); |
| preempt_disable(); |
| } |
| |
| __set_current_state(TASK_RUNNING); |
| + data->running = 1; |
| |
| - while (local_softirq_pending()) { |
| + while (local_softirq_pending() & mask) { |
| /* Preempt disable stops cpu going offline. |
| If already offline, we'll be on wrong CPU: |
| don't process */ |
| - if (cpu_is_offline((long)__bind_cpu)) |
| + if (cpu_is_offline(data->cpu)) |
| goto wait_to_die; |
| - do_softirq(); |
| + |
| + local_irq_disable(); |
| __preempt_enable_no_resched(); |
| + set_softirq_pending(local_softirq_pending() & ~mask); |
| + local_bh_disable(); |
| + local_irq_enable(); |
| + |
| + h = &softirq_vec[data->nr]; |
| + if (h) |
| + h->action(h); |
| + rcu_bh_qs(data->cpu); |
| + |
| + local_irq_disable(); |
| + _local_bh_enable(); |
| + local_irq_enable(); |
| + |
| cond_resched(); |
| preempt_disable(); |
| - rcu_sched_qs((long)__bind_cpu); |
| } |
| preempt_enable(); |
| set_current_state(TASK_INTERRUPTIBLE); |
| + data->running = 0; |
| } |
| __set_current_state(TASK_RUNNING); |
| return 0; |
| @@ -768,7 +832,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu) |
| BUG(); |
| } |
| |
| -static void takeover_tasklets(unsigned int cpu) |
| +void takeover_tasklets(unsigned int cpu) |
| { |
| /* CPU is dead, so no lock needed. */ |
| local_irq_disable(); |
| @@ -794,49 +858,80 @@ static void takeover_tasklets(unsigned int cpu) |
| } |
| #endif /* CONFIG_HOTPLUG_CPU */ |
| |
| +static const char *softirq_names[] = |
| +{ |
| + [HI_SOFTIRQ] = "high", |
| + [SCHED_SOFTIRQ] = "sched", |
| + [TIMER_SOFTIRQ] = "timer", |
| + [NET_TX_SOFTIRQ] = "net-tx", |
| + [NET_RX_SOFTIRQ] = "net-rx", |
| + [BLOCK_SOFTIRQ] = "block", |
| +	[BLOCK_IOPOLL_SOFTIRQ] = "block-iopoll", |
| + [TASKLET_SOFTIRQ] = "tasklet", |
| +	[HRTIMER_SOFTIRQ] = "hrtimer", |
| + [RCU_SOFTIRQ] = "rcu", |
| +}; |
| + |
| static int __cpuinit cpu_callback(struct notifier_block *nfb, |
| unsigned long action, |
| void *hcpu) |
| { |
| - int hotcpu = (unsigned long)hcpu; |
| + int hotcpu = (unsigned long)hcpu, i; |
| struct task_struct *p; |
| |
| switch (action) { |
| case CPU_UP_PREPARE: |
| case CPU_UP_PREPARE_FROZEN: |
| - p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); |
| - if (IS_ERR(p)) { |
| - printk("ksoftirqd for %i failed\n", hotcpu); |
| - return NOTIFY_BAD; |
| + for (i = 0; i < NR_SOFTIRQS; i++) { |
| + per_cpu(ksoftirqd, hotcpu)[i].nr = i; |
| + per_cpu(ksoftirqd, hotcpu)[i].cpu = hotcpu; |
| + per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL; |
| } |
| - kthread_bind(p, hotcpu); |
| - per_cpu(ksoftirqd, hotcpu) = p; |
| - break; |
| + for (i = 0; i < NR_SOFTIRQS; i++) { |
| + p = kthread_create(run_ksoftirqd, |
| + &per_cpu(ksoftirqd, hotcpu)[i], |
| + "softirq-%s/%d", softirq_names[i], |
| + hotcpu); |
| + if (IS_ERR(p)) { |
| +			printk(KERN_ERR "ksoftirqd %d for %i failed\n", |
| +			       i, hotcpu); |
| + return NOTIFY_BAD; |
| + } |
| + kthread_bind(p, hotcpu); |
| + per_cpu(ksoftirqd, hotcpu)[i].tsk = p; |
| + } |
| + break; |
| case CPU_ONLINE: |
| case CPU_ONLINE_FROZEN: |
| - wake_up_process(per_cpu(ksoftirqd, hotcpu)); |
| + for (i = 0; i < NR_SOFTIRQS; i++) |
| + wake_up_process(per_cpu(ksoftirqd, hotcpu)[i].tsk); |
| break; |
| #ifdef CONFIG_HOTPLUG_CPU |
| case CPU_UP_CANCELED: |
| case CPU_UP_CANCELED_FROZEN: |
| - if (!per_cpu(ksoftirqd, hotcpu)) |
| - break; |
| - /* Unbind so it can run. Fall thru. */ |
| - kthread_bind(per_cpu(ksoftirqd, hotcpu), |
| - cpumask_any(cpu_online_mask)); |
| +#if 0 |
| + for (i = 0; i < NR_SOFTIRQS; i++) { |
| + if (!per_cpu(ksoftirqd, hotcpu)[i].tsk) |
| + continue; |
| + kthread_bind(per_cpu(ksoftirqd, hotcpu)[i].tsk, |
| + cpumask_any(cpu_online_mask)); |
| + } |
| +#endif |
| case CPU_DEAD: |
| case CPU_DEAD_FROZEN: { |
| struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
| |
| - p = per_cpu(ksoftirqd, hotcpu); |
| - per_cpu(ksoftirqd, hotcpu) = NULL; |
| - sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); |
| - kthread_stop(p); |
| + for (i = 0; i < NR_SOFTIRQS; i++) { |
| + p = per_cpu(ksoftirqd, hotcpu)[i].tsk; |
| + per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL; |
| + sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); |
| + kthread_stop(p); |
| + } |
| takeover_tasklets(hotcpu); |
| break; |
| } |
| #endif /* CONFIG_HOTPLUG_CPU */ |
| - } |
| + } |
| return NOTIFY_OK; |
| } |
| |
| @@ -856,6 +954,29 @@ static __init int spawn_ksoftirqd(void) |
| } |
| early_initcall(spawn_ksoftirqd); |
| |
| + |
| +#ifdef CONFIG_PREEMPT_SOFTIRQS |
| + |
| +int softirq_preemption = 1; |
| + |
| +EXPORT_SYMBOL(softirq_preemption); |
| + |
| +static int __init softirq_preempt_setup(char *str) |
| +{ |
| + if (!strncmp(str, "off", 3)) |
| + softirq_preemption = 0; |
| + else |
| + get_option(&str, &softirq_preemption); |
| + if (!softirq_preemption) |
| +		printk(KERN_INFO "turning off softirq preemption!\n"); |
| + |
| + return 1; |
| +} |
| + |
| +__setup("softirq-preempt=", softirq_preempt_setup); |
| + |
| +#endif |
| + |
| #ifdef CONFIG_SMP |
| /* |
| * Call a function on all processors |
| -- |
| 1.7.1.1 |
| |