From 8acdcc8d9d0a296b89756eb9b76289ff311f6962 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:31 -0500
Subject: [PATCH] softirqs: forced threading of softirqs
commit cecf393e7eca1950e4299f21c072617b8f7b568c in tip.
Split softirqs into separate threads, one for each softirq.
[PG: original didn't need extra_flags back in 2.6.31, also
add things like BLOCK_IOPOLL_SOFTIRQ which appeared in 33rt]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
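[Illustrative sketch, not part of the patch: driver-facing softirq and
tasklet interfaces are unchanged by this series; only the execution
context moves into per-softirq kernel threads. Assuming the stock
tasklet API of this kernel, an existing user such as the module below
keeps working unmodified, and with CONFIG_PREEMPT_SOFTIRQS enabled its
handler simply runs in the SCHED_FIFO "softirq-tasklet/<cpu>" thread
instead of on return from interrupt.]

    #include <linux/module.h>
    #include <linux/interrupt.h>

    static void demo_tasklet_fn(unsigned long data)
    {
            /* With CONFIG_PREEMPT_SOFTIRQS this runs in the
             * "softirq-tasklet/<cpu>" kthread rather than on
             * return from interrupt. */
            pr_info("demo tasklet ran, data=%lu\n", data);
    }

    static DECLARE_TASKLET(demo_tasklet, demo_tasklet_fn, 0);

    static int __init demo_init(void)
    {
            tasklet_schedule(&demo_tasklet);
            return 0;
    }

    static void __exit demo_exit(void)
    {
            tasklet_kill(&demo_tasklet);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");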
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 54c9394..f89e357 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -354,7 +354,6 @@ enum
SCHED_SOFTIRQ,
HRTIMER_SOFTIRQ,
RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
-
NR_SOFTIRQS
};
@@ -372,11 +371,14 @@ struct softirq_action
void (*action)(struct softirq_action *);
};
+#define __raise_softirq_irqoff(nr) \
+ do { or_softirq_pending(1UL << (nr)); } while (0)
+#define __do_raise_softirq_irqoff(nr) __raise_softirq_irqoff(nr)
+
asmlinkage void do_softirq(void);
asmlinkage void __do_softirq(void);
extern void open_softirq(int nr, void (*action)(struct softirq_action *));
extern void softirq_init(void);
-#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
@@ -524,6 +526,7 @@ extern void tasklet_kill(struct tasklet_struct *t);
extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
extern void tasklet_init(struct tasklet_struct *t,
void (*func)(unsigned long), unsigned long data);
+extern void takeover_tasklets(unsigned int cpu);
struct tasklet_hrtimer {
struct hrtimer timer;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c916409..4807851 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -102,6 +102,12 @@ struct fs_struct;
struct bts_context;
struct perf_event_context;
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+extern int softirq_preemption;
+#else
+# define softirq_preemption 0
+#endif
+
/*
* List of flags we want to share for kernel threads,
* if only because they are not used by them anyway.
@@ -1234,6 +1240,7 @@ struct task_struct {
void *stack;
atomic_t usage;
unsigned int flags; /* per process flags, defined below */
+ unsigned int extra_flags;
unsigned int ptrace;
int lock_depth; /* BKL lock depth */
@@ -1788,6 +1795,9 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */
#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */
+/* Flags in the extra_flags field */
+#define PFE_SOFTIRQ 0x00000001 /* softirq context */
+
/*
* Only the _current_ task can read/write to tsk->flags, but other
* tasks can access tsk->flags in readonly mode for example
@@ -2440,6 +2450,8 @@ extern int __cond_resched_softirq(void);
__cond_resched_softirq(); \
})
+extern int cond_resched_softirq_context(void);
+
/*
* Does a critical section need to be broken due to another
* task waiting?: (technically does not depend on CONFIG_PREEMPT,
@@ -2471,6 +2483,13 @@ static inline void thread_group_cputime_free(struct signal_struct *sig)
{
}
+static inline int softirq_need_resched(void)
+{
+ if (softirq_preemption && (current->extra_flags & PFE_SOFTIRQ))
+ return need_resched();
+ return 0;
+}
+
/*
* Reevaluate whether the task has signals pending delivery.
* Wake the task if so.
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf987b9..2ff1834 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -52,3 +52,18 @@ config PREEMPT
endchoice
+config PREEMPT_SOFTIRQS
+ bool "Thread Softirqs"
+ default n
+# depends on PREEMPT
+ help
+ This option reduces the latency of the kernel by 'threading'
+ soft interrupts. This means that all softirqs will execute
+ in softirqd's context. While this helps latency, it can also
+ reduce performance.
+
+ The threading of softirqs can also be controlled via the
+ /proc/sys/kernel/softirq_preemption runtime flag and the
+ softirq-preempt=0/1 boot-time option.
+
+ Say N if you are unsure.
diff --git a/kernel/sched.c b/kernel/sched.c
index 1c5f49d..b059d2f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5125,7 +5125,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
tmp = cputime_to_cputime64(cputime);
if (hardirq_count() - hardirq_offset)
cpustat->irq = cputime64_add(cpustat->irq, tmp);
- else if (softirq_count())
+ else if (softirq_count() || (p->extra_flags & PFE_SOFTIRQ))
cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
else
cpustat->system = cputime64_add(cpustat->system, tmp);
@@ -5689,7 +5689,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
int saved_lock_depth;
/* Catch callers which need to be fixed */
- BUG_ON(ti->preempt_count || !irqs_disabled());
+ WARN_ON_ONCE(ti->preempt_count || !irqs_disabled());
do {
add_preempt_count(PREEMPT_ACTIVE);
@@ -6830,9 +6830,12 @@ int __cond_resched_lock(spinlock_t *lock)
}
EXPORT_SYMBOL(__cond_resched_lock);
+/*
+ * Voluntarily preempt a process context that has softirqs disabled:
+ */
int __sched __cond_resched_softirq(void)
{
- BUG_ON(!in_softirq());
+ WARN_ON_ONCE(!in_softirq());
if (should_resched()) {
local_bh_enable();
@@ -6844,6 +6847,25 @@ int __sched __cond_resched_softirq(void)
}
EXPORT_SYMBOL(__cond_resched_softirq);
+/*
+ * Voluntarily preempt a softirq context (possible with softirq threading):
+ */
+int __sched cond_resched_softirq_context(void)
+{
+ WARN_ON_ONCE(!in_softirq());
+
+ if (softirq_need_resched() && system_state == SYSTEM_RUNNING) {
+ raw_local_irq_disable();
+ _local_bh_enable();
+ raw_local_irq_enable();
+ __cond_resched();
+ local_bh_disable();
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(cond_resched_softirq_context);
+
/**
* yield - yield the current processor to other threads.
*
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 19ef218..eda9c66 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -8,9 +8,15 @@
* Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
*
* Remote softirq infrastructure is by Jens Axboe.
+ *
+ * Softirq-split implementation by
+ * Copyright (C) 2005 Thomas Gleixner, Ingo Molnar
*/
#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/syscalls.h>
+#include <linux/wait.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
@@ -54,7 +60,14 @@ EXPORT_SYMBOL(irq_stat);
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
-static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+struct softirqdata {
+ int nr;
+ unsigned long cpu;
+ struct task_struct *tsk;
+ int running;
+};
+
+static DEFINE_PER_CPU(struct softirqdata [NR_SOFTIRQS], ksoftirqd);
char *softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -67,16 +80,32 @@ char *softirq_to_name[NR_SOFTIRQS] = {
* to the pending events, so lets the scheduler to balance
* the softirq load for us.
*/
-static void wakeup_softirqd(void)
+static void wakeup_softirqd(int softirq)
{
/* Interrupts are disabled: no need to stop preemption */
- struct task_struct *tsk = __get_cpu_var(ksoftirqd);
+ struct task_struct *tsk = __get_cpu_var(ksoftirqd)[softirq].tsk;
if (tsk && tsk->state != TASK_RUNNING)
wake_up_process(tsk);
}
/*
+ * Wake up the softirq threads which have work
+ */
+static void trigger_softirqs(void)
+{
+ u32 pending = local_softirq_pending();
+ int curr = 0;
+
+ while (pending) {
+ if (pending & 1)
+ wakeup_softirqd(curr);
+ pending >>= 1;
+ curr++;
+ }
+}
+
+/*
* This one is for softirq.c-internal use,
* where hardirqs are disabled legitimately:
*/
@@ -188,7 +217,7 @@ EXPORT_SYMBOL(local_bh_enable_ip);
*/
#define MAX_SOFTIRQ_RESTART 10
-asmlinkage void __do_softirq(void)
+static void ___do_softirq(void)
{
struct softirq_action *h;
__u32 pending;
@@ -198,9 +227,6 @@ asmlinkage void __do_softirq(void)
pending = local_softirq_pending();
account_system_vtime(current);
- __local_bh_disable((unsigned long)__builtin_return_address(0));
- lockdep_softirq_enter();
-
cpu = smp_processor_id();
restart:
/* Reset the pending bitmask before enabling irqs */
@@ -228,6 +254,7 @@ restart:
}
rcu_bh_qs(cpu);
+ cond_resched_softirq_context();
}
h++;
pending >>= 1;
@@ -240,12 +267,34 @@ restart:
goto restart;
if (pending)
- wakeup_softirqd();
+ trigger_softirqs();
+}
+
+asmlinkage void __do_softirq(void)
+{
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+ /*
+ * 'preempt harder'. Push all softirq processing off to ksoftirqd.
+ */
+ if (softirq_preemption) {
+ if (local_softirq_pending())
+ trigger_softirqs();
+ return;
+ }
+#endif
+ /*
+ * 'immediate' softirq execution:
+ */
+ __local_bh_disable((unsigned long)__builtin_return_address(0));
+ lockdep_softirq_enter();
+
+ ___do_softirq();
lockdep_softirq_exit();
account_system_vtime(current);
_local_bh_enable();
+
}
#ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -316,19 +365,11 @@ void irq_exit(void)
*/
inline void raise_softirq_irqoff(unsigned int nr)
{
- __raise_softirq_irqoff(nr);
+ __do_raise_softirq_irqoff(nr);
- /*
- * If we're in an interrupt or softirq, we're done
- * (this also catches softirq-disabled code). We will
- * actually run the softirq once we return from
- * the irq or softirq.
- *
- * Otherwise we wake up ksoftirqd to make sure we
- * schedule the softirq soon.
- */
- if (!in_interrupt())
- wakeup_softirqd();
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+ wakeup_softirqd(nr);
+#endif
}
void raise_softirq(unsigned int nr)
@@ -426,7 +467,7 @@ static void tasklet_action(struct softirq_action *a)
t->next = NULL;
*__get_cpu_var(tasklet_vec).tail = t;
__get_cpu_var(tasklet_vec).tail = &(t->next);
- __raise_softirq_irqoff(TASKLET_SOFTIRQ);
+ __do_raise_softirq_irqoff(TASKLET_SOFTIRQ);
local_irq_enable();
}
}
@@ -461,7 +502,7 @@ static void tasklet_hi_action(struct softirq_action *a)
t->next = NULL;
*__get_cpu_var(tasklet_hi_vec).tail = t;
__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
- __raise_softirq_irqoff(HI_SOFTIRQ);
+ __do_raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_enable();
}
}
@@ -692,33 +733,56 @@ void __init softirq_init(void)
open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
-static int run_ksoftirqd(void * __bind_cpu)
+static int run_ksoftirqd(void * __data)
{
+ /* Priority needs to be below hardirqs */
+ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 - 1};
+ struct softirqdata *data = __data;
+ u32 mask = (1 << data->nr);
+ struct softirq_action *h;
+
+ sys_sched_setscheduler(current->pid, SCHED_FIFO, &param);
+ current->extra_flags |= PFE_SOFTIRQ;
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
preempt_disable();
- if (!local_softirq_pending()) {
+ if (!(local_softirq_pending() & mask)) {
preempt_enable_and_schedule();
preempt_disable();
}
__set_current_state(TASK_RUNNING);
+ data->running = 1;
- while (local_softirq_pending()) {
+ while (local_softirq_pending() & mask) {
/* Preempt disable stops cpu going offline.
If already offline, we'll be on wrong CPU:
don't process */
- if (cpu_is_offline((long)__bind_cpu))
+ if (cpu_is_offline(data->cpu))
goto wait_to_die;
- do_softirq();
+
+ local_irq_disable();
__preempt_enable_no_resched();
+ set_softirq_pending(local_softirq_pending() & ~mask);
+ local_bh_disable();
+ local_irq_enable();
+
+ h = &softirq_vec[data->nr];
+ if (h)
+ h->action(h);
+ rcu_bh_qs(data->cpu);
+
+ local_irq_disable();
+ _local_bh_enable();
+ local_irq_enable();
+
cond_resched();
preempt_disable();
- rcu_sched_qs((long)__bind_cpu);
}
preempt_enable();
set_current_state(TASK_INTERRUPTIBLE);
+ data->running = 0;
}
__set_current_state(TASK_RUNNING);
return 0;
@@ -768,7 +832,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
BUG();
}
-static void takeover_tasklets(unsigned int cpu)
+void takeover_tasklets(unsigned int cpu)
{
/* CPU is dead, so no lock needed. */
local_irq_disable();
@@ -794,49 +858,83 @@ static void takeover_tasklets(unsigned int cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
+static const char *softirq_names[] =
+{
+ [HI_SOFTIRQ] = "high",
+ [SCHED_SOFTIRQ] = "sched",
+ [TIMER_SOFTIRQ] = "timer",
+ [NET_TX_SOFTIRQ] = "net-tx",
+ [NET_RX_SOFTIRQ] = "net-rx",
+ [BLOCK_SOFTIRQ] = "block",
+ [BLOCK_IOPOLL_SOFTIRQ]= "block-iopoll",
+ [TASKLET_SOFTIRQ] = "tasklet",
+#ifdef CONFIG_HIGH_RES_TIMERS
+ [HRTIMER_SOFTIRQ] = "hrtimer",
+#endif
+ [RCU_SOFTIRQ] = "rcu",
+};
+
static int __cpuinit cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
- int hotcpu = (unsigned long)hcpu;
+ int hotcpu = (unsigned long)hcpu, i;
struct task_struct *p;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
- if (IS_ERR(p)) {
- printk("ksoftirqd for %i failed\n", hotcpu);
- return NOTIFY_BAD;
+ for (i = 0; i < NR_SOFTIRQS; i++) {
+ per_cpu(ksoftirqd, hotcpu)[i].nr = i;
+ per_cpu(ksoftirqd, hotcpu)[i].cpu = hotcpu;
+ per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL;
}
- kthread_bind(p, hotcpu);
- per_cpu(ksoftirqd, hotcpu) = p;
- break;
+ for (i = 0; i < NR_SOFTIRQS; i++) {
+ p = kthread_create(run_ksoftirqd,
+ &per_cpu(ksoftirqd, hotcpu)[i],
+ "softirq-%s/%d", softirq_names[i],
+ hotcpu);
+ if (IS_ERR(p)) {
+ printk("ksoftirqd %d for %i failed\n", i,
+ hotcpu);
+ return NOTIFY_BAD;
+ }
+ kthread_bind(p, hotcpu);
+ per_cpu(ksoftirqd, hotcpu)[i].tsk = p;
+ }
+ break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- wake_up_process(per_cpu(ksoftirqd, hotcpu));
+ for (i = 0; i < NR_SOFTIRQS; i++)
+ wake_up_process(per_cpu(ksoftirqd, hotcpu)[i].tsk);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
- if (!per_cpu(ksoftirqd, hotcpu))
- break;
- /* Unbind so it can run. Fall thru. */
- kthread_bind(per_cpu(ksoftirqd, hotcpu),
- cpumask_any(cpu_online_mask));
+#if 0
+ for (i = 0; i < NR_SOFTIRQS; i++) {
+ if (!per_cpu(ksoftirqd, hotcpu)[i].tsk)
+ continue;
+ kthread_bind(per_cpu(ksoftirqd, hotcpu)[i].tsk,
+ cpumask_any(cpu_online_mask));
+ }
+#endif
case CPU_DEAD:
case CPU_DEAD_FROZEN: {
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
- p = per_cpu(ksoftirqd, hotcpu);
- per_cpu(ksoftirqd, hotcpu) = NULL;
- sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
- kthread_stop(p);
+ for (i = 0; i < NR_SOFTIRQS; i++) {
+ p = per_cpu(ksoftirqd, hotcpu)[i].tsk;
+ per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL;
+ sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
+ kthread_stop(p);
+ }
takeover_tasklets(hotcpu);
break;
}
#endif /* CONFIG_HOTPLUG_CPU */
- }
+ }
return NOTIFY_OK;
}
@@ -856,6 +954,29 @@ static __init int spawn_ksoftirqd(void)
}
early_initcall(spawn_ksoftirqd);
+
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+
+int softirq_preemption = 1;
+
+EXPORT_SYMBOL(softirq_preemption);
+
+static int __init softirq_preempt_setup (char *str)
+{
+ if (!strncmp(str, "off", 3))
+ softirq_preemption = 0;
+ else
+ get_option(&str, &softirq_preemption);
+ if (!softirq_preemption)
+ printk("turning off softirq preemption!\n");
+
+ return 1;
+}
+
+__setup("softirq-preempt=", softirq_preempt_setup);
+
+#endif
+
#ifdef CONFIG_SMP
/*
* Call a function on all processors
--
1.7.1.1
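
[Illustrative sketch, not part of the patch: the wake-up fan-out added
in trigger_softirqs() can be modelled in plain C; each set bit in the
pending mask wakes exactly one per-softirq thread. The model_* function
and the names array below are hypothetical, not kernel APIs.]

    #include <stdio.h>

    #define NR_SOFTIRQS 10

    /* Same index ordering as softirq_to_name[] in kernel/softirq.c */
    static const char *names[NR_SOFTIRQS] = {
            "high", "timer", "net-tx", "net-rx", "block",
            "block-iopoll", "tasklet", "sched", "hrtimer", "rcu",
    };

    /* Model of trigger_softirqs(): one wake-up per pending bit. */
    static void model_trigger_softirqs(unsigned int pending)
    {
            int nr = 0;

            while (pending) {
                    if (pending & 1)
                            printf("wake softirq-%s/<cpu>\n", names[nr]);
                    pending >>= 1;
                    nr++;
            }
    }

    int main(void)
    {
            /* e.g. TIMER and NET_RX pending */
            model_trigger_softirqs((1u << 1) | (1u << 3));
            return 0;
    }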