| From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> |
| Date: Mon, 4 Nov 2013 13:21:10 -0800 |
| Subject: rcu: Eliminate softirq processing from rcutree |
| |
| Running RCU out of softirq is a problem for some workloads that would |
| like to manage RCU core processing independently of other softirq work, |
| for example, setting kthread priority. This commit therefore moves the |
| RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER kthread |
| named rcuc. The SCHED_OTHER approach avoids the scalability problems |
| that appeared with the earlier attempt to move RCU core processing |
| from softirq to kthreads. That said, kernels built with RCU_BOOST=y |
| will run the rcuc kthreads at the RCU-boosting priority. |
| |
| Reported-by: Thomas Gleixner <tglx@linutronix.de> |
| Tested-by: Mike Galbraith <bitbucket@online.de> |
| Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| --- |
| kernel/rcu/tree.c | 110 ++++++++++++++++++++++++++++++--- |
| kernel/rcu/tree.h | 5 - |
| kernel/rcu/tree_plugin.h | 153 ++++++----------------------------------------- |
| 3 files changed, 122 insertions(+), 146 deletions(-) |
| |
| --- a/kernel/rcu/tree.c |
| +++ b/kernel/rcu/tree.c |
| @@ -56,6 +56,11 @@ |
| #include <linux/random.h> |
| #include <linux/trace_events.h> |
| #include <linux/suspend.h> |
| +#include <linux/delay.h> |
| +#include <linux/gfp.h> |
| +#include <linux/oom.h> |
| +#include <linux/smpboot.h> |
| +#include "../time/tick-internal.h" |
| |
| #include "tree.h" |
| #include "rcu.h" |
| @@ -2946,18 +2951,17 @@ static void |
| /* |
| * Do RCU core processing for the current CPU. |
| */ |
| -static void rcu_process_callbacks(struct softirq_action *unused) |
| +static void rcu_process_callbacks(void) |
| { |
| struct rcu_state *rsp; |
| |
| if (cpu_is_offline(smp_processor_id())) |
| return; |
| - trace_rcu_utilization(TPS("Start RCU core")); |
| for_each_rcu_flavor(rsp) |
| __rcu_process_callbacks(rsp); |
| - trace_rcu_utilization(TPS("End RCU core")); |
| } |
| |
| +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); |
| /* |
| * Schedule RCU callback invocation. If the specified type of RCU |
| * does not support RCU priority boosting, just do a direct call, |
| @@ -2969,18 +2973,105 @@ static void invoke_rcu_callbacks(struct |
| { |
| if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) |
| return; |
| - if (likely(!rsp->boost)) { |
| - rcu_do_batch(rsp, rdp); |
| + rcu_do_batch(rsp, rdp); |
| +} |
| + |
| +static void rcu_wake_cond(struct task_struct *t, int status) |
| +{ |
| + /* |
| + * If the thread is yielding, only wake it when this |
| + * is invoked from idle |
| + */ |
| + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current))) |
| + wake_up_process(t); |
| +} |
| + |
| +/* |
| + * Wake up this CPU's rcuc kthread to do RCU core processing. |
| + */ |
| +static void invoke_rcu_core(void) |
| +{ |
| + unsigned long flags; |
| + struct task_struct *t; |
| + |
| + if (!cpu_online(smp_processor_id())) |
| return; |
| + local_irq_save(flags); |
| + __this_cpu_write(rcu_cpu_has_work, 1); |
| + t = __this_cpu_read(rcu_cpu_kthread_task); |
| + if (t != NULL && current != t) |
| + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status)); |
| + local_irq_restore(flags); |
| +} |
| + |
| +static void rcu_cpu_kthread_park(unsigned int cpu) |
| +{ |
| + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; |
| +} |
| + |
| +static int rcu_cpu_kthread_should_run(unsigned int cpu) |
| +{ |
| + return __this_cpu_read(rcu_cpu_has_work); |
| +} |
| + |
| +/* |
| + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the |
| + * RCU softirq used in flavors and configurations of RCU that do not |
| + * support RCU priority boosting. |
| + */ |
| +static void rcu_cpu_kthread(unsigned int cpu) |
| +{ |
| + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); |
| + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); |
| + int spincnt; |
| + |
| + for (spincnt = 0; spincnt < 10; spincnt++) { |
| + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); |
| + local_bh_disable(); |
| + *statusp = RCU_KTHREAD_RUNNING; |
| + this_cpu_inc(rcu_cpu_kthread_loops); |
| + local_irq_disable(); |
| + work = *workp; |
| + *workp = 0; |
| + local_irq_enable(); |
| + if (work) |
| + rcu_process_callbacks(); |
| + local_bh_enable(); |
| + if (*workp == 0) { |
| + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); |
| + *statusp = RCU_KTHREAD_WAITING; |
| + return; |
| + } |
| } |
| - invoke_rcu_callbacks_kthread(); |
| + *statusp = RCU_KTHREAD_YIELDING; |
| + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); |
| + schedule_timeout_interruptible(2); |
| + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); |
| + *statusp = RCU_KTHREAD_WAITING; |
| } |
| |
| -static void invoke_rcu_core(void) |
| +static struct smp_hotplug_thread rcu_cpu_thread_spec = { |
| + .store = &rcu_cpu_kthread_task, |
| + .thread_should_run = rcu_cpu_kthread_should_run, |
| + .thread_fn = rcu_cpu_kthread, |
| + .thread_comm = "rcuc/%u", |
| + .setup = rcu_cpu_kthread_setup, |
| + .park = rcu_cpu_kthread_park, |
| +}; |
| + |
| +/* |
| + * Spawn per-CPU RCU core processing kthreads. |
| + */ |
| +static int __init rcu_spawn_core_kthreads(void) |
| { |
| - if (cpu_online(smp_processor_id())) |
| - raise_softirq(RCU_SOFTIRQ); |
| + int cpu; |
| + |
| + for_each_possible_cpu(cpu) |
| + per_cpu(rcu_cpu_has_work, cpu) = 0; |
| + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); |
| + return 0; |
| } |
| +early_initcall(rcu_spawn_core_kthreads); |
| |
| /* |
| * Handle any core-RCU processing required by a call_rcu() invocation. |
| @@ -4648,7 +4739,6 @@ void __init rcu_init(void) |
| if (dump_tree) |
| rcu_dump_rcu_node_tree(&rcu_sched_state); |
| __rcu_init_preempt(); |
| - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
| |
| /* |
| * We don't need protection against CPU-hotplug here because |
| --- a/kernel/rcu/tree.h |
| +++ b/kernel/rcu/tree.h |
| @@ -580,12 +580,10 @@ extern struct rcu_state rcu_bh_state; |
| extern struct rcu_state rcu_preempt_state; |
| #endif /* #ifdef CONFIG_PREEMPT_RCU */ |
| |
| -#ifdef CONFIG_RCU_BOOST |
| DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
| DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); |
| DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
| DECLARE_PER_CPU(char, rcu_cpu_has_work); |
| -#endif /* #ifdef CONFIG_RCU_BOOST */ |
| |
| #ifndef RCU_TREE_NONCORE |
| |
| @@ -605,10 +603,9 @@ void call_rcu(struct rcu_head *head, rcu |
| static void __init __rcu_init_preempt(void); |
| static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
| static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
| -static void invoke_rcu_callbacks_kthread(void); |
| static bool rcu_is_callbacks_kthread(void); |
| +static void rcu_cpu_kthread_setup(unsigned int cpu); |
| #ifdef CONFIG_RCU_BOOST |
| -static void rcu_preempt_do_callbacks(void); |
| static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
| struct rcu_node *rnp); |
| #endif /* #ifdef CONFIG_RCU_BOOST */ |
| --- a/kernel/rcu/tree_plugin.h |
| +++ b/kernel/rcu/tree_plugin.h |
| @@ -24,26 +24,10 @@ |
| * Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
| */ |
| |
| -#include <linux/delay.h> |
| -#include <linux/gfp.h> |
| -#include <linux/oom.h> |
| -#include <linux/smpboot.h> |
| -#include <linux/jiffies.h> |
| -#include "../time/tick-internal.h" |
| - |
| #ifdef CONFIG_RCU_BOOST |
| |
| #include "../locking/rtmutex_common.h" |
| |
| -/* |
| - * Control variables for per-CPU and per-rcu_node kthreads. These |
| - * handle all flavors of RCU. |
| - */ |
| -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); |
| -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
| -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
| -DEFINE_PER_CPU(char, rcu_cpu_has_work); |
| - |
| #else /* #ifdef CONFIG_RCU_BOOST */ |
| |
| /* |
| @@ -56,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); |
| |
| #endif /* #else #ifdef CONFIG_RCU_BOOST */ |
| |
| +/* |
| + * Control variables for per-CPU and per-rcu_node kthreads. These |
| + * handle all flavors of RCU. |
| + */ |
| +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
| +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
| +DEFINE_PER_CPU(char, rcu_cpu_has_work); |
| + |
| #ifdef CONFIG_RCU_NOCB_CPU |
| static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ |
| static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ |
| @@ -635,15 +627,6 @@ static void rcu_preempt_check_callbacks( |
| t->rcu_read_unlock_special.b.need_qs = true; |
| } |
| |
| -#ifdef CONFIG_RCU_BOOST |
| - |
| -static void rcu_preempt_do_callbacks(void) |
| -{ |
| - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); |
| -} |
| - |
| -#endif /* #ifdef CONFIG_RCU_BOOST */ |
| - |
| /* |
| * Queue a preemptible-RCU callback for invocation after a grace period. |
| */ |
| @@ -925,6 +908,19 @@ void exit_rcu(void) |
| |
| #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ |
| |
| +/* |
| + * If boosting, set rcuc kthreads to realtime priority. |
| + */ |
| +static void rcu_cpu_kthread_setup(unsigned int cpu) |
| +{ |
| +#ifdef CONFIG_RCU_BOOST |
| + struct sched_param sp; |
| + |
| + sp.sched_priority = kthread_prio; |
| + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
| +#endif /* #ifdef CONFIG_RCU_BOOST */ |
| +} |
| + |
| #ifdef CONFIG_RCU_BOOST |
| |
| #include "../locking/rtmutex_common.h" |
| @@ -956,16 +952,6 @@ static void rcu_initiate_boost_trace(str |
| |
| #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
| |
| -static void rcu_wake_cond(struct task_struct *t, int status) |
| -{ |
| - /* |
| - * If the thread is yielding, only wake it when this |
| - * is invoked from idle |
| - */ |
| - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) |
| - wake_up_process(t); |
| -} |
| - |
| /* |
| * Carry out RCU priority boosting on the task indicated by ->exp_tasks |
| * or ->boost_tasks, advancing the pointer to the next task in the |
| @@ -1109,23 +1095,6 @@ static void rcu_initiate_boost(struct rc |
| } |
| |
| /* |
| - * Wake up the per-CPU kthread to invoke RCU callbacks. |
| - */ |
| -static void invoke_rcu_callbacks_kthread(void) |
| -{ |
| - unsigned long flags; |
| - |
| - local_irq_save(flags); |
| - __this_cpu_write(rcu_cpu_has_work, 1); |
| - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && |
| - current != __this_cpu_read(rcu_cpu_kthread_task)) { |
| - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), |
| - __this_cpu_read(rcu_cpu_kthread_status)); |
| - } |
| - local_irq_restore(flags); |
| -} |
| - |
| -/* |
| * Is the current CPU running the RCU-callbacks kthread? |
| * Caller must have preemption disabled. |
| */ |
| @@ -1179,67 +1148,6 @@ static int rcu_spawn_one_boost_kthread(s |
| return 0; |
| } |
| |
| -static void rcu_kthread_do_work(void) |
| -{ |
| - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); |
| - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); |
| - rcu_preempt_do_callbacks(); |
| -} |
| - |
| -static void rcu_cpu_kthread_setup(unsigned int cpu) |
| -{ |
| - struct sched_param sp; |
| - |
| - sp.sched_priority = kthread_prio; |
| - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
| -} |
| - |
| -static void rcu_cpu_kthread_park(unsigned int cpu) |
| -{ |
| - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; |
| -} |
| - |
| -static int rcu_cpu_kthread_should_run(unsigned int cpu) |
| -{ |
| - return __this_cpu_read(rcu_cpu_has_work); |
| -} |
| - |
| -/* |
| - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the |
| - * RCU softirq used in flavors and configurations of RCU that do not |
| - * support RCU priority boosting. |
| - */ |
| -static void rcu_cpu_kthread(unsigned int cpu) |
| -{ |
| - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); |
| - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); |
| - int spincnt; |
| - |
| - for (spincnt = 0; spincnt < 10; spincnt++) { |
| - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); |
| - local_bh_disable(); |
| - *statusp = RCU_KTHREAD_RUNNING; |
| - this_cpu_inc(rcu_cpu_kthread_loops); |
| - local_irq_disable(); |
| - work = *workp; |
| - *workp = 0; |
| - local_irq_enable(); |
| - if (work) |
| - rcu_kthread_do_work(); |
| - local_bh_enable(); |
| - if (*workp == 0) { |
| - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); |
| - *statusp = RCU_KTHREAD_WAITING; |
| - return; |
| - } |
| - } |
| - *statusp = RCU_KTHREAD_YIELDING; |
| - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); |
| - schedule_timeout_interruptible(2); |
| - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); |
| - *statusp = RCU_KTHREAD_WAITING; |
| -} |
| - |
| /* |
| * Set the per-rcu_node kthread's affinity to cover all CPUs that are |
| * served by the rcu_node in question. The CPU hotplug lock is still |
| @@ -1269,26 +1177,12 @@ static void rcu_boost_kthread_setaffinit |
| free_cpumask_var(cm); |
| } |
| |
| -static struct smp_hotplug_thread rcu_cpu_thread_spec = { |
| - .store = &rcu_cpu_kthread_task, |
| - .thread_should_run = rcu_cpu_kthread_should_run, |
| - .thread_fn = rcu_cpu_kthread, |
| - .thread_comm = "rcuc/%u", |
| - .setup = rcu_cpu_kthread_setup, |
| - .park = rcu_cpu_kthread_park, |
| -}; |
| - |
| /* |
| * Spawn boost kthreads -- called as soon as the scheduler is running. |
| */ |
| static void __init rcu_spawn_boost_kthreads(void) |
| { |
| struct rcu_node *rnp; |
| - int cpu; |
| - |
| - for_each_possible_cpu(cpu) |
| - per_cpu(rcu_cpu_has_work, cpu) = 0; |
| - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); |
| rcu_for_each_leaf_node(rcu_state_p, rnp) |
| (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); |
| } |
| @@ -1311,11 +1205,6 @@ static void rcu_initiate_boost(struct rc |
| raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
| } |
| |
| -static void invoke_rcu_callbacks_kthread(void) |
| -{ |
| - WARN_ON_ONCE(1); |
| -} |
| - |
| static bool rcu_is_callbacks_kthread(void) |
| { |
| return false; |