| From d105bfa39cad93515abd7c2aea20a21be8e3edf9 Mon Sep 17 00:00:00 2001 |
| From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> |
| Date: Mon, 4 Nov 2013 13:21:10 -0800 |
| Subject: [PATCH] rcu: Eliminate softirq processing from rcutree |
| |
| Running RCU out of softirq is a problem for some workloads that would |
| like to manage RCU core processing independently of other softirq work, |
| for example, setting kthread priority. This commit therefore moves the |
| RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER kthread |
| named rcuc. The SCHED_OTHER approach avoids the scalability problems |
| that appeared with the earlier attempt to move RCU core processing |
| from softirq to kthreads. That said, kernels built with RCU_BOOST=y |
| will run the rcuc kthreads at the RCU-boosting priority. |
| |
| Reported-by: Thomas Gleixner <tglx@linutronix.de> |
| Tested-by: Mike Galbraith <bitbucket@online.de> |
| Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| |
| diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c |
| index f28c520790e2..83c89e4fcc29 100644 |
| --- a/kernel/rcu/tree.c |
| +++ b/kernel/rcu/tree.c |
| @@ -58,6 +58,11 @@ |
| #include <linux/trace_events.h> |
| #include <linux/suspend.h> |
| #include <linux/ftrace.h> |
| +#include <linux/delay.h> |
| +#include <linux/gfp.h> |
| +#include <linux/oom.h> |
| +#include <linux/smpboot.h> |
| +#include "../time/tick-internal.h" |
| |
| #include "tree.h" |
| #include "rcu.h" |
| @@ -3058,18 +3063,17 @@ __rcu_process_callbacks(struct rcu_state *rsp) |
| /* |
| * Do RCU core processing for the current CPU. |
| */ |
| -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) |
| +static __latent_entropy void rcu_process_callbacks(void) |
| { |
| struct rcu_state *rsp; |
| |
| if (cpu_is_offline(smp_processor_id())) |
| return; |
| - trace_rcu_utilization(TPS("Start RCU core")); |
| for_each_rcu_flavor(rsp) |
| __rcu_process_callbacks(rsp); |
| - trace_rcu_utilization(TPS("End RCU core")); |
| } |
| |
| +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); |
| /* |
| * Schedule RCU callback invocation. If the specified type of RCU |
| * does not support RCU priority boosting, just do a direct call, |
| @@ -3081,18 +3085,105 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
| { |
| if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) |
| return; |
| - if (likely(!rsp->boost)) { |
| - rcu_do_batch(rsp, rdp); |
| - return; |
| - } |
| - invoke_rcu_callbacks_kthread(); |
| + rcu_do_batch(rsp, rdp); |
| +} |
| + |
| +static void rcu_wake_cond(struct task_struct *t, int status) |
| +{ |
| + /* |
| + * If the thread is yielding, only wake it when this |
| + * is invoked from idle |
| + */ |
| + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current))) |
| + wake_up_process(t); |
| } |
| |
| +/* |
| + * Wake up this CPU's rcuc kthread to do RCU core processing. |
| + */ |
| static void invoke_rcu_core(void) |
| { |
| - if (cpu_online(smp_processor_id())) |
| - raise_softirq(RCU_SOFTIRQ); |
| + unsigned long flags; |
| + struct task_struct *t; |
| + |
| + if (!cpu_online(smp_processor_id())) |
| + return; |
| + local_irq_save(flags); |
| + __this_cpu_write(rcu_cpu_has_work, 1); |
| + t = __this_cpu_read(rcu_cpu_kthread_task); |
| + if (t != NULL && current != t) |
| + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status)); |
| + local_irq_restore(flags); |
| +} |
| + |
| +static void rcu_cpu_kthread_park(unsigned int cpu) |
| +{ |
| + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; |
| +} |
| + |
| +static int rcu_cpu_kthread_should_run(unsigned int cpu) |
| +{ |
| + return __this_cpu_read(rcu_cpu_has_work); |
| +} |
| + |
| +/* |
| + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the |
| + * RCU softirq used in flavors and configurations of RCU that do not |
| + * support RCU priority boosting. |
| + */ |
| +static void rcu_cpu_kthread(unsigned int cpu) |
| +{ |
| + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); |
| + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); |
| + int spincnt; |
| + |
| + for (spincnt = 0; spincnt < 10; spincnt++) { |
| + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); |
| + local_bh_disable(); |
| + *statusp = RCU_KTHREAD_RUNNING; |
| + this_cpu_inc(rcu_cpu_kthread_loops); |
| + local_irq_disable(); |
| + work = *workp; |
| + *workp = 0; |
| + local_irq_enable(); |
| + if (work) |
| + rcu_process_callbacks(); |
| + local_bh_enable(); |
| + if (*workp == 0) { |
| + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); |
| + *statusp = RCU_KTHREAD_WAITING; |
| + return; |
| + } |
| + } |
| + *statusp = RCU_KTHREAD_YIELDING; |
| + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); |
| + schedule_timeout_interruptible(2); |
| + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); |
| + *statusp = RCU_KTHREAD_WAITING; |
| +} |
| + |
| +static struct smp_hotplug_thread rcu_cpu_thread_spec = { |
| + .store = &rcu_cpu_kthread_task, |
| + .thread_should_run = rcu_cpu_kthread_should_run, |
| + .thread_fn = rcu_cpu_kthread, |
| + .thread_comm = "rcuc/%u", |
| + .setup = rcu_cpu_kthread_setup, |
| + .park = rcu_cpu_kthread_park, |
| +}; |
| + |
| +/* |
| + * Spawn per-CPU RCU core processing kthreads. |
| + */ |
| +static int __init rcu_spawn_core_kthreads(void) |
| +{ |
| + int cpu; |
| + |
| + for_each_possible_cpu(cpu) |
| + per_cpu(rcu_cpu_has_work, cpu) = 0; |
| + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); |
| + return 0; |
| } |
| +early_initcall(rcu_spawn_core_kthreads); |
| |
| /* |
| * Handle any core-RCU processing required by a call_rcu() invocation. |
| @@ -4243,7 +4334,6 @@ void __init rcu_init(void) |
| if (dump_tree) |
| rcu_dump_rcu_node_tree(&rcu_sched_state); |
| __rcu_init_preempt(); |
| - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
| |
| /* |
| * We don't need protection against CPU-hotplug here because |
| diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h |
| index b75cad4e441c..0606d1a68cc8 100644 |
| --- a/kernel/rcu/tree.h |
| +++ b/kernel/rcu/tree.h |
| @@ -468,12 +468,10 @@ extern struct rcu_state rcu_preempt_state; |
| int rcu_dynticks_snap(struct rcu_dynticks *rdtp); |
| bool rcu_eqs_special_set(int cpu); |
| |
| -#ifdef CONFIG_RCU_BOOST |
| DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
| DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); |
| DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
| DECLARE_PER_CPU(char, rcu_cpu_has_work); |
| -#endif /* #ifdef CONFIG_RCU_BOOST */ |
| |
| #ifndef RCU_TREE_NONCORE |
| |
| @@ -493,10 +491,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); |
| static void __init __rcu_init_preempt(void); |
| static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
| static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
| -static void invoke_rcu_callbacks_kthread(void); |
| static bool rcu_is_callbacks_kthread(void); |
| +static void rcu_cpu_kthread_setup(unsigned int cpu); |
| #ifdef CONFIG_RCU_BOOST |
| -static void rcu_preempt_do_callbacks(void); |
| static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
| struct rcu_node *rnp); |
| #endif /* #ifdef CONFIG_RCU_BOOST */ |
| diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h |
| index 5d05aa37126b..d91beb6cf392 100644 |
| --- a/kernel/rcu/tree_plugin.h |
| +++ b/kernel/rcu/tree_plugin.h |
| @@ -24,28 +24,10 @@ |
| * Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
| */ |
| |
| -#include <linux/delay.h> |
| -#include <linux/gfp.h> |
| -#include <linux/oom.h> |
| -#include <linux/sched/debug.h> |
| -#include <linux/smpboot.h> |
| -#include <linux/jiffies.h> |
| -#include <uapi/linux/sched/types.h> |
| -#include "../time/tick-internal.h" |
| - |
| #ifdef CONFIG_RCU_BOOST |
| |
| #include "../locking/rtmutex_common.h" |
| |
| -/* |
| - * Control variables for per-CPU and per-rcu_node kthreads. These |
| - * handle all flavors of RCU. |
| - */ |
| -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); |
| -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
| -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
| -DEFINE_PER_CPU(char, rcu_cpu_has_work); |
| - |
| #else /* #ifdef CONFIG_RCU_BOOST */ |
| |
| /* |
| @@ -58,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); |
| |
| #endif /* #else #ifdef CONFIG_RCU_BOOST */ |
| |
| +/* |
| + * Control variables for per-CPU and per-rcu_node kthreads. These |
| + * handle all flavors of RCU. |
| + */ |
| +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
| +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
| +DEFINE_PER_CPU(char, rcu_cpu_has_work); |
| + |
| #ifdef CONFIG_RCU_NOCB_CPU |
| static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ |
| static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ |
| @@ -635,15 +625,6 @@ static void rcu_preempt_check_callbacks(void) |
| t->rcu_read_unlock_special.b.need_qs = true; |
| } |
| |
| -#ifdef CONFIG_RCU_BOOST |
| - |
| -static void rcu_preempt_do_callbacks(void) |
| -{ |
| - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); |
| -} |
| - |
| -#endif /* #ifdef CONFIG_RCU_BOOST */ |
| - |
| /* |
| * Queue a preemptible-RCU callback for invocation after a grace period. |
| */ |
| @@ -832,6 +813,19 @@ void exit_rcu(void) |
| |
| #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ |
| |
| +/* |
| + * If boosting, set rcuc kthreads to realtime priority. |
| + */ |
| +static void rcu_cpu_kthread_setup(unsigned int cpu) |
| +{ |
| +#ifdef CONFIG_RCU_BOOST |
| + struct sched_param sp; |
| + |
| + sp.sched_priority = kthread_prio; |
| + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
| +#endif /* #ifdef CONFIG_RCU_BOOST */ |
| +} |
| + |
| #ifdef CONFIG_RCU_BOOST |
| |
| #include "../locking/rtmutex_common.h" |
| @@ -863,16 +857,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) |
| |
| #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
| |
| -static void rcu_wake_cond(struct task_struct *t, int status) |
| -{ |
| - /* |
| - * If the thread is yielding, only wake it when this |
| - * is invoked from idle |
| - */ |
| - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) |
| - wake_up_process(t); |
| -} |
| - |
| /* |
| * Carry out RCU priority boosting on the task indicated by ->exp_tasks |
| * or ->boost_tasks, advancing the pointer to the next task in the |
| @@ -1016,23 +1000,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) |
| } |
| |
| /* |
| - * Wake up the per-CPU kthread to invoke RCU callbacks. |
| - */ |
| -static void invoke_rcu_callbacks_kthread(void) |
| -{ |
| - unsigned long flags; |
| - |
| - local_irq_save(flags); |
| - __this_cpu_write(rcu_cpu_has_work, 1); |
| - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && |
| - current != __this_cpu_read(rcu_cpu_kthread_task)) { |
| - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), |
| - __this_cpu_read(rcu_cpu_kthread_status)); |
| - } |
| - local_irq_restore(flags); |
| -} |
| - |
| -/* |
| * Is the current CPU running the RCU-callbacks kthread? |
| * Caller must have preemption disabled. |
| */ |
| @@ -1086,67 +1053,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
| return 0; |
| } |
| |
| -static void rcu_kthread_do_work(void) |
| -{ |
| - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); |
| - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); |
| - rcu_preempt_do_callbacks(); |
| -} |
| - |
| -static void rcu_cpu_kthread_setup(unsigned int cpu) |
| -{ |
| - struct sched_param sp; |
| - |
| - sp.sched_priority = kthread_prio; |
| - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
| -} |
| - |
| -static void rcu_cpu_kthread_park(unsigned int cpu) |
| -{ |
| - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; |
| -} |
| - |
| -static int rcu_cpu_kthread_should_run(unsigned int cpu) |
| -{ |
| - return __this_cpu_read(rcu_cpu_has_work); |
| -} |
| - |
| -/* |
| - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the |
| - * RCU softirq used in flavors and configurations of RCU that do not |
| - * support RCU priority boosting. |
| - */ |
| -static void rcu_cpu_kthread(unsigned int cpu) |
| -{ |
| - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); |
| - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); |
| - int spincnt; |
| - |
| - for (spincnt = 0; spincnt < 10; spincnt++) { |
| - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); |
| - local_bh_disable(); |
| - *statusp = RCU_KTHREAD_RUNNING; |
| - this_cpu_inc(rcu_cpu_kthread_loops); |
| - local_irq_disable(); |
| - work = *workp; |
| - *workp = 0; |
| - local_irq_enable(); |
| - if (work) |
| - rcu_kthread_do_work(); |
| - local_bh_enable(); |
| - if (*workp == 0) { |
| - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); |
| - *statusp = RCU_KTHREAD_WAITING; |
| - return; |
| - } |
| - } |
| - *statusp = RCU_KTHREAD_YIELDING; |
| - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); |
| - schedule_timeout_interruptible(2); |
| - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); |
| - *statusp = RCU_KTHREAD_WAITING; |
| -} |
| - |
| /* |
| * Set the per-rcu_node kthread's affinity to cover all CPUs that are |
| * served by the rcu_node in question. The CPU hotplug lock is still |
| @@ -1177,26 +1083,12 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) |
| free_cpumask_var(cm); |
| } |
| |
| -static struct smp_hotplug_thread rcu_cpu_thread_spec = { |
| - .store = &rcu_cpu_kthread_task, |
| - .thread_should_run = rcu_cpu_kthread_should_run, |
| - .thread_fn = rcu_cpu_kthread, |
| - .thread_comm = "rcuc/%u", |
| - .setup = rcu_cpu_kthread_setup, |
| - .park = rcu_cpu_kthread_park, |
| -}; |
| - |
| /* |
| * Spawn boost kthreads -- called as soon as the scheduler is running. |
| */ |
| static void __init rcu_spawn_boost_kthreads(void) |
| { |
| struct rcu_node *rnp; |
| - int cpu; |
| - |
| - for_each_possible_cpu(cpu) |
| - per_cpu(rcu_cpu_has_work, cpu) = 0; |
| - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); |
| rcu_for_each_leaf_node(rcu_state_p, rnp) |
| (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); |
| } |
| @@ -1219,11 +1111,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) |
| raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
| } |
| |
| -static void invoke_rcu_callbacks_kthread(void) |
| -{ |
| - WARN_ON_ONCE(1); |
| -} |
| - |
| static bool rcu_is_callbacks_kthread(void) |
| { |
| return false; |
| -- |
| 2.1.4 |
| |