| From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> |
| Date: Mon, 4 Nov 2013 13:21:10 -0800 |
| Subject: rcu: Eliminate softirq processing from rcutree |
| |
Running RCU out of softirq is a problem for some workloads that would
like to manage RCU core processing independently of other softirq work,
for example, by setting kthread priority. This commit therefore moves
the RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER
kthread named rcuc. The SCHED_OTHER approach avoids the scalability
problems that appeared with the earlier attempt to move RCU core
processing from softirq to kthreads. That said, kernels built with
RCU_BOOST=y will run the rcuc kthreads at the RCU-boosting priority.
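
Purely as an illustration of the resulting flexibility (not part of
this patch), the priority of an rcuc kthread can now be adjusted from
user space like that of any other kthread. The sketch below is a
minimal example only; it assumes the PID of an rcuc/N kthread has
already been looked up (for example with "pgrep rcuc/0"), that the
caller has CAP_SYS_NICE, and the SCHED_FIFO priority value 5 is
arbitrary:

  #include <sched.h>
  #include <stdio.h>
  #include <stdlib.h>

  int main(int argc, char **argv)
  {
      /* Illustrative only: move one rcuc kthread to SCHED_FIFO prio 5. */
      struct sched_param sp = { .sched_priority = 5 };

      if (argc != 2) {
          fprintf(stderr, "usage: %s <pid-of-rcuc-kthread>\n", argv[0]);
          return 1;
      }
      if (sched_setscheduler(atoi(argv[1]), SCHED_FIFO, &sp)) {
          perror("sched_setscheduler");
          return 1;
      }
      return 0;
  }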

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/rcu/tree.c        | 112 +++++++++++++++++++++++++++++++---
 kernel/rcu/tree.h        |   5 -
 kernel/rcu/tree_plugin.h | 151 +++--------------------------------------------
 3 files changed, 114 insertions(+), 154 deletions(-)

--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -58,6 +58,13 @@
#include <linux/trace_events.h>
#include <linux/suspend.h>
#include <linux/ftrace.h>
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
+#include <linux/jiffies.h>
+#include <linux/sched/isolation.h>
+#include "../time/tick-internal.h"

#include "tree.h"
#include "rcu.h"
@@ -2934,18 +2941,17 @@ static void
/*
* Do RCU core processing for the current CPU.
*/
-static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
+static __latent_entropy void rcu_process_callbacks(void)
{
struct rcu_state *rsp;

if (cpu_is_offline(smp_processor_id()))
return;
- trace_rcu_utilization(TPS("Start RCU core"));
for_each_rcu_flavor(rsp)
__rcu_process_callbacks(rsp);
- trace_rcu_utilization(TPS("End RCU core"));
}

+static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
/*
* Schedule RCU callback invocation. If the specified type of RCU
* does not support RCU priority boosting, just do a direct call,
@@ -2957,18 +2963,105 @@ static void invoke_rcu_callbacks(struct
{
if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
return;
- if (likely(!rsp->boost)) {
- rcu_do_batch(rsp, rdp);
+ rcu_do_batch(rsp, rdp);
+}
+
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+ /*
+ * If the thread is yielding, only wake it when this
+ * is invoked from idle
+ */
+ if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
+ wake_up_process(t);
+}
+
+/*
+ * Wake up this CPU's rcuc kthread to do RCU core processing.
+ */
+static void invoke_rcu_core(void)
+{
+ unsigned long flags;
+ struct task_struct *t;
+
+ if (!cpu_online(smp_processor_id()))
return;
+ local_irq_save(flags);
+ __this_cpu_write(rcu_cpu_has_work, 1);
+ t = __this_cpu_read(rcu_cpu_kthread_task);
+ if (t != NULL && current != t)
+ rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
+ local_irq_restore(flags);
+}
+
+static void rcu_cpu_kthread_park(unsigned int cpu)
+{
+ per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+}
+
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
+{
+ return __this_cpu_read(rcu_cpu_has_work);
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
+ * RCU softirq used in flavors and configurations of RCU that do not
+ * support RCU priority boosting.
+ */
+static void rcu_cpu_kthread(unsigned int cpu)
+{
+ unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
+ char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
+ int spincnt;
+
+ for (spincnt = 0; spincnt < 10; spincnt++) {
+ trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
+ local_bh_disable();
+ *statusp = RCU_KTHREAD_RUNNING;
+ this_cpu_inc(rcu_cpu_kthread_loops);
+ local_irq_disable();
+ work = *workp;
+ *workp = 0;
+ local_irq_enable();
+ if (work)
+ rcu_process_callbacks();
+ local_bh_enable();
+ if (*workp == 0) {
+ trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
+ *statusp = RCU_KTHREAD_WAITING;
+ return;
+ }
}
- invoke_rcu_callbacks_kthread();
+ *statusp = RCU_KTHREAD_YIELDING;
+ trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
+ schedule_timeout_interruptible(2);
+ trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
+ *statusp = RCU_KTHREAD_WAITING;
}

-static void invoke_rcu_core(void)
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+ .store = &rcu_cpu_kthread_task,
+ .thread_should_run = rcu_cpu_kthread_should_run,
+ .thread_fn = rcu_cpu_kthread,
+ .thread_comm = "rcuc/%u",
+ .setup = rcu_cpu_kthread_setup,
+ .park = rcu_cpu_kthread_park,
+};
+
+/*
+ * Spawn per-CPU RCU core processing kthreads.
+ */
+static int __init rcu_spawn_core_kthreads(void)
{
- if (cpu_online(smp_processor_id()))
- raise_softirq(RCU_SOFTIRQ);
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(rcu_cpu_has_work, cpu) = 0;
+ BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
+ return 0;
}
+early_initcall(rcu_spawn_core_kthreads);

/*
* Handle any core-RCU processing required by a call_rcu() invocation.
@@ -4238,7 +4331,6 @@ void __init rcu_init(void)
if (dump_tree)
rcu_dump_rcu_node_tree(&rcu_sched_state);
__rcu_init_preempt();
- open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);

/*
* We don't need protection against CPU-hotplug here because
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -442,12 +442,10 @@ extern struct rcu_state rcu_preempt_stat
int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
bool rcu_eqs_special_set(int cpu);

-#ifdef CONFIG_RCU_BOOST
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DECLARE_PER_CPU(char, rcu_cpu_has_work);
-#endif /* #ifdef CONFIG_RCU_BOOST */

#ifndef RCU_TREE_NONCORE

@@ -467,10 +465,9 @@ void call_rcu(struct rcu_head *head, rcu
static void __init __rcu_init_preempt(void);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
-static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
+static void rcu_cpu_kthread_setup(unsigned int cpu);
#ifdef CONFIG_RCU_BOOST
-static void rcu_preempt_do_callbacks(void);
static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_BOOST */
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -24,42 +24,16 @@
* Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/

-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/oom.h>
-#include <linux/sched/debug.h>
-#include <linux/smpboot.h>
-#include <linux/jiffies.h>
-#include <linux/sched/isolation.h>
-#include <uapi/linux/sched/types.h>
-#include "../time/tick-internal.h"
-
-#ifdef CONFIG_RCU_BOOST
-
#include "../locking/rtmutex_common.h"

/*
* Control variables for per-CPU and per-rcu_node kthreads. These
* handle all flavors of RCU.
*/
-static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DEFINE_PER_CPU(char, rcu_cpu_has_work);

-#else /* #ifdef CONFIG_RCU_BOOST */
-
-/*
- * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
- * all uses are in dead code. Provide a definition to keep the compiler
- * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
- * This probably needs to be excluded from -rt builds.
- */
-#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
-#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
-
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
-
#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
@@ -684,15 +658,6 @@ static void rcu_preempt_check_callbacks(
t->rcu_read_unlock_special.b.need_qs = true;
}

-#ifdef CONFIG_RCU_BOOST
-
-static void rcu_preempt_do_callbacks(void)
-{
- rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
-}
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
/**
* call_rcu() - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
@@ -915,18 +880,21 @@ void exit_rcu(void)

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

+/*
+ * If boosting, set rcuc kthreads to realtime priority.
+ */
+static void rcu_cpu_kthread_setup(unsigned int cpu)
+{
#ifdef CONFIG_RCU_BOOST
+ struct sched_param sp;

-static void rcu_wake_cond(struct task_struct *t, int status)
-{
- /*
- * If the thread is yielding, only wake it when this
- * is invoked from idle
- */
- if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
- wake_up_process(t);
+ sp.sched_priority = kthread_prio;
+ sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+#endif /* #ifdef CONFIG_RCU_BOOST */
}

+#ifdef CONFIG_RCU_BOOST
+
/*
* Carry out RCU priority boosting on the task indicated by ->exp_tasks
* or ->boost_tasks, advancing the pointer to the next task in the
@@ -1069,23 +1037,6 @@ static void rcu_initiate_boost(struct rc
}

/*
- * Wake up the per-CPU kthread to invoke RCU callbacks.
- */
-static void invoke_rcu_callbacks_kthread(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __this_cpu_write(rcu_cpu_has_work, 1);
- if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
- current != __this_cpu_read(rcu_cpu_kthread_task)) {
- rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
- __this_cpu_read(rcu_cpu_kthread_status));
- }
- local_irq_restore(flags);
-}
-
-/*
* Is the current CPU running the RCU-callbacks kthread?
* Caller must have preemption disabled.
*/
@@ -1139,67 +1090,6 @@ static int rcu_spawn_one_boost_kthread(s
return 0;
}

-static void rcu_kthread_do_work(void)
-{
- rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
- rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
- rcu_preempt_do_callbacks();
-}
-
-static void rcu_cpu_kthread_setup(unsigned int cpu)
-{
- struct sched_param sp;
-
- sp.sched_priority = kthread_prio;
- sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-}
-
-static void rcu_cpu_kthread_park(unsigned int cpu)
-{
- per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-}
-
-static int rcu_cpu_kthread_should_run(unsigned int cpu)
-{
- return __this_cpu_read(rcu_cpu_has_work);
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
- * RCU softirq used in flavors and configurations of RCU that do not
- * support RCU priority boosting.
- */
-static void rcu_cpu_kthread(unsigned int cpu)
-{
- unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
- char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
- int spincnt;
-
- for (spincnt = 0; spincnt < 10; spincnt++) {
- trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
- local_bh_disable();
- *statusp = RCU_KTHREAD_RUNNING;
- this_cpu_inc(rcu_cpu_kthread_loops);
- local_irq_disable();
- work = *workp;
- *workp = 0;
- local_irq_enable();
- if (work)
- rcu_kthread_do_work();
- local_bh_enable();
- if (*workp == 0) {
- trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
- *statusp = RCU_KTHREAD_WAITING;
- return;
- }
- }
- *statusp = RCU_KTHREAD_YIELDING;
- trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
- schedule_timeout_interruptible(2);
- trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
- *statusp = RCU_KTHREAD_WAITING;
-}
-
/*
* Set the per-rcu_node kthread's affinity to cover all CPUs that are
* served by the rcu_node in question. The CPU hotplug lock is still
@@ -1230,26 -1120,12 @@ static void rcu_boost_kthread_setaffinit
free_cpumask_var(cm);
}

-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
- .store = &rcu_cpu_kthread_task,
- .thread_should_run = rcu_cpu_kthread_should_run,
- .thread_fn = rcu_cpu_kthread,
- .thread_comm = "rcuc/%u",
- .setup = rcu_cpu_kthread_setup,
- .park = rcu_cpu_kthread_park,
-};
-
/*
* Spawn boost kthreads -- called as soon as the scheduler is running.
*/
static void __init rcu_spawn_boost_kthreads(void)
{
struct rcu_node *rnp;
- int cpu;
-
- for_each_possible_cpu(cpu)
- per_cpu(rcu_cpu_has_work, cpu) = 0;
- BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
rcu_for_each_leaf_node(rcu_state_p, rnp)
(void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
}
@@ -1272,11 -1148,6 @@ static void rcu_initiate_boost(struct rc
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}

-static void invoke_rcu_callbacks_kthread(void)
-{
- WARN_ON_ONCE(1);
-}
-
static bool rcu_is_callbacks_kthread(void)
{
return false;