| From: John Stultz <johnstul@us.ibm.com> |
| Date: Fri, 3 Jul 2009 08:29:58 -0500 |
| Subject: posix-timers: Thread posix-cpu-timers on -rt |
| |
| posix-cpu-timer code takes non-rt-safe locks in hard-irq |
| context. Move the timer processing to a per-CPU thread. |
| |
| [ 3.0 fixes from Peter Zijlstra <peterz@infradead.org> ] |
| |
| Signed-off-by: John Stultz <johnstul@us.ibm.com> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| |
| --- |
| include/linux/init_task.h | 7 + |
| include/linux/sched.h | 3 |
| kernel/fork.c | 3 |
| kernel/time/posix-cpu-timers.c | 157 +++++++++++++++++++++++++++++++++++++++-- |
| 4 files changed, 166 insertions(+), 4 deletions(-) |
| |
| --- a/include/linux/init_task.h |
| +++ b/include/linux/init_task.h |
| @@ -167,6 +167,12 @@ extern struct cred init_cred; |
| # define INIT_PERF_EVENTS(tsk) |
| #endif |
| |
| +#if defined(CONFIG_POSIX_TIMERS) && defined(CONFIG_PREEMPT_RT_BASE) |
| +# define INIT_TIMER_LIST .posix_timer_list = NULL, |
| +#else |
| +# define INIT_TIMER_LIST |
| +#endif |
| + |
| #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN |
| # define INIT_VTIME(tsk) \ |
| .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ |
| @@ -269,6 +275,7 @@ extern struct cred init_cred; |
| INIT_CPU_TIMERS(tsk) \ |
| .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ |
| .timer_slack_ns = 50000, /* 50 usec default slack */ \ |
| + INIT_TIMER_LIST \ |
| .pids = { \ |
| [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ |
| [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -710,6 +710,9 @@ struct task_struct { |
| #ifdef CONFIG_POSIX_TIMERS |
| struct task_cputime cputime_expires; |
| struct list_head cpu_timers[3]; |
| +#ifdef CONFIG_PREEMPT_RT_BASE |
| + struct task_struct *posix_timer_list; |
| +#endif |
| #endif |
| |
| /* Process credentials: */ |
| --- a/kernel/fork.c |
| +++ b/kernel/fork.c |
| @@ -1451,6 +1451,9 @@ static void rt_mutex_init_task(struct ta |
| */ |
| static void posix_cpu_timers_init(struct task_struct *tsk) |
| { |
| +#ifdef CONFIG_PREEMPT_RT_BASE |
| + tsk->posix_timer_list = NULL; |
| +#endif |
| tsk->cputime_expires.prof_exp = 0; |
| tsk->cputime_expires.virt_exp = 0; |
| tsk->cputime_expires.sched_exp = 0; |
| --- a/kernel/time/posix-cpu-timers.c |
| +++ b/kernel/time/posix-cpu-timers.c |
| @@ -2,8 +2,10 @@ |
| * Implement CPU time clocks for the POSIX clock interface. |
| */ |
| |
| +#include <uapi/linux/sched/types.h> |
| #include <linux/sched/signal.h> |
| #include <linux/sched/cputime.h> |
| +#include <linux/sched/rt.h> |
| #include <linux/posix-timers.h> |
| #include <linux/errno.h> |
| #include <linux/math64.h> |
| @@ -12,6 +14,7 @@ |
| #include <trace/events/timer.h> |
| #include <linux/tick.h> |
| #include <linux/workqueue.h> |
| +#include <linux/smpboot.h> |
| |
| /* |
| * Called after updating RLIMIT_CPU to run cpu timer and update |
| @@ -590,7 +593,7 @@ static int posix_cpu_timer_set(struct k_ |
| /* |
| * Disarm any old timer after extracting its expiry time. |
| */ |
| - WARN_ON_ONCE(!irqs_disabled()); |
| + WARN_ON_ONCE_NONRT(!irqs_disabled()); |
| |
| ret = 0; |
| old_incr = timer->it.cpu.incr; |
| @@ -1014,7 +1017,7 @@ void posix_cpu_timer_schedule(struct k_i |
| /* |
| * Now re-arm for the new expiry time. |
| */ |
| - WARN_ON_ONCE(!irqs_disabled()); |
| + WARN_ON_ONCE_NONRT(!irqs_disabled()); |
| arm_timer(timer); |
| unlock_task_sighand(p, &flags); |
| |
| @@ -1103,13 +1106,13 @@ static inline int fastpath_timer_check(s |
| * already updated our counts. We need to check if any timers fire now. |
| * Interrupts are disabled. |
| */ |
| -void run_posix_cpu_timers(struct task_struct *tsk) |
| +static void __run_posix_cpu_timers(struct task_struct *tsk) |
| { |
| LIST_HEAD(firing); |
| struct k_itimer *timer, *next; |
| unsigned long flags; |
| |
| - WARN_ON_ONCE(!irqs_disabled()); |
| + WARN_ON_ONCE_NONRT(!irqs_disabled()); |
| |
| /* |
| * The fast path checks that there are no expired thread or thread |
| @@ -1163,6 +1166,152 @@ void run_posix_cpu_timers(struct task_st |
| } |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT_BASE |
| +#include <linux/kthread.h> |
| +#include <linux/cpu.h> |
| +DEFINE_PER_CPU(struct task_struct *, posix_timer_task); |
| +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); |
| +DEFINE_PER_CPU(bool, posix_timer_th_active); |
| + |
| +static void posix_cpu_kthread_fn(unsigned int cpu) |
| +{ |
| + struct task_struct *tsk = NULL; |
| + struct task_struct *next = NULL; |
| + |
| + BUG_ON(per_cpu(posix_timer_task, cpu) != current); |
| + |
| + /* grab task list */ |
| + raw_local_irq_disable(); |
| + tsk = per_cpu(posix_timer_tasklist, cpu); |
| + per_cpu(posix_timer_tasklist, cpu) = NULL; |
| + raw_local_irq_enable(); |
| + |
| + /* its possible the list is empty, just return */ |
| + if (!tsk) |
| + return; |
| + |
| + /* Process task list */ |
| + while (1) { |
| + /* save next */ |
| + next = tsk->posix_timer_list; |
| + |
| + /* run the task timers, clear its ptr and |
| + * unreference it |
| + */ |
| + __run_posix_cpu_timers(tsk); |
| + tsk->posix_timer_list = NULL; |
| + put_task_struct(tsk); |
| + |
| + /* check if this is the last on the list */ |
| + if (next == tsk) |
| + break; |
| + tsk = next; |
| + } |
| +} |
| + |
| +static inline int __fastpath_timer_check(struct task_struct *tsk) |
| +{ |
| + /* tsk == current, ensure it is safe to use ->signal/sighand */ |
| + if (unlikely(tsk->exit_state)) |
| + return 0; |
| + |
| + if (!task_cputime_zero(&tsk->cputime_expires)) |
| + return 1; |
| + |
| + if (!task_cputime_zero(&tsk->signal->cputime_expires)) |
| + return 1; |
| + |
| + return 0; |
| +} |
| + |
| +void run_posix_cpu_timers(struct task_struct *tsk) |
| +{ |
| + unsigned int cpu = smp_processor_id(); |
| + struct task_struct *tasklist; |
| + |
| + BUG_ON(!irqs_disabled()); |
| + |
| + if (per_cpu(posix_timer_th_active, cpu) != true) |
| + return; |
| + |
| + /* get per-cpu references */ |
| + tasklist = per_cpu(posix_timer_tasklist, cpu); |
| + |
| + /* check to see if we're already queued */ |
| + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { |
| + get_task_struct(tsk); |
| + if (tasklist) { |
| + tsk->posix_timer_list = tasklist; |
| + } else { |
| + /* |
| + * The list is terminated by a self-pointing |
| + * task_struct |
| + */ |
| + tsk->posix_timer_list = tsk; |
| + } |
| + per_cpu(posix_timer_tasklist, cpu) = tsk; |
| + |
| + wake_up_process(per_cpu(posix_timer_task, cpu)); |
| + } |
| +} |
| + |
| +static int posix_cpu_kthread_should_run(unsigned int cpu) |
| +{ |
| + return __this_cpu_read(posix_timer_tasklist) != NULL; |
| +} |
| + |
| +static void posix_cpu_kthread_park(unsigned int cpu) |
| +{ |
| + this_cpu_write(posix_timer_th_active, false); |
| +} |
| + |
| +static void posix_cpu_kthread_unpark(unsigned int cpu) |
| +{ |
| + this_cpu_write(posix_timer_th_active, true); |
| +} |
| + |
| +static void posix_cpu_kthread_setup(unsigned int cpu) |
| +{ |
| + struct sched_param sp; |
| + |
| + sp.sched_priority = MAX_RT_PRIO - 1; |
| + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
| + posix_cpu_kthread_unpark(cpu); |
| +} |
| + |
| +static struct smp_hotplug_thread posix_cpu_thread = { |
| + .store = &posix_timer_task, |
| + .thread_should_run = posix_cpu_kthread_should_run, |
| + .thread_fn = posix_cpu_kthread_fn, |
| + .thread_comm = "posixcputmr/%u", |
| + .setup = posix_cpu_kthread_setup, |
| + .park = posix_cpu_kthread_park, |
| + .unpark = posix_cpu_kthread_unpark, |
| +}; |
| + |
| +static int __init posix_cpu_thread_init(void) |
| +{ |
| + /* Start one for boot CPU. */ |
| + unsigned long cpu; |
| + int ret; |
| + |
| + /* init the per-cpu posix_timer_tasklets */ |
| + for_each_possible_cpu(cpu) |
| + per_cpu(posix_timer_tasklist, cpu) = NULL; |
| + |
| + ret = smpboot_register_percpu_thread(&posix_cpu_thread); |
| + WARN_ON(ret); |
| + |
| + return 0; |
| +} |
| +early_initcall(posix_cpu_thread_init); |
| +#else /* CONFIG_PREEMPT_RT_BASE */ |
| +void run_posix_cpu_timers(struct task_struct *tsk) |
| +{ |
| + __run_posix_cpu_timers(tsk); |
| +} |
| +#endif /* CONFIG_PREEMPT_RT_BASE */ |
| + |
| /* |
| * Set one of the process-wide special case CPU timers or RLIMIT_CPU. |
| * The tsk->sighand->siglock must be held by the caller. |