| From: John Stultz <johnstul@us.ibm.com> |
| Date: Fri, 3 Jul 2009 08:29:58 -0500 |
| Subject: posix-timers: Thread posix-cpu-timers on -rt |
| |
| posix-cpu-timer code takes non -rt safe locks in hard irq |
| context. Move it to a thread. |
| |
| [ 3.0 fixes from Peter Zijlstra <peterz@infradead.org> ] |
| |
| Signed-off-by: John Stultz <johnstul@us.ibm.com> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| |
| --- |
| include/linux/init_task.h | 7 + |
| include/linux/sched.h | 3 |
| kernel/fork.c | 3 |
| kernel/time/posix-cpu-timers.c | 193 ++++++++++++++++++++++++++++++++++++++++- |
| 4 files changed, 202 insertions(+), 4 deletions(-) |
| |
| --- a/include/linux/init_task.h |
| +++ b/include/linux/init_task.h |
| @@ -148,6 +148,12 @@ extern struct task_group root_task_group |
| # define INIT_PERF_EVENTS(tsk) |
| #endif |
| |
| +#ifdef CONFIG_PREEMPT_RT_BASE |
| +# define INIT_TIMER_LIST .posix_timer_list = NULL, |
| +#else |
| +# define INIT_TIMER_LIST |
| +#endif |
| + |
| #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN |
| # define INIT_VTIME(tsk) \ |
| .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ |
| @@ -239,6 +245,7 @@ extern struct task_group root_task_group |
| .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ |
| .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ |
| .timer_slack_ns = 50000, /* 50 usec default slack */ \ |
| + INIT_TIMER_LIST \ |
| .pids = { \ |
| [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ |
| [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -1635,6 +1635,9 @@ struct task_struct { |
| |
| struct task_cputime cputime_expires; |
| struct list_head cpu_timers[3]; |
| +#ifdef CONFIG_PREEMPT_RT_BASE |
| + struct task_struct *posix_timer_list; |
| +#endif |
| |
| /* process credentials */ |
| const struct cred __rcu *real_cred; /* objective and real subjective task |
| --- a/kernel/fork.c |
| +++ b/kernel/fork.c |
| @@ -1274,6 +1274,9 @@ static void rt_mutex_init_task(struct ta |
| */ |
| static void posix_cpu_timers_init(struct task_struct *tsk) |
| { |
| +#ifdef CONFIG_PREEMPT_RT_BASE |
| + tsk->posix_timer_list = NULL; |
| +#endif |
| tsk->cputime_expires.prof_exp = 0; |
| tsk->cputime_expires.virt_exp = 0; |
| tsk->cputime_expires.sched_exp = 0; |
| --- a/kernel/time/posix-cpu-timers.c |
| +++ b/kernel/time/posix-cpu-timers.c |
| @@ -3,6 +3,7 @@ |
| */ |
| |
| #include <linux/sched.h> |
| +#include <linux/sched/rt.h> |
| #include <linux/posix-timers.h> |
| #include <linux/errno.h> |
| #include <linux/math64.h> |
| @@ -620,7 +621,7 @@ static int posix_cpu_timer_set(struct k_ |
| /* |
| * Disarm any old timer after extracting its expiry time. |
| */ |
| - WARN_ON_ONCE(!irqs_disabled()); |
| + WARN_ON_ONCE_NONRT(!irqs_disabled()); |
| |
| ret = 0; |
| old_incr = timer->it.cpu.incr; |
| @@ -1064,7 +1065,7 @@ void posix_cpu_timer_schedule(struct k_i |
| /* |
| * Now re-arm for the new expiry time. |
| */ |
| - WARN_ON_ONCE(!irqs_disabled()); |
| + WARN_ON_ONCE_NONRT(!irqs_disabled()); |
| arm_timer(timer); |
| unlock_task_sighand(p, &flags); |
| |
| @@ -1153,13 +1154,13 @@ static inline int fastpath_timer_check(s |
| * already updated our counts. We need to check if any timers fire now. |
| * Interrupts are disabled. |
| */ |
| -void run_posix_cpu_timers(struct task_struct *tsk) |
| +static void __run_posix_cpu_timers(struct task_struct *tsk) |
| { |
| LIST_HEAD(firing); |
| struct k_itimer *timer, *next; |
| unsigned long flags; |
| |
| - WARN_ON_ONCE(!irqs_disabled()); |
| + WARN_ON_ONCE_NONRT(!irqs_disabled()); |
| |
| /* |
| * The fast path checks that there are no expired thread or thread |
| @@ -1213,6 +1214,190 @@ void run_posix_cpu_timers(struct task_st |
| } |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT_BASE |
| +#include <linux/kthread.h> |
| +#include <linux/cpu.h> |
| +DEFINE_PER_CPU(struct task_struct *, posix_timer_task); |
| +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); |
| + |
| +static int posix_cpu_timers_thread(void *data) |
| +{ |
| + int cpu = (long)data; |
| + |
| + BUG_ON(per_cpu(posix_timer_task,cpu) != current); |
| + |
| + while (!kthread_should_stop()) { |
| + struct task_struct *tsk = NULL; |
| + struct task_struct *next = NULL; |
| + |
| + if (cpu_is_offline(cpu)) |
| + goto wait_to_die; |
| + |
| + /* grab task list */ |
| + raw_local_irq_disable(); |
| + tsk = per_cpu(posix_timer_tasklist, cpu); |
| + per_cpu(posix_timer_tasklist, cpu) = NULL; |
| + raw_local_irq_enable(); |
| + |
| + /* it's possible the list is empty, just return */ |
| + if (!tsk) { |
| + set_current_state(TASK_INTERRUPTIBLE); |
| + schedule(); |
| + __set_current_state(TASK_RUNNING); |
| + continue; |
| + } |
| + |
| + /* Process task list */ |
| + while (1) { |
| + /* save next */ |
| + next = tsk->posix_timer_list; |
| + |
| + /* run the task timers, clear its ptr and |
| + * unreference it |
| + */ |
| + __run_posix_cpu_timers(tsk); |
| + tsk->posix_timer_list = NULL; |
| + put_task_struct(tsk); |
| + |
| + /* check if this is the last on the list */ |
| + if (next == tsk) |
| + break; |
| + tsk = next; |
| + } |
| + } |
| + return 0; |
| + |
| +wait_to_die: |
| + /* Wait for kthread_stop */ |
| + set_current_state(TASK_INTERRUPTIBLE); |
| + while (!kthread_should_stop()) { |
| + schedule(); |
| + set_current_state(TASK_INTERRUPTIBLE); |
| + } |
| + __set_current_state(TASK_RUNNING); |
| + return 0; |
| +} |
| + |
| +static inline int __fastpath_timer_check(struct task_struct *tsk) |
| +{ |
| + /* tsk == current, ensure it is safe to use ->signal/sighand */ |
| + if (unlikely(tsk->exit_state)) |
| + return 0; |
| + |
| + if (!task_cputime_zero(&tsk->cputime_expires)) |
| + return 1; |
| + |
| + if (!task_cputime_zero(&tsk->signal->cputime_expires)) |
| + return 1; |
| + |
| + return 0; |
| +} |
| + |
| +void run_posix_cpu_timers(struct task_struct *tsk) |
| +{ |
| + unsigned long cpu = smp_processor_id(); |
| + struct task_struct *tasklist; |
| + |
| + BUG_ON(!irqs_disabled()); |
| + if(!per_cpu(posix_timer_task, cpu)) |
| + return; |
| + /* get per-cpu references */ |
| + tasklist = per_cpu(posix_timer_tasklist, cpu); |
| + |
| + /* check to see if we're already queued */ |
| + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { |
| + get_task_struct(tsk); |
| + if (tasklist) { |
| + tsk->posix_timer_list = tasklist; |
| + } else { |
| + /* |
| + * The list is terminated by a self-pointing |
| + * task_struct |
| + */ |
| + tsk->posix_timer_list = tsk; |
| + } |
| + per_cpu(posix_timer_tasklist, cpu) = tsk; |
| + |
| + wake_up_process(per_cpu(posix_timer_task, cpu)); |
| + } |
| +} |
| + |
| +/* |
| + * posix_cpu_thread_call - callback that gets triggered when a CPU is added. |
| + * Here we can start up the necessary migration thread for the new CPU. |
| + */ |
| +static int posix_cpu_thread_call(struct notifier_block *nfb, |
| + unsigned long action, void *hcpu) |
| +{ |
| + int cpu = (long)hcpu; |
| + struct task_struct *p; |
| + struct sched_param param; |
| + |
| + switch (action) { |
| + case CPU_UP_PREPARE: |
| + p = kthread_create(posix_cpu_timers_thread, hcpu, |
| + "posixcputmr/%d",cpu); |
| + if (IS_ERR(p)) |
| + return NOTIFY_BAD; |
| + p->flags |= PF_NOFREEZE; |
| + kthread_bind(p, cpu); |
| + /* Must be high prio to avoid getting starved */ |
| + param.sched_priority = MAX_RT_PRIO-1; |
| + sched_setscheduler(p, SCHED_FIFO, ¶m); |
| + per_cpu(posix_timer_task,cpu) = p; |
| + break; |
| + case CPU_ONLINE: |
| + /* Strictly unnecessary, as first user will wake it. */ |
| + wake_up_process(per_cpu(posix_timer_task,cpu)); |
| + break; |
| +#ifdef CONFIG_HOTPLUG_CPU |
| + case CPU_UP_CANCELED: |
| + /* Unbind it from offline cpu so it can run. Fall thru. */ |
| + kthread_bind(per_cpu(posix_timer_task, cpu), |
| + cpumask_any(cpu_online_mask)); |
| + kthread_stop(per_cpu(posix_timer_task,cpu)); |
| + per_cpu(posix_timer_task,cpu) = NULL; |
| + break; |
| + case CPU_DEAD: |
| + kthread_stop(per_cpu(posix_timer_task,cpu)); |
| + per_cpu(posix_timer_task,cpu) = NULL; |
| + break; |
| +#endif |
| + } |
| + return NOTIFY_OK; |
| +} |
| + |
| +/* Register at highest priority so that task migration (migrate_all_tasks) |
| + * happens before everything else. |
| + */ |
| +static struct notifier_block posix_cpu_thread_notifier = { |
| + .notifier_call = posix_cpu_thread_call, |
| + .priority = 10 |
| +}; |
| + |
| +static int __init posix_cpu_thread_init(void) |
| +{ |
| + void *hcpu = (void *)(long)smp_processor_id(); |
| + /* Start one for boot CPU. */ |
| + unsigned long cpu; |
| + |
| + /* init the per-cpu posix_timer_tasklist pointers */ |
| + for_each_possible_cpu(cpu) |
| + per_cpu(posix_timer_tasklist, cpu) = NULL; |
| + |
| + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu); |
| + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu); |
| + register_cpu_notifier(&posix_cpu_thread_notifier); |
| + return 0; |
| +} |
| +early_initcall(posix_cpu_thread_init); |
| +#else /* CONFIG_PREEMPT_RT_BASE */ |
| +void run_posix_cpu_timers(struct task_struct *tsk) |
| +{ |
| + __run_posix_cpu_timers(tsk); |
| +} |
| +#endif /* CONFIG_PREEMPT_RT_BASE */ |
| + |
| /* |
| * Set one of the process-wide special case CPU timers or RLIMIT_CPU. |
| * The tsk->sighand->siglock must be held by the caller. |