| From 39a6655cbc80276b6167563f3f8f10ceb4cf65fc Mon Sep 17 00:00:00 2001 |
| From: Steven Rostedt <rostedt@goodmis.org> |
| Date: Tue, 4 Mar 2014 12:28:32 -0500 |
| Subject: [PATCH] cpu_chill: Add a UNINTERRUPTIBLE hrtimer_nanosleep |
| |
| We hit another bug that was caused by switching cpu_chill() from |
| msleep() to hrtimer_nanosleep(). |
| |
| This time it is a livelock. The problem is that hrtimer_nanosleep() |
| calls schedule with the state == TASK_INTERRUPTIBLE. But this means |
| that if a signal is pending, the scheduler won't schedule, and will |
| simply change the current task state back to TASK_RUNNING. This |
| nullifies the whole point of cpu_chill() in the first place. That is, |
| if a task is spinning on a try_lock() and it preempted the owner of the |
| lock, if it has a signal pending, it will never give up the CPU to let |
| the owner of the lock run. |
| |
| I made a static function __hrtimer_nanosleep() that takes a fifth |
| parameter "state", which determines the task state that the |
| nanosleep() will be in. The normal hrtimer_nanosleep() will act the |
| same, but cpu_chill() will call the __hrtimer_nanosleep() directly with |
| the TASK_UNINTERRUPTIBLE state. |
| |
| cpu_chill() only cares that the first sleep happens, and does not care |
| about the state of the restart schedule (in hrtimer_nanosleep_restart). |
| |
| Reported-by: Ulrich Obergfell <uobergfe@redhat.com> |
| Signed-off-by: Steven Rostedt <rostedt@goodmis.org> |
| Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> |
| [PG: promote timespec to timespec64 to match mainline and fix 32 bit builds] |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c |
| index 2a7f1ab0b21c..fd26eade86a3 100644 |
| --- a/kernel/time/hrtimer.c |
| +++ b/kernel/time/hrtimer.c |
| @@ -1664,10 +1664,11 @@ EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack); |
| #endif |
| |
| |
| -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) |
| +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode, |
| + unsigned long state) |
| { |
| do { |
| - set_current_state(TASK_INTERRUPTIBLE); |
| + set_current_state(state); |
| hrtimer_start_expires(&t->timer, mode); |
| |
| if (likely(t->task)) |
| @@ -1709,7 +1710,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) |
| HRTIMER_MODE_ABS, current); |
| hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); |
| |
| - if (do_nanosleep(&t, HRTIMER_MODE_ABS)) |
| + /* cpu_chill() does not care about restart state. */ |
| + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE)) |
| goto out; |
| |
| rmtp = restart->nanosleep.rmtp; |
| @@ -1726,8 +1728,10 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) |
| return ret; |
| } |
| |
| -long hrtimer_nanosleep(struct timespec64 *rqtp, struct timespec __user *rmtp, |
| - const enum hrtimer_mode mode, const clockid_t clockid) |
| +static long |
| +__hrtimer_nanosleep(struct timespec64 *rqtp, struct timespec __user *rmtp, |
| + const enum hrtimer_mode mode, const clockid_t clockid, |
| + unsigned long state) |
| { |
| struct restart_block *restart; |
| struct hrtimer_sleeper t; |
| @@ -1741,7 +1745,7 @@ long hrtimer_nanosleep(struct timespec64 *rqtp, struct timespec __user *rmtp, |
| hrtimer_init_sleeper_on_stack(&t, clockid, mode, current); |
| hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); |
| |
| - if (do_nanosleep(&t, mode)) |
| + if (do_nanosleep(&t, mode, state)) |
| goto out; |
| |
| /* Absolute timers do not update the rmtp value and restart: */ |
| @@ -1768,6 +1772,12 @@ long hrtimer_nanosleep(struct timespec64 *rqtp, struct timespec __user *rmtp, |
| return ret; |
| } |
| |
| +long hrtimer_nanosleep(struct timespec64 *rqtp, struct timespec __user *rmtp, |
| + const enum hrtimer_mode mode, const clockid_t clockid) |
| +{ |
| + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE); |
| +} |
| + |
| SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, |
| struct timespec __user *, rmtp) |
| { |
| @@ -1790,13 +1800,14 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, |
| */ |
| void cpu_chill(void) |
| { |
| - struct timespec tu = { |
| + struct timespec64 tu = { |
| .tv_nsec = NSEC_PER_MSEC, |
| }; |
| unsigned int freeze_flag = current->flags & PF_NOFREEZE; |
| |
| current->flags |= PF_NOFREEZE; |
| - hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC_HARD); |
| + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC_HARD, |
| + TASK_UNINTERRUPTIBLE); |
| if (!freeze_flag) |
| current->flags &= ~PF_NOFREEZE; |
| } |
| -- |
| 2.1.4 |
| |