| From 4887c91c6ce8806230f95e29ce90641e2d53b424 Mon Sep 17 00:00:00 2001 |
| From: Thomas Gleixner <tglx@linutronix.de> |
| Date: Fri, 3 Jul 2009 08:44:24 -0500 |
| Subject: [PATCH] rtmutex: prevent missed wakeups |
| |
| commit 487ac70841b3165e207442b071dc943147612de6 in tip. |
| |
| The sleeping locks implementation based on rtmutexes can miss wakeups |
| for two reasons: |
| |
| 1) The unconditional usage of TASK_UNINTERRUPTIBLE for the blocking state |
| |
| Results in missed wakeups from wake_up_interruptible*() |
| |
| state = TASK_INTERRUPTIBLE; |
| blocks_on_lock() |
| state = TASK_UNINTERRUPTIBLE; |
| schedule(); |
| .... |
| acquires_lock(); |
| restore_state(); |
| |
| Until the waiter has restored its state wake_up_interruptible*() will |
| fail. |
| |
| 2) The rtmutex wakeup intermediate state TASK_RUNNING_MUTEX |
| |
| Results in missed wakeups from wake_up*() |
| |
| waiter is woken by mutex wakeup |
| waiter->state = TASK_RUNNING_MUTEX; |
| .... |
| acquires_lock(); |
| restore_state(); |
| |
| Until the waiter has restored its state wake_up*() will fail. |
| |
| Solution: |
| |
| Instead of setting the state to TASK_RUNNING_MUTEX in the mutex wakeup |
| case we logically OR TASK_RUNNING_MUTEX to the current waiter |
| state. This keeps the original bits (TASK_INTERRUPTIBLE / |
| TASK_UNINTERRUPTIBLE) intact and lets wakeups succeed. When a task |
| blocks on a lock in state TASK_INTERRUPTIBLE and is woken up by a real |
| wakeup, then we store the state = TASK_RUNNING for the restore and can |
| safely use TASK_UNINTERRUPTIBLE from that point to avoid further |
| wakeups which just let us loop in the lock code. |
| |
| This also removes the extra TASK_RUNNING_MUTEX flags from the |
| wakeup_process*() functions as they are no longer necessary. |
| |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/include/linux/sched.h b/include/linux/sched.h |
| index 3bb5def..a958544 100644 |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -214,8 +214,7 @@ extern char ___assert_task_state[1 - 2*!!( |
| |
| /* Convenience macros for the sake of wake_up */ |
| #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) |
| -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED | \ |
| - TASK_RUNNING_MUTEX) |
| +#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) |
| |
| /* get_task_state() */ |
| #define TASK_REPORT (TASK_RUNNING | TASK_RUNNING_MUTEX | \ |
| diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c |
| index a4c8b7e..900f11c 100644 |
| --- a/kernel/rtmutex.c |
| +++ b/kernel/rtmutex.c |
| @@ -729,16 +729,32 @@ static int adaptive_wait(struct rt_mutex_waiter *waiter, |
| /* |
| * The state setting needs to preserve the original state and needs to |
| * take care of non rtmutex wakeups. |
| + * |
| + * Called with rtmutex->wait_lock held to serialize against rtmutex |
| + * wakeups(). |
| */ |
| static inline unsigned long |
| rt_set_current_blocked_state(unsigned long saved_state) |
| { |
| - unsigned long state; |
| + unsigned long state, block_state; |
| + |
| + /* |
| + * If state is TASK_INTERRUPTIBLE, then we set the state for |
| + * blocking to TASK_INTERRUPTIBLE as well, otherwise we would |
| + * miss real wakeups via wake_up_interruptible(). If such a |
| + * wakeup happens we see the running state and preserve it in |
| + * saved_state. Now we can ignore further wakeups as we will |
| + * return in state running from our "spin" sleep. |
| + */ |
| + if (saved_state == TASK_INTERRUPTIBLE) |
| + block_state = TASK_INTERRUPTIBLE; |
| + else |
| + block_state = TASK_UNINTERRUPTIBLE; |
| |
| - state = xchg(¤t->state, TASK_UNINTERRUPTIBLE); |
| + state = xchg(¤t->state, block_state); |
| /* |
| * Take care of non rtmutex wakeups. rtmutex wakeups |
| - * set the state to TASK_RUNNING_MUTEX. |
| + * or TASK_RUNNING_MUTEX to (UN)INTERRUPTIBLE. |
| */ |
| if (state == TASK_RUNNING) |
| saved_state = TASK_RUNNING; |
| diff --git a/kernel/sched.c b/kernel/sched.c |
| index ebbd311..6be80f6 100644 |
| --- a/kernel/sched.c |
| +++ b/kernel/sched.c |
| @@ -2475,8 +2475,16 @@ out_running: |
| trace_sched_wakeup(rq, p, success); |
| check_preempt_curr(rq, p, wake_flags); |
| |
| + /* |
| + * For a mutex wakeup we or TASK_RUNNING_MUTEX to the task |
| + * state to preserve the original state, so a real wakeup |
| + * still can see the (UN)INTERRUPTIBLE bits in the state check |
| + * above. We dont have to worry about the | TASK_RUNNING_MUTEX |
| + * here. The waiter is serialized by the mutex lock and nobody |
| + * else can fiddle with p->state as we hold rq lock. |
| + */ |
| if (mutex) |
| - p->state = TASK_RUNNING_MUTEX; |
| + p->state |= TASK_RUNNING_MUTEX; |
| else |
| p->state = TASK_RUNNING; |
| #ifdef CONFIG_SMP |
| @@ -2538,7 +2546,7 @@ EXPORT_SYMBOL(wake_up_process_mutex_sync); |
| |
| int wake_up_state(struct task_struct *p, unsigned int state) |
| { |
| - return try_to_wake_up(p, state | TASK_RUNNING_MUTEX, 0, 0); |
| + return try_to_wake_up(p, state, 0, 0); |
| } |
| |
| /* |
| @@ -5547,7 +5555,7 @@ need_resched_nonpreemptible: |
| update_rq_clock(rq); |
| clear_tsk_need_resched(prev); |
| |
| - if ((prev->state & ~TASK_RUNNING_MUTEX) && |
| + if (!(prev->state & TASK_RUNNING_MUTEX) && prev->state && |
| !(preempt_count() & PREEMPT_ACTIVE)) { |
| if (unlikely(signal_pending_state(prev->state, prev))) |
| prev->state = TASK_RUNNING; |
| @@ -5749,8 +5757,7 @@ asmlinkage void __sched preempt_schedule_irq(void) |
| int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, |
| void *key) |
| { |
| - return try_to_wake_up(curr->private, mode | TASK_RUNNING_MUTEX, |
| - wake_flags, 0); |
| + return try_to_wake_up(curr->private, mode, wake_flags, 0); |
| } |
| EXPORT_SYMBOL(default_wake_function); |
| |
| -- |
| 1.7.1.1 |
| |