From 4887c91c6ce8806230f95e29ce90641e2d53b424 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 3 Jul 2009 08:44:24 -0500
Subject: [PATCH] rtmutex: prevent missed wakeups
commit 487ac70841b3165e207442b071dc943147612de6 in tip.
The sleeping locks implementation based on rtmutexes can miss wakeups
for two reasons:
1) The unconditional use of TASK_UNINTERRUPTIBLE as the blocking state
   results in missed wakeups from wake_up_interruptible*() (the state
   check that drops them is sketched after this list):
state = TASK_INTERRUPTIBLE;
blocks_on_lock()
state = TASK_UNINTERRUPTIBLE;
schedule();
....
acquires_lock();
restore_state();
Until the waiter has restored its state, wake_up_interruptible*() will
fail.
2) The intermediate rtmutex wakeup state TASK_RUNNING_MUTEX
   results in missed wakeups from wake_up*():
waiter is woken by mutex wakeup
waiter->state = TASK_RUNNING_MUTEX;
....
acquires_lock();
restore_state();
Until the waiter has restored its state, wake_up*() will fail.
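For illustration, the check that drops these wakeups can be reduced to the
following sketch. This is a condensed model, not the kernel's actual
try_to_wake_up(); the function name and the reduction to a single test are
assumptions made for clarity:

	#include <linux/sched.h>

	/*
	 * Condensed model of the state filter at the top of try_to_wake_up():
	 * a task is only woken when its current state matches the caller's
	 * wake mask, otherwise the wakeup is silently dropped.
	 */
	static int sketch_try_to_wake_up(struct task_struct *p, unsigned int state)
	{
		if (!(p->state & state))
			return 0;	/* state mismatch: wakeup is lost */
		p->state = TASK_RUNNING;
		return 1;
	}

wake_up_interruptible*() passes TASK_INTERRUPTIBLE as the mask and therefore
cannot reach a waiter sitting in TASK_UNINTERRUPTIBLE (case 1), while
wake_up*() passes TASK_NORMAL, which does not contain the old plain
TASK_RUNNING_MUTEX value (case 2).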
Solution:
Instead of setting the state to TASK_RUNNING_MUTEX in the mutex wakeup
case, we logically OR TASK_RUNNING_MUTEX into the current waiter
state. This keeps the original bits (TASK_INTERRUPTIBLE /
TASK_UNINTERRUPTIBLE) intact and lets wakeups succeed. When a task
blocks on a lock in state TASK_INTERRUPTIBLE and is then hit by a real
wakeup, we store TASK_RUNNING as the state to restore and can safely
use TASK_UNINTERRUPTIBLE from that point on to avoid further wakeups,
which would just make us loop in the lock code.
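Put together, a sleeping-lock slowpath then behaves roughly as in the sketch
below. This is not code from the patch: try_to_take_rt_mutex() is used as a
simplified stand-in for the acquisition step, the lock->wait_lock handling is
omitted (rt_set_current_blocked_state() is called with it held and it is
dropped around schedule()), and the restore step mirrors the restore_state()
mentioned above:

	unsigned long saved_state = current->state;	/* e.g. TASK_INTERRUPTIBLE */

	for (;;) {
		if (try_to_take_rt_mutex(lock))
			break;
		/*
		 * Block in TASK_INTERRUPTIBLE if that was the saved state,
		 * TASK_UNINTERRUPTIBLE otherwise. A real wakeup observed at
		 * this point is folded into saved_state as TASK_RUNNING.
		 */
		saved_state = rt_set_current_blocked_state(saved_state);
		schedule();
	}

	/* restore_state(): if a real wakeup raced in, stay in TASK_RUNNING */
	if (xchg(&current->state, saved_state) == TASK_RUNNING)
		current->state = TASK_RUNNING;

The point of keeping saved_state in the caller is that a single real wakeup
is remembered across any number of rtmutex wakeup/block cycles, so the task
leaves the slowpath in TASK_RUNNING and the wakeup is not lost.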
This also removes the extra TASK_RUNNING_MUTEX flags from the wakeup
functions' wake masks as they are no longer necessary.
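Concretely, with the OR-based scheme the bits compose as in the following
hedged illustration (an annotated fragment, not code from the patch):

	/* waiter blocked interruptibly, then woken by an rtmutex wakeup */
	p->state = TASK_INTERRUPTIBLE | TASK_RUNNING_MUTEX;

	/* a real wakeup still matches, since the original bit is preserved */
	wake_up_state(p, TASK_INTERRUPTIBLE);	/* p->state & mask is non-zero */

	/*
	 * schedule() sees TASK_RUNNING_MUTEX set and keeps the task on the
	 * runqueue instead of deactivating it.
	 */

Because the (UN)INTERRUPTIBLE bit now survives a mutex wakeup, callers no
longer have to widen their wake masks with TASK_RUNNING_MUTEX, which is what
the wake_up_state() and default_wake_function() hunks below remove.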
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3bb5def..a958544 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -214,8 +214,7 @@ extern char ___assert_task_state[1 - 2*!!(
/* Convenience macros for the sake of wake_up */
#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
-#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED | \
- TASK_RUNNING_MUTEX)
+#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
/* get_task_state() */
#define TASK_REPORT (TASK_RUNNING | TASK_RUNNING_MUTEX | \
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a4c8b7e..900f11c 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -729,16 +729,32 @@ static int adaptive_wait(struct rt_mutex_waiter *waiter,
/*
* The state setting needs to preserve the original state and needs to
* take care of non rtmutex wakeups.
+ *
+ * Called with rtmutex->wait_lock held to serialize against rtmutex
+ * wakeups().
*/
static inline unsigned long
rt_set_current_blocked_state(unsigned long saved_state)
{
- unsigned long state;
+ unsigned long state, block_state;
+
+ /*
+ * If state is TASK_INTERRUPTIBLE, then we set the state for
+ * blocking to TASK_INTERRUPTIBLE as well, otherwise we would
+ * miss real wakeups via wake_up_interruptible(). If such a
+ * wakeup happens we see the running state and preserve it in
+ * saved_state. Now we can ignore further wakeups as we will
+ * return in state running from our "spin" sleep.
+ */
+ if (saved_state == TASK_INTERRUPTIBLE)
+ block_state = TASK_INTERRUPTIBLE;
+ else
+ block_state = TASK_UNINTERRUPTIBLE;
- state = xchg(&current->state, TASK_UNINTERRUPTIBLE);
+ state = xchg(&current->state, block_state);
/*
* Take care of non rtmutex wakeups. rtmutex wakeups
- * set the state to TASK_RUNNING_MUTEX.
+ * or TASK_RUNNING_MUTEX to (UN)INTERRUPTIBLE.
*/
if (state == TASK_RUNNING)
saved_state = TASK_RUNNING;
diff --git a/kernel/sched.c b/kernel/sched.c
index ebbd311..6be80f6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2475,8 +2475,16 @@ out_running:
trace_sched_wakeup(rq, p, success);
check_preempt_curr(rq, p, wake_flags);
+ /*
+ * For a mutex wakeup we or TASK_RUNNING_MUTEX to the task
+ * state to preserve the original state, so a real wakeup
+ * still can see the (UN)INTERRUPTIBLE bits in the state check
+ * above. We dont have to worry about the | TASK_RUNNING_MUTEX
+ * here. The waiter is serialized by the mutex lock and nobody
+ * else can fiddle with p->state as we hold rq lock.
+ */
if (mutex)
- p->state = TASK_RUNNING_MUTEX;
+ p->state |= TASK_RUNNING_MUTEX;
else
p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
@@ -2538,7 +2546,7 @@ EXPORT_SYMBOL(wake_up_process_mutex_sync);
int wake_up_state(struct task_struct *p, unsigned int state)
{
- return try_to_wake_up(p, state | TASK_RUNNING_MUTEX, 0, 0);
+ return try_to_wake_up(p, state, 0, 0);
}
/*
@@ -5547,7 +5555,7 @@ need_resched_nonpreemptible:
update_rq_clock(rq);
clear_tsk_need_resched(prev);
- if ((prev->state & ~TASK_RUNNING_MUTEX) &&
+ if (!(prev->state & TASK_RUNNING_MUTEX) && prev->state &&
!(preempt_count() & PREEMPT_ACTIVE)) {
if (unlikely(signal_pending_state(prev->state, prev)))
prev->state = TASK_RUNNING;
@@ -5749,8 +5757,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
void *key)
{
- return try_to_wake_up(curr->private, mode | TASK_RUNNING_MUTEX,
- wake_flags, 0);
+ return try_to_wake_up(curr->private, mode, wake_flags, 0);
}
EXPORT_SYMBOL(default_wake_function);
--
1.7.1.1