| From: Steven Rostedt <rostedt@goodmis.org> |
| Date: Tue, 14 Jul 2015 14:26:34 +0200 |
| Subject: futex: Fix bug when a requeued RT task times out |
| |
| Requeue with timeout causes a bug with PREEMPT_RT. |
| |
| The bug comes from a timed out condition. |
| |
| |
| TASK 1                                           TASK 2 |
| ------                                           ------ |
| futex_wait_requeue_pi() |
|   futex_wait_queue_me() |
|   <timed out> |
| |
|                                                  double_lock_hb(); |
| |
|   raw_spin_lock(pi_lock); |
|   if (current->pi_blocked_on) { |
|   } else { |
|     current->pi_blocked_on = PI_WAKEUP_INPROGRESS; |
|     raw_spin_unlock(pi_lock); |
|     spin_lock(hb->lock); <-- blocked! |
| |
| |
|                                                  plist_for_each_entry_safe(this) { |
|                                                    rt_mutex_start_proxy_lock(); |
|                                                      task_blocks_on_rt_mutex(); |
|                                                        BUG_ON(task->pi_blocked_on)!!!! |
| |
| The BUG_ON() actually has a check for PI_WAKEUP_INPROGRESS, but the |
| problem is that, after TASK 1 sets PI_WAKEUP_INPROGRESS, it then tries to |
| grab the hb->lock, which it fails to do. As the hb->lock is an rtmutex on |
| PREEMPT_RT, it will block and set the "pi_blocked_on" to the hb->lock. |
| |
| When TASK 2 goes to requeue it, the check for PI_WAKEUP_INPROGRESS fails |
| because TASK 1's pi_blocked_on is no longer set to that, but instead, |
| set to the hb->lock. |
| |
| The fix: |
| |
| When calling rt_mutex_start_proxy_lock() a check is made to see |
| if the proxy task's pi_blocked_on is set. If so, exit early with -EAGAIN. |
| Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies |
| the proxy task that it is being requeued, so that it can handle things |
| appropriately. |
| |
| |
| Signed-off-by: Steven Rostedt <rostedt@goodmis.org> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| --- |
| kernel/locking/rtmutex.c | 31 ++++++++++++++++++++++++++++++- |
| kernel/locking/rtmutex_common.h | 1 + |
| 2 files changed, 31 insertions(+), 1 deletion(-) |
| |
| --- a/kernel/locking/rtmutex.c |
| +++ b/kernel/locking/rtmutex.c |
| @@ -138,7 +138,8 @@ static void fixup_rt_mutex_waiters(struc |
| |
| static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) |
| { |
| - return waiter && waiter != PI_WAKEUP_INPROGRESS; |
| + return waiter && waiter != PI_WAKEUP_INPROGRESS && |
| + waiter != PI_REQUEUE_INPROGRESS; |
| } |
| |
| /* |
| @@ -1779,6 +1780,34 @@ int __rt_mutex_start_proxy_lock(struct r |
| if (try_to_take_rt_mutex(lock, task, NULL)) |
| return 1; |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| + /* |
| + * In PREEMPT_RT there's an added race. |
| + * If the task, that we are about to requeue, times out, |
| + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue |
| + * to skip this task. But right after the task sets |
| + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then |
| + * block on the spin_lock(&hb->lock), which in RT is an rtmutex. |
| + * This will replace the PI_WAKEUP_INPROGRESS with the actual |
| + * lock that it blocks on. We *must not* place this task |
| + * on this proxy lock in that case. |
| + * |
| + * To prevent this race, we first take the task's pi_lock |
| + * and check if it has updated its pi_blocked_on. If it has, |
| + * we assume that it woke up and we return -EAGAIN. |
| + * Otherwise, we set the task's pi_blocked_on to |
| + * PI_REQUEUE_INPROGRESS, so that if the task is waking up |
| + * it will know that we are in the process of requeuing it. |
| + */ |
| + raw_spin_lock(&task->pi_lock); |
| + if (task->pi_blocked_on) { |
| + raw_spin_unlock(&task->pi_lock); |
| + return -EAGAIN; |
| + } |
| + task->pi_blocked_on = PI_REQUEUE_INPROGRESS; |
| + raw_spin_unlock(&task->pi_lock); |
| +#endif |
| + |
| /* We enforce deadlock detection for futexes */ |
| ret = task_blocks_on_rt_mutex(lock, waiter, task, |
| RT_MUTEX_FULL_CHAINWALK); |
| --- a/kernel/locking/rtmutex_common.h |
| +++ b/kernel/locking/rtmutex_common.h |
| @@ -131,6 +131,7 @@ enum rtmutex_chainwalk { |
| * PI-futex support (proxy locking functions, etc.): |
| */ |
| #define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) |
| +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2) |
| |
| extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); |
| extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, |