| Subject: futex: Handle faults correctly for PI futexes |
| From: Thomas Gleixner <tglx@linutronix.de> |
| Date: Mon Jan 18 19:01:21 2021 +0100 |
| |
| From: Thomas Gleixner <tglx@linutronix.de> |
| |
| commit 34b1a1ce1458f50ef27c54e28eb9b1947012907a upstream |
| |
| fixup_pi_state_owner() tries to ensure that the state of the rtmutex, |
| pi_state and the user space value related to the PI futex are consistent |
| before returning to user space. In case that the user space value update |
| faults and the fault cannot be resolved by faulting the page in via |
| fault_in_user_writeable() the function returns with -EFAULT and leaves |
| the rtmutex and pi_state owner state inconsistent. |
| |
| A subsequent futex_unlock_pi() operates on the inconsistent pi_state and |
| releases the rtmutex despite not owning it which can corrupt the RB tree of |
| the rtmutex and cause a subsequent kernel stack use after free. |
| |
| It was suggested to loop forever in fixup_pi_state_owner() if the fault |
| cannot be resolved, but that results in runaway tasks which is especially |
| undesired when the problem happens due to a programming error and not due |
| to malice. |
| |
| As the user space value cannot be fixed up, the proper solution is to make |
| the rtmutex and the pi_state consistent so both have the same owner. This |
| leaves the user space value out of sync. Any subsequent operation on the |
| futex will fail because the 10th rule of PI futexes (pi_state owner and |
| user space value are consistent) has been violated. |
| |
| As a consequence this removes the inept attempts of 'fixing' the situation |
| in case that the current task owns the rtmutex when returning with an |
| unresolvable fault by unlocking the rtmutex which left pi_state::owner and |
| rtmutex::owner out of sync in a different and only slightly less dangerous |
| way. |
| |
| Fixes: 1b7558e457ed ("futexes: fix fault handling in futex_lock_pi") |
| Reported-by: gzobqq@gmail.com |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> |
| Cc: stable@vger.kernel.org |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| kernel/futex.c | 56 ++++++++++++++++++++------------------------------------ |
| 1 file changed, 20 insertions(+), 36 deletions(-) |
| |
| --- a/kernel/futex.c |
| +++ b/kernel/futex.c |
| @@ -1052,7 +1052,8 @@ static inline void exit_pi_state_list(st |
| * FUTEX_OWNER_DIED bit. See [4] |
| * |
| * [10] There is no transient state which leaves owner and user space |
| - * TID out of sync. |
| + * TID out of sync. Except one error case where the kernel is denied |
| + * write access to the user address, see fixup_pi_state_owner(). |
| * |
| * |
| * Serialization and lifetime rules: |
| @@ -2613,6 +2614,24 @@ handle_err: |
| if (!err) |
| goto retry; |
| |
| + /* |
| + * fault_in_user_writeable() failed so user state is immutable. At |
| + * best we can make the kernel state consistent but user state will |
| + * be most likely hosed and any subsequent unlock operation will be |
| + * rejected due to PI futex rule [10]. |
| + * |
| + * Ensure that the rtmutex owner is also the pi_state owner despite |
| + * the user space value claiming something different. There is no |
| + * point in unlocking the rtmutex if current is the owner as it |
| + * would need to wait until the next waiter has taken the rtmutex |
| + * to guarantee consistent state. Keep it simple. Userspace asked |
| + * for this wreckaged state. |
| + * |
| + * The rtmutex has an owner - either current or some other |
| + * task. See the EAGAIN loop above. |
| + */ |
| + pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); |
| + |
| return err; |
| } |
| |
| @@ -2893,7 +2912,6 @@ static int futex_lock_pi(u32 __user *uad |
| ktime_t *time, int trylock) |
| { |
| struct hrtimer_sleeper timeout, *to; |
| - struct futex_pi_state *pi_state = NULL; |
| struct task_struct *exiting = NULL; |
| struct rt_mutex_waiter rt_waiter; |
| struct futex_hash_bucket *hb; |
| @@ -3030,23 +3048,9 @@ no_block: |
| if (res) |
| ret = (res < 0) ? res : 0; |
| |
| - /* |
| - * If fixup_owner() faulted and was unable to handle the fault, unlock |
| - * it and return the fault to userspace. |
| - */ |
| - if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) { |
| - pi_state = q.pi_state; |
| - get_pi_state(pi_state); |
| - } |
| - |
| /* Unqueue and drop the lock */ |
| unqueue_me_pi(&q); |
| |
| - if (pi_state) { |
| - rt_mutex_futex_unlock(&pi_state->pi_mutex); |
| - put_pi_state(pi_state); |
| - } |
| - |
| goto out_put_key; |
| |
| out_unlock_put_key: |
| @@ -3312,7 +3316,6 @@ static int futex_wait_requeue_pi(u32 __u |
| u32 __user *uaddr2) |
| { |
| struct hrtimer_sleeper timeout, *to; |
| - struct futex_pi_state *pi_state = NULL; |
| struct rt_mutex_waiter rt_waiter; |
| struct futex_hash_bucket *hb; |
| union futex_key key2 = FUTEX_KEY_INIT; |
| @@ -3390,10 +3393,6 @@ static int futex_wait_requeue_pi(u32 __u |
| if (q.pi_state && (q.pi_state->owner != current)) { |
| spin_lock(q.lock_ptr); |
| ret = fixup_pi_state_owner(uaddr2, &q, current); |
| - if (ret < 0 && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { |
| - pi_state = q.pi_state; |
| - get_pi_state(pi_state); |
| - } |
| /* |
| * Drop the reference to the pi state which |
| * the requeue_pi() code acquired for us. |
| @@ -3435,25 +3434,10 @@ static int futex_wait_requeue_pi(u32 __u |
| if (res) |
| ret = (res < 0) ? res : 0; |
| |
| - /* |
| - * If fixup_pi_state_owner() faulted and was unable to handle |
| - * the fault, unlock the rt_mutex and return the fault to |
| - * userspace. |
| - */ |
| - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { |
| - pi_state = q.pi_state; |
| - get_pi_state(pi_state); |
| - } |
| - |
| /* Unqueue and drop the lock. */ |
| unqueue_me_pi(&q); |
| } |
| |
| - if (pi_state) { |
| - rt_mutex_futex_unlock(&pi_state->pi_mutex); |
| - put_pi_state(pi_state); |
| - } |
| - |
| if (ret == -EINTR) { |
| /* |
| * We've already been requeued, but cannot restart by calling |