releases/5.4.94/futex_Handle_faults_correctly_for_PI_futexes.patch - pub/scm/linux/kernel/git/stable/stable-queue - Git at Google

 Subject: futex: Handle faults correctly for PI futexes
 From: Thomas Gleixner <tglx@linutronix.de>
 Date: Mon Jan 18 19:01:21 2021 +0100

 From: Thomas Gleixner <tglx@linutronix.de>

 commit 34b1a1ce1458f50ef27c54e28eb9b1947012907a upstream

 fixup_pi_state_owner() tries to ensure that the state of the rtmutex,
 pi_state and the user space value related to the PI futex are consistent
 before returning to user space. In case that the user space value update
 faults and the fault cannot be resolved by faulting the page in via
 fault_in_user_writeable() the function returns with -EFAULT and leaves
 the rtmutex and pi_state owner state inconsistent.

 A subsequent futex_unlock_pi() operates on the inconsistent pi_state and
 releases the rtmutex despite not owning it which can corrupt the RB tree of
 the rtmutex and cause a subsequent kernel stack use after free.

 It was suggested to loop forever in fixup_pi_state_owner() if the fault
 cannot be resolved, but that results in runaway tasks which is especially
 undesired when the problem happens due to a programming error and not due
 to malice.

 As the user space value cannot be fixed up, the proper solution is to make
 the rtmutex and the pi_state consistent so both have the same owner. This
 leaves the user space value out of sync. Any subsequent operation on the
 futex will fail because the 10th rule of PI futexes (pi_state owner and
 user space value are consistent) has been violated.

 As a consequence this removes the inept attempts of 'fixing' the situation
 in case that the current task owns the rtmutex when returning with an
 unresolvable fault by unlocking the rtmutex which left pi_state::owner and
 rtmutex::owner out of sync in a different and only slightly less dangerous
 way.

 Fixes: 1b7558e457ed ("futexes: fix fault handling in futex_lock_pi")
 Reported-by: gzobqq@gmail.com
 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
 Cc: stable@vger.kernel.org
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 ---
  kernel/futex.c |   56 ++++++++++++++++++++------------------------------------
  1 file changed, 20 insertions(+), 36 deletions(-)

 --- a/kernel/futex.c
 +++ b/kernel/futex.c
 @@ -1052,7 +1052,8 @@ static inline void exit_pi_state_list(st
   *	FUTEX_OWNER_DIED bit. See [4]
   *
   * [10] There is no transient state which leaves owner and user space
 - *	TID out of sync.
 + *	TID out of sync. Except one error case where the kernel is denied
 + *	write access to the user address, see fixup_pi_state_owner().
   *
   *
   * Serialization and lifetime rules:
 @@ -2613,6 +2614,24 @@ handle_err:
  	if (!err)
  		goto retry;

 +	/*
 +	 * fault_in_user_writeable() failed so user state is immutable. At
 +	 * best we can make the kernel state consistent but user state will
 +	 * be most likely hosed and any subsequent unlock operation will be
 +	 * rejected due to PI futex rule [10].
 +	 *
 +	 * Ensure that the rtmutex owner is also the pi_state owner despite
 +	 * the user space value claiming something different. There is no
 +	 * point in unlocking the rtmutex if current is the owner as it
 +	 * would need to wait until the next waiter has taken the rtmutex
 +	 * to guarantee consistent state. Keep it simple. Userspace asked
 +	 * for this wreckaged state.
 +	 *
 +	 * The rtmutex has an owner - either current or some other
 +	 * task. See the EAGAIN loop above.
 +	 */
 +	pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
 +
  	return err;
  }

 @@ -2893,7 +2912,6 @@ static int futex_lock_pi(u32 __user *uad
  			 ktime_t *time, int trylock)
  {
  	struct hrtimer_sleeper timeout, *to;
 -	struct futex_pi_state *pi_state = NULL;
  	struct task_struct *exiting = NULL;
  	struct rt_mutex_waiter rt_waiter;
  	struct futex_hash_bucket *hb;
 @@ -3030,23 +3048,9 @@ no_block:
  	if (res)
  		ret = (res < 0) ? res : 0;

 -	/*
 -	 * If fixup_owner() faulted and was unable to handle the fault, unlock
 -	 * it and return the fault to userspace.
 -	 */
 -	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
 -		pi_state = q.pi_state;
 -		get_pi_state(pi_state);
 -	}
 -
  	/* Unqueue and drop the lock */
  	unqueue_me_pi(&q);

 -	if (pi_state) {
 -		rt_mutex_futex_unlock(&pi_state->pi_mutex);
 -		put_pi_state(pi_state);
 -	}
 -
  	goto out_put_key;

  out_unlock_put_key:
 @@ -3312,7 +3316,6 @@ static int futex_wait_requeue_pi(u32 __u
  				 u32 __user *uaddr2)
  {
  	struct hrtimer_sleeper timeout, *to;
 -	struct futex_pi_state *pi_state = NULL;
  	struct rt_mutex_waiter rt_waiter;
  	struct futex_hash_bucket *hb;
  	union futex_key key2 = FUTEX_KEY_INIT;
 @@ -3390,10 +3393,6 @@ static int futex_wait_requeue_pi(u32 __u
  		if (q.pi_state && (q.pi_state->owner != current)) {
  			spin_lock(q.lock_ptr);
  			ret = fixup_pi_state_owner(uaddr2, &q, current);
 -			if (ret < 0 && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
 -				pi_state = q.pi_state;
 -				get_pi_state(pi_state);
 -			}
  			/*
  			 * Drop the reference to the pi state which
  			 * the requeue_pi() code acquired for us.
 @@ -3435,25 +3434,10 @@ static int futex_wait_requeue_pi(u32 __u
  		if (res)
  			ret = (res < 0) ? res : 0;

 -		/*
 -		 * If fixup_pi_state_owner() faulted and was unable to handle
 -		 * the fault, unlock the rt_mutex and return the fault to
 -		 * userspace.
 -		 */
 -		if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
 -			pi_state = q.pi_state;
 -			get_pi_state(pi_state);
 -		}
 -
  		/* Unqueue and drop the lock. */
  		unqueue_me_pi(&q);
  	}

 -	if (pi_state) {
 -		rt_mutex_futex_unlock(&pi_state->pi_mutex);
 -		put_pi_state(pi_state);
 -	}
 -
  	if (ret == -EINTR) {
  		/*
  		 * We've already been requeued, but cannot restart by calling
	Subject: futex: Handle faults correctly for PI futexes
	From: Thomas Gleixner <tglx@linutronix.de>
	Date: Mon Jan 18 19:01:21 2021 +0100

	From: Thomas Gleixner <tglx@linutronix.de>

	commit 34b1a1ce1458f50ef27c54e28eb9b1947012907a upstream

	fixup_pi_state_owner() tries to ensure that the state of the rtmutex,
	pi_state and the user space value related to the PI futex are consistent
	before returning to user space. In case that the user space value update
	faults and the fault cannot be resolved by faulting the page in via
	fault_in_user_writeable() the function returns with -EFAULT and leaves
	the rtmutex and pi_state owner state inconsistent.

	A subsequent futex_unlock_pi() operates on the inconsistent pi_state and
	releases the rtmutex despite not owning it which can corrupt the RB tree of
	the rtmutex and cause a subsequent kernel stack use after free.

	It was suggested to loop forever in fixup_pi_state_owner() if the fault
	cannot be resolved, but that results in runaway tasks which is especially
	undesired when the problem happens due to a programming error and not due
	to malice.

	As the user space value cannot be fixed up, the proper solution is to make
	the rtmutex and the pi_state consistent so both have the same owner. This
	leaves the user space value out of sync. Any subsequent operation on the
	futex will fail because the 10th rule of PI futexes (pi_state owner and
	user space value are consistent) has been violated.

	As a consequence this removes the inept attempts of 'fixing' the situation
	in case that the current task owns the rtmutex when returning with an
	unresolvable fault by unlocking the rtmutex which left pi_state::owner and
	rtmutex::owner out of sync in a different and only slightly less dangerous
	way.

	Fixes: 1b7558e457ed ("futexes: fix fault handling in futex_lock_pi")
	Reported-by: gzobqq@gmail.com
	Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
	Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
	Cc: stable@vger.kernel.org
	Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	---
	kernel/futex.c \| 56 ++++++++++++++++++++------------------------------------
	1 file changed, 20 insertions(+), 36 deletions(-)

	--- a/kernel/futex.c
	+++ b/kernel/futex.c
	@@ -1052,7 +1052,8 @@ static inline void exit_pi_state_list(st
	* FUTEX_OWNER_DIED bit. See [4]
	*
	* [10] There is no transient state which leaves owner and user space
	- * TID out of sync.
	+ * TID out of sync. Except one error case where the kernel is denied
	+ * write access to the user address, see fixup_pi_state_owner().
	*
	*
	* Serialization and lifetime rules:
	@@ -2613,6 +2614,24 @@ handle_err:
	if (!err)
	goto retry;

	+ /*
	+ * fault_in_user_writeable() failed so user state is immutable. At
	+ * best we can make the kernel state consistent but user state will
	+ * be most likely hosed and any subsequent unlock operation will be
	+ * rejected due to PI futex rule [10].
	+ *
	+ * Ensure that the rtmutex owner is also the pi_state owner despite
	+ * the user space value claiming something different. There is no
	+ * point in unlocking the rtmutex if current is the owner as it
	+ * would need to wait until the next waiter has taken the rtmutex
	+ * to guarantee consistent state. Keep it simple. Userspace asked
	+ * for this wreckaged state.
	+ *
	+ * The rtmutex has an owner - either current or some other
	+ * task. See the EAGAIN loop above.
	+ */
	+ pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
	+
	return err;
	}

	@@ -2893,7 +2912,6 @@ static int futex_lock_pi(u32 __user *uad
	ktime_t *time, int trylock)
	{
	struct hrtimer_sleeper timeout, *to;
	- struct futex_pi_state *pi_state = NULL;
	struct task_struct *exiting = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	@@ -3030,23 +3048,9 @@ no_block:
	if (res)
	ret = (res < 0) ? res : 0;

	- /*
	- * If fixup_owner() faulted and was unable to handle the fault, unlock
	- * it and return the fault to userspace.
	- */
	- if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
	- pi_state = q.pi_state;
	- get_pi_state(pi_state);
	- }
	-
	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	- if (pi_state) {
	- rt_mutex_futex_unlock(&pi_state->pi_mutex);
	- put_pi_state(pi_state);
	- }
	-
	goto out_put_key;

	out_unlock_put_key:
	@@ -3312,7 +3316,6 @@ static int futex_wait_requeue_pi(u32 __u
	u32 __user *uaddr2)
	{
	struct hrtimer_sleeper timeout, *to;
	- struct futex_pi_state *pi_state = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	@@ -3390,10 +3393,6 @@ static int futex_wait_requeue_pi(u32 __u
	if (q.pi_state && (q.pi_state->owner != current)) {
	spin_lock(q.lock_ptr);
	ret = fixup_pi_state_owner(uaddr2, &q, current);
	- if (ret < 0 && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
	- pi_state = q.pi_state;
	- get_pi_state(pi_state);
	- }
	/*
	* Drop the reference to the pi state which
	* the requeue_pi() code acquired for us.
	@@ -3435,25 +3434,10 @@ static int futex_wait_requeue_pi(u32 __u
	if (res)
	ret = (res < 0) ? res : 0;

	- /*
	- * If fixup_pi_state_owner() faulted and was unable to handle
	- * the fault, unlock the rt_mutex and return the fault to
	- * userspace.
	- */
	- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
	- pi_state = q.pi_state;
	- get_pi_state(pi_state);
	- }
	-
	/* Unqueue and drop the lock. */
	unqueue_me_pi(&q);
	}

	- if (pi_state) {
	- rt_mutex_futex_unlock(&pi_state->pi_mutex);
	- put_pi_state(pi_state);
	- }
	-
	if (ret == -EINTR) {
	/*
	* We've already been requeued, but cannot restart by calling