releases/4.4.255/futex-replace-pf_exitpidone-with-a-state.patch - pub/scm/linux/kernel/git/stable/stable-queue - Git at Google

 From foo@baz Mon Feb  1 04:21:37 PM CET 2021
 From: Lee Jones <lee.jones@linaro.org>
 Date: Mon,  1 Feb 2021 15:12:05 +0000
 Subject: futex: Replace PF_EXITPIDONE with a state
 To: stable@vger.kernel.org
 Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
 Message-ID: <20210201151214.2193508-4-lee.jones@linaro.org>

 From: Thomas Gleixner <tglx@linutronix.de>

 commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream.

 The futex exit handling relies on PF_ flags. That's suboptimal as it
 requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in
 the middle of do_exit() to enforce the observability of PF_EXITING in the
 futex code.

 Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic
 over to the new state. The PF_EXITING dependency will be cleaned up in a
 later step.

 This prepares for handling various futex exit issues later.

 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 Reviewed-by: Ingo Molnar <mingo@kernel.org>
 Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
 Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 Signed-off-by: Lee Jones <lee.jones@linaro.org>
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 ---
  include/linux/futex.h |   34 ++++++++++++++++++++++++++++++++++
  include/linux/sched.h |    2 +-
  kernel/exit.c         |   18 ++----------------
  kernel/futex.c        |   17 ++++++++---------
  4 files changed, 45 insertions(+), 26 deletions(-)

 --- a/include/linux/futex.h
 +++ b/include/linux/futex.h
 @@ -55,6 +55,11 @@ union futex_key {
  #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }

  #ifdef CONFIG_FUTEX
 +enum {
 +	FUTEX_STATE_OK,
 +	FUTEX_STATE_DEAD,
 +};
 +
  static inline void futex_init_task(struct task_struct *tsk)
  {
  	tsk->robust_list = NULL;
 @@ -63,6 +68,34 @@ static inline void futex_init_task(struc
  #endif
  	INIT_LIST_HEAD(&tsk->pi_state_list);
  	tsk->pi_state_cache = NULL;
 +	tsk->futex_state = FUTEX_STATE_OK;
 +}
 +
 +/**
 + * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
 + * @tsk:	task to set the state on
 + *
 + * Set the futex exit state of the task lockless. The futex waiter code
 + * observes that state when a task is exiting and loops until the task has
 + * actually finished the futex cleanup. The worst case for this is that the
 + * waiter runs through the wait loop until the state becomes visible.
 + *
 + * This has two callers:
 + *
 + * - futex_mm_release() after the futex exit cleanup has been done
 + *
 + * - do_exit() from the recursive fault handling path.
 + *
 + * In case of a recursive fault this is best effort. Either the futex exit
 + * code has run already or not. If the OWNER_DIED bit has been set on the
 + * futex then the waiter can take it over. If not, the problem is pushed
 + * back to user space. If the futex exit code did not run yet, then an
 + * already queued waiter might block forever, but there is nothing which
 + * can be done about that.
 + */
 +static inline void futex_exit_done(struct task_struct *tsk)
 +{
 +	tsk->futex_state = FUTEX_STATE_DEAD;
  }

  void futex_mm_release(struct task_struct *tsk);
 @@ -72,5 +105,6 @@ long do_futex(u32 __user *uaddr, int op,
  #else
  static inline void futex_init_task(struct task_struct *tsk) { }
  static inline void futex_mm_release(struct task_struct *tsk) { }
 +static inline void futex_exit_done(struct task_struct *tsk) { }
  #endif
  #endif
 --- a/include/linux/sched.h
 +++ b/include/linux/sched.h
 @@ -1704,6 +1704,7 @@ struct task_struct {
  #endif
  	struct list_head pi_state_list;
  	struct futex_pi_state *pi_state_cache;
 +	unsigned int futex_state;
  #endif
  #ifdef CONFIG_PERF_EVENTS
  	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
 @@ -2099,7 +2100,6 @@ extern void thread_group_cputime_adjuste
   * Per process flags
   */
  #define PF_EXITING	0x00000004	/* getting shut down */
 -#define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
  #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
  #define PF_WQ_WORKER	0x00000020	/* I'm a workqueue worker */
  #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
 --- a/kernel/exit.c
 +++ b/kernel/exit.c
 @@ -695,16 +695,7 @@ void do_exit(long code)
  	 */
  	if (unlikely(tsk->flags & PF_EXITING)) {
  		pr_alert("Fixing recursive fault but reboot is needed!\n");
 -		/*
 -		 * We can do this unlocked here. The futex code uses
 -		 * this flag just to verify whether the pi state
 -		 * cleanup has been done or not. In the worst case it
 -		 * loops once more. We pretend that the cleanup was
 -		 * done as there is no way to return. Either the
 -		 * OWNER_DIED bit is set by now or we push the blocked
 -		 * task into the wait for ever nirwana as well.
 -		 */
 -		tsk->flags |= PF_EXITPIDONE;
 +		futex_exit_done(tsk);
  		set_current_state(TASK_UNINTERRUPTIBLE);
  		schedule();
  	}
 @@ -793,12 +784,7 @@ void do_exit(long code)
  	 * Make sure we are holding no locks:
  	 */
  	debug_check_no_locks_held();
 -	/*
 -	 * We can do this unlocked here. The futex code uses this flag
 -	 * just to verify whether the pi state cleanup has been done
 -	 * or not. In the worst case it loops once more.
 -	 */
 -	tsk->flags |= PF_EXITPIDONE;
 +	futex_exit_done(tsk);

  	if (tsk->io_context)
  		exit_io_context(tsk);
 --- a/kernel/futex.c
 +++ b/kernel/futex.c
 @@ -1094,19 +1094,18 @@ static int attach_to_pi_owner(u32 uval,
  	}

  	/*
 -	 * We need to look at the task state flags to figure out,
 -	 * whether the task is exiting. To protect against the do_exit
 -	 * change of the task flags, we do this protected by
 -	 * p->pi_lock:
 +	 * We need to look at the task state to figure out, whether the
 +	 * task is exiting. To protect against the change of the task state
 +	 * in futex_exit_release(), we do this protected by p->pi_lock:
  	 */
  	raw_spin_lock_irq(&p->pi_lock);
 -	if (unlikely(p->flags & PF_EXITING)) {
 +	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
  		/*
 -		 * The task is on the way out. When PF_EXITPIDONE is
 -		 * set, we know that the task has finished the
 -		 * cleanup:
 +		 * The task is on the way out. When the futex state is
 +		 * FUTEX_STATE_DEAD, we know that the task has finished
 +		 * the cleanup:
  		 */
 -		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
 +		int ret = (p->futex_state = FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;

  		raw_spin_unlock_irq(&p->pi_lock);
  		put_task_struct(p);
	From foo@baz Mon Feb 1 04:21:37 PM CET 2021
	From: Lee Jones <lee.jones@linaro.org>
	Date: Mon, 1 Feb 2021 15:12:05 +0000
	Subject: futex: Replace PF_EXITPIDONE with a state
	To: stable@vger.kernel.org
	Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org>
	Message-ID: <20210201151214.2193508-4-lee.jones@linaro.org>

	From: Thomas Gleixner <tglx@linutronix.de>

	commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream.

	The futex exit handling relies on PF_ flags. That's suboptimal as it
	requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in
	the middle of do_exit() to enforce the observability of PF_EXITING in the
	futex code.

	Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic
	over to the new state. The PF_EXITING dependency will be cleaned up in a
	later step.

	This prepares for handling various futex exit issues later.

	Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
	Reviewed-by: Ingo Molnar <mingo@kernel.org>
	Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
	Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de
	Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Signed-off-by: Lee Jones <lee.jones@linaro.org>
	Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	---
	include/linux/futex.h \| 34 ++++++++++++++++++++++++++++++++++
	include/linux/sched.h \| 2 +-
	kernel/exit.c \| 18 ++----------------
	kernel/futex.c \| 17 ++++++++---------
	4 files changed, 45 insertions(+), 26 deletions(-)

	--- a/include/linux/futex.h
	+++ b/include/linux/futex.h
	@@ -55,6 +55,11 @@ union futex_key {
	#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }

	#ifdef CONFIG_FUTEX
	+enum {
	+ FUTEX_STATE_OK,
	+ FUTEX_STATE_DEAD,
	+};
	+
	static inline void futex_init_task(struct task_struct *tsk)
	{
	tsk->robust_list = NULL;
	@@ -63,6 +68,34 @@ static inline void futex_init_task(struc
	#endif
	INIT_LIST_HEAD(&tsk->pi_state_list);
	tsk->pi_state_cache = NULL;
	+ tsk->futex_state = FUTEX_STATE_OK;
	+}
	+
	+/**
	+ * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
	+ * @tsk: task to set the state on
	+ *
	+ * Set the futex exit state of the task lockless. The futex waiter code
	+ * observes that state when a task is exiting and loops until the task has
	+ * actually finished the futex cleanup. The worst case for this is that the
	+ * waiter runs through the wait loop until the state becomes visible.
	+ *
	+ * This has two callers:
	+ *
	+ * - futex_mm_release() after the futex exit cleanup has been done
	+ *
	+ * - do_exit() from the recursive fault handling path.
	+ *
	+ * In case of a recursive fault this is best effort. Either the futex exit
	+ * code has run already or not. If the OWNER_DIED bit has been set on the
	+ * futex then the waiter can take it over. If not, the problem is pushed
	+ * back to user space. If the futex exit code did not run yet, then an
	+ * already queued waiter might block forever, but there is nothing which
	+ * can be done about that.
	+ */
	+static inline void futex_exit_done(struct task_struct *tsk)
	+{
	+ tsk->futex_state = FUTEX_STATE_DEAD;
	}

	void futex_mm_release(struct task_struct *tsk);
	@@ -72,5 +105,6 @@ long do_futex(u32 __user *uaddr, int op,
	#else
	static inline void futex_init_task(struct task_struct *tsk) { }
	static inline void futex_mm_release(struct task_struct *tsk) { }
	+static inline void futex_exit_done(struct task_struct *tsk) { }
	#endif
	#endif
	--- a/include/linux/sched.h
	+++ b/include/linux/sched.h
	@@ -1704,6 +1704,7 @@ struct task_struct {
	#endif
	struct list_head pi_state_list;
	struct futex_pi_state *pi_state_cache;
	+ unsigned int futex_state;
	#endif
	#ifdef CONFIG_PERF_EVENTS
	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
	@@ -2099,7 +2100,6 @@ extern void thread_group_cputime_adjuste
	* Per process flags
	*/
	#define PF_EXITING 0x00000004 /* getting shut down */
	-#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
	#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
	#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
	#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
	--- a/kernel/exit.c
	+++ b/kernel/exit.c
	@@ -695,16 +695,7 @@ void do_exit(long code)
	*/
	if (unlikely(tsk->flags & PF_EXITING)) {
	pr_alert("Fixing recursive fault but reboot is needed!\n");
	- /*
	- * We can do this unlocked here. The futex code uses
	- * this flag just to verify whether the pi state
	- * cleanup has been done or not. In the worst case it
	- * loops once more. We pretend that the cleanup was
	- * done as there is no way to return. Either the
	- * OWNER_DIED bit is set by now or we push the blocked
	- * task into the wait for ever nirwana as well.
	- */
	- tsk->flags \|= PF_EXITPIDONE;
	+ futex_exit_done(tsk);
	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule();
	}
	@@ -793,12 +784,7 @@ void do_exit(long code)
	* Make sure we are holding no locks:
	*/
	debug_check_no_locks_held();
	- /*
	- * We can do this unlocked here. The futex code uses this flag
	- * just to verify whether the pi state cleanup has been done
	- * or not. In the worst case it loops once more.
	- */
	- tsk->flags \|= PF_EXITPIDONE;
	+ futex_exit_done(tsk);

	if (tsk->io_context)
	exit_io_context(tsk);
	--- a/kernel/futex.c
	+++ b/kernel/futex.c
	@@ -1094,19 +1094,18 @@ static int attach_to_pi_owner(u32 uval,
	}

	/*
	- * We need to look at the task state flags to figure out,
	- * whether the task is exiting. To protect against the do_exit
	- * change of the task flags, we do this protected by
	- * p->pi_lock:
	+ * We need to look at the task state to figure out, whether the
	+ * task is exiting. To protect against the change of the task state
	+ * in futex_exit_release(), we do this protected by p->pi_lock:
	*/
	raw_spin_lock_irq(&p->pi_lock);
	- if (unlikely(p->flags & PF_EXITING)) {
	+ if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
	/*
	- * The task is on the way out. When PF_EXITPIDONE is
	- * set, we know that the task has finished the
	- * cleanup:
	+ * The task is on the way out. When the futex state is
	+ * FUTEX_STATE_DEAD, we know that the task has finished
	+ * the cleanup:
	*/
	- int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
	+ int ret = (p->futex_state = FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;

	raw_spin_unlock_irq(&p->pi_lock);
	put_task_struct(p);