| From foo@baz Mon Feb 1 04:21:37 PM CET 2021 |
| From: Lee Jones <lee.jones@linaro.org> |
| Date: Mon, 1 Feb 2021 15:12:11 +0000 |
| Subject: futex: Provide state handling for exec() as well |
| To: stable@vger.kernel.org |
| Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Lee Jones <lee.jones@linaro.org> |
| Message-ID: <20210201151214.2193508-10-lee.jones@linaro.org> |
| |
| From: Thomas Gleixner <tglx@linutronix.de> |
| |
| commit af8cbda2cfcaa5515d61ec500498d46e9a8247e2 upstream. |
| |
| exec() attempts to handle potentially held futexes gracefully by running |
| the futex exit handling code like exit() does. |
| |
| The current implementation has no protection against concurrent incoming |
| waiters. The reason is that the futex state cannot be set to |
| FUTEX_STATE_DEAD after the cleanup because the task struct is still active |
| and just about to execute the new binary. |
| |
| While it's arguably buggy when a task holds a futex over exec(), for |
| consistency's sake the state handling can at least cover the actual futex |
| exit cleanup section. This provides state consistency protection across |
| the cleanup. As the futex state of the task becomes FUTEX_STATE_OK after the |
| cleanup has been finished, this cannot prevent subsequent attempts to |
| attach to the task in case that the cleanup was not successful in mopping |
| up all leftovers. |
| |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Reviewed-by: Ingo Molnar <mingo@kernel.org> |
| Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> |
| Link: https://lkml.kernel.org/r/20191106224556.753355618@linutronix.de |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| Signed-off-by: Lee Jones <lee.jones@linaro.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| kernel/futex.c | 38 ++++++++++++++++++++++++++++++++++---- |
| 1 file changed, 34 insertions(+), 4 deletions(-) |
| |
| --- a/kernel/futex.c |
| +++ b/kernel/futex.c |
| @@ -3234,7 +3234,7 @@ static void exit_robust_list(struct task |
| curr, pip); |
| } |
| |
| -void futex_exec_release(struct task_struct *tsk) |
| +static void futex_cleanup(struct task_struct *tsk) |
| { |
| if (unlikely(tsk->robust_list)) { |
| exit_robust_list(tsk); |
| @@ -3274,7 +3274,7 @@ void futex_exit_recursive(struct task_st |
| tsk->futex_state = FUTEX_STATE_DEAD; |
| } |
| |
| -void futex_exit_release(struct task_struct *tsk) |
| +static void futex_cleanup_begin(struct task_struct *tsk) |
| { |
| /* |
| * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. |
| @@ -3290,10 +3290,40 @@ void futex_exit_release(struct task_stru |
| raw_spin_lock_irq(&tsk->pi_lock); |
| tsk->futex_state = FUTEX_STATE_EXITING; |
| raw_spin_unlock_irq(&tsk->pi_lock); |
| +} |
| |
| - futex_exec_release(tsk); |
| +static void futex_cleanup_end(struct task_struct *tsk, int state) |
| +{ |
| + /* |
| + * Lockless store. The only side effect is that an observer might |
| + * take another loop until it becomes visible. |
| + */ |
| + tsk->futex_state = state; |
| +} |
| |
| - tsk->futex_state = FUTEX_STATE_DEAD; |
| +void futex_exec_release(struct task_struct *tsk) |
| +{ |
| + /* |
| + * The state handling is done for consistency, but in the case of |
| + * exec() there is no way to prevent futher damage as the PID stays |
| + * the same. But for the unlikely and arguably buggy case that a |
| + * futex is held on exec(), this provides at least as much state |
| + * consistency protection which is possible. |
| + */ |
| + futex_cleanup_begin(tsk); |
| + futex_cleanup(tsk); |
| + /* |
| + * Reset the state to FUTEX_STATE_OK. The task is alive and about |
| + * exec a new binary. |
| + */ |
| + futex_cleanup_end(tsk, FUTEX_STATE_OK); |
| +} |
| + |
| +void futex_exit_release(struct task_struct *tsk) |
| +{ |
| + futex_cleanup_begin(tsk); |
| + futex_cleanup(tsk); |
| + futex_cleanup_end(tsk, FUTEX_STATE_DEAD); |
| } |
| |
| long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, |