| From: Oleg Nesterov <oleg@redhat.com> |
| Subject: signal/x86: Delay calling signals in atomic |
| |
| On x86_64 we must disable preemption before we enable interrupts |
| for stack faults, int3 and debugging, because the current task is using |
| a per CPU debug stack defined by the IST. If we schedule out, another task |
| can come in and use the same stack and cause the stack to be corrupted |
| and crash the kernel on return. |
| |
| When CONFIG_PREEMPT_RT_FULL is enabled, spin_locks become mutexes, and |
| one of these is the spin lock used in signal handling. |
| |
| Some of the debug code (int3) causes do_trap() to send a signal. |
| This function takes a spin lock that has been converted to a mutex |
| and may therefore sleep. If this happens, the stack corruption |
| described above is possible. |
| |
| Instead of sending the signal right away, for PREEMPT_RT and x86_64, |
| the signal information is stored in the task's task_struct and |
| TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume |
| code will send the signal when preemption is enabled. |
| |
| [ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT_FULL to |
| ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ] |
| |
| Cc: stable-rt@vger.kernel.org |
| Signed-off-by: Oleg Nesterov <oleg@redhat.com> |
| Signed-off-by: Steven Rostedt <rostedt@goodmis.org> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| --- |
| |
| arch/x86/include/asm/signal.h | 13 +++++++++++++ |
| arch/x86/kernel/signal.c | 8 ++++++++ |
| include/linux/sched.h | 4 ++++ |
| kernel/signal.c | 37 +++++++++++++++++++++++++++++++++++-- |
| 4 files changed, 60 insertions(+), 2 deletions(-) |
| |
| Index: linux-stable/arch/x86/include/asm/signal.h |
| =================================================================== |
| --- linux-stable.orig/arch/x86/include/asm/signal.h |
| +++ linux-stable/arch/x86/include/asm/signal.h |
| @@ -31,6 +31,19 @@ typedef struct { |
| unsigned long sig[_NSIG_WORDS]; |
| } sigset_t; |
| |
| +/* |
| + * Because some traps use the IST stack, we must keep |
| + * preemption disabled while calling do_trap(), but do_trap() |
| + * may call force_sig_info() which will grab the signal spin_locks |
| + * for the task, which in PREEMPT_RT_FULL are mutexes. |
| + * When ARCH_RT_DELAYS_SIGNAL_SEND is defined, force_sig_info() will |
| + * set TIF_NOTIFY_RESUME and set up the signal to be sent on exit |
| + * of the trap. |
| + */ |
| +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64) |
| +#define ARCH_RT_DELAYS_SIGNAL_SEND |
| +#endif |
| + |
| #else |
| /* Here we must cater to libcs that poke about in kernel headers. */ |
| |
| Index: linux-stable/arch/x86/kernel/signal.c |
| =================================================================== |
| --- linux-stable.orig/arch/x86/kernel/signal.c |
| +++ linux-stable/arch/x86/kernel/signal.c |
| @@ -785,6 +785,14 @@ do_notify_resume(struct pt_regs *regs, v |
| mce_notify_process(); |
| #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ |
| |
| +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND |
| + if (unlikely(current->forced_info.si_signo)) { |
| + struct task_struct *t = current; |
| + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); |
| + t->forced_info.si_signo = 0; |
| + } |
| +#endif |
| + |
| if (thread_info_flags & _TIF_UPROBE) { |
| clear_thread_flag(TIF_UPROBE); |
| uprobe_notify_resume(regs); |
| Index: linux-stable/include/linux/sched.h |
| =================================================================== |
| --- linux-stable.orig/include/linux/sched.h |
| +++ linux-stable/include/linux/sched.h |
| @@ -1408,6 +1408,10 @@ struct task_struct { |
| sigset_t blocked, real_blocked; |
| sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ |
| struct sigpending pending; |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + /* TODO: move me into ->restart_block ? */ |
| + struct siginfo forced_info; |
| +#endif |
| |
| unsigned long sas_ss_sp; |
| size_t sas_ss_size; |
| Index: linux-stable/kernel/signal.c |
| =================================================================== |
| --- linux-stable.orig/kernel/signal.c |
| +++ linux-stable/kernel/signal.c |
| @@ -1305,8 +1305,8 @@ int do_send_sig_info(int sig, struct sig |
| * We don't want to have recursive SIGSEGV's etc, for example, |
| * that is why we also clear SIGNAL_UNKILLABLE. |
| */ |
| -int |
| -force_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
| +static int |
| +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
| { |
| unsigned long int flags; |
| int ret, blocked, ignored; |
| @@ -1331,6 +1331,39 @@ force_sig_info(int sig, struct siginfo * |
| return ret; |
| } |
| |
| +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
| +{ |
| +/* |
| + * On some archs, PREEMPT_RT has to delay sending a signal from a trap |
| + * since it can not enable preemption, and the signal code's spin_locks |
| + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will |
| + * send the signal on exit of the trap. |
| + */ |
| +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND |
| + if (in_atomic()) { |
| + if (WARN_ON_ONCE(t != current)) |
| + return 0; |
| + if (WARN_ON_ONCE(t->forced_info.si_signo)) |
| + return 0; |
| + |
| + if (is_si_special(info)) { |
| + WARN_ON_ONCE(info != SEND_SIG_PRIV); |
| + t->forced_info.si_signo = sig; |
| + t->forced_info.si_errno = 0; |
| + t->forced_info.si_code = SI_KERNEL; |
| + t->forced_info.si_pid = 0; |
| + t->forced_info.si_uid = 0; |
| + } else { |
| + t->forced_info = *info; |
| + } |
| + |
| + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); |
| + return 0; |
| + } |
| +#endif |
| + return do_force_sig_info(sig, info, t); |
| +} |
| + |
| /* |
| * Nuke all other threads in the group. |
| */ |