| From: Oleg Nesterov <oleg@redhat.com> |
| Date: Tue, 14 Jul 2015 14:26:34 +0200 |
| Subject: signal/x86: Delay calling signals in atomic |
| |
| On x86_64 we must disable preemption before we enable interrupts |
| for stack faults, int3 and debugging, because the current task is using |
| a per CPU debug stack defined by the IST. If we schedule out, another task |
| can come in and use the same stack and cause the stack to be corrupted |
| and crash the kernel on return. |
| |
| When CONFIG_PREEMPT_RT_FULL is enabled, spin_locks become mutexes, and |
| one of these is the spin lock used in signal handling. |
| |
| Some of the debug code (int3) causes do_trap() to send a signal. |
| Sending the signal takes a spin lock that has been converted to a mutex |
| and may therefore sleep. If this happens, the stack corruption described |
| above is possible. |
| |
| Instead of sending the signal right away, for PREEMPT_RT and x86_64, |
| the signal information is stored in the task's task_struct and |
| TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume |
| code will send the signal when preemption is enabled. |
| |
| [ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT_FULL to |
| ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ] |
| |
| |
| Signed-off-by: Oleg Nesterov <oleg@redhat.com> |
| Signed-off-by: Steven Rostedt <rostedt@goodmis.org> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| --- |
| |
| arch/x86/entry/common.c | 7 +++++++ |
| arch/x86/include/asm/signal.h | 13 +++++++++++++ |
| include/linux/sched.h | 4 ++++ |
| kernel/signal.c | 37 +++++++++++++++++++++++++++++++++++-- |
| 4 files changed, 59 insertions(+), 2 deletions(-) |
| |
| --- a/arch/x86/entry/common.c |
| +++ b/arch/x86/entry/common.c |
| @@ -148,6 +148,13 @@ static void exit_to_usermode_loop(struct |
| if (cached_flags & _TIF_NEED_RESCHED) |
| schedule(); |
| |
| +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND |
| + if (unlikely(current->forced_info.si_signo)) { |
| + struct task_struct *t = current; |
| + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); |
| + t->forced_info.si_signo = 0; |
| + } |
| +#endif |
| if (cached_flags & _TIF_UPROBE) |
| uprobe_notify_resume(regs); |
| |
| --- a/arch/x86/include/asm/signal.h |
| +++ b/arch/x86/include/asm/signal.h |
| @@ -27,6 +27,19 @@ typedef struct { |
| #define SA_IA32_ABI 0x02000000u |
| #define SA_X32_ABI 0x01000000u |
| |
| +/* |
| + * Because some traps use the IST stack, we must keep preemption |
| + * disabled while calling do_trap(), but do_trap() may call |
| + * force_sig_info() which will grab the signal spin_locks for the |
| + * task, which in PREEMPT_RT_FULL are mutexes. By defining |
| + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set |
| + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the |
| + * trap. |
| + */ |
| +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64) |
| +#define ARCH_RT_DELAYS_SIGNAL_SEND |
| +#endif |
| + |
| #ifndef CONFIG_COMPAT |
| typedef sigset_t compat_sigset_t; |
| #endif |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -1695,6 +1695,10 @@ struct task_struct { |
| sigset_t blocked, real_blocked; |
| sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ |
| struct sigpending pending; |
| +#ifdef CONFIG_PREEMPT_RT_FULL |
| + /* TODO: move me into ->restart_block ? */ |
| + struct siginfo forced_info; |
| +#endif |
| |
| unsigned long sas_ss_sp; |
| size_t sas_ss_size; |
| --- a/kernel/signal.c |
| +++ b/kernel/signal.c |
| @@ -1216,8 +1216,8 @@ int do_send_sig_info(int sig, struct sig |
| * We don't want to have recursive SIGSEGV's etc, for example, |
| * that is why we also clear SIGNAL_UNKILLABLE. |
| */ |
| -int |
| -force_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
| +static int |
| +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
| { |
| unsigned long int flags; |
| int ret, blocked, ignored; |
| @@ -1242,6 +1242,39 @@ force_sig_info(int sig, struct siginfo * |
| return ret; |
| } |
| |
| +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
| +{ |
| +/* |
| + * On some archs, PREEMPT_RT has to delay sending a signal from a trap |
| + * since it can not enable preemption, and the signal code's spin_locks |
| + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will |
| + * send the signal on exit of the trap. |
| + */ |
| +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND |
| + if (in_atomic()) { |
| + if (WARN_ON_ONCE(t != current)) |
| + return 0; |
| + if (WARN_ON_ONCE(t->forced_info.si_signo)) |
| + return 0; |
| + |
| + if (is_si_special(info)) { |
| + WARN_ON_ONCE(info != SEND_SIG_PRIV); |
| + t->forced_info.si_signo = sig; |
| + t->forced_info.si_errno = 0; |
| + t->forced_info.si_code = SI_KERNEL; |
| + t->forced_info.si_pid = 0; |
| + t->forced_info.si_uid = 0; |
| + } else { |
| + t->forced_info = *info; |
| + } |
| + |
| + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); |
| + return 0; |
| + } |
| +#endif |
| + return do_force_sig_info(sig, info, t); |
| +} |
| + |
| /* |
| * Nuke all other threads in the group. |
| */ |