patches/oleg-signal-rt-fix.patch - pub/scm/linux/kernel/git/rt/linux-rt-devel - Git at Google

 From: Oleg Nesterov <oleg@redhat.com>
 Date: Tue, 14 Jul 2015 14:26:34 +0200
 Subject: signal/x86: Delay calling signals in atomic

 On x86_64 we must disable preemption before we enable interrupts
 for stack faults, int3 and debugging, because the current task is using
 a per CPU debug stack defined by the IST. If we schedule out, another task
 can come in and use the same stack and cause the stack to be corrupted
 and crash the kernel on return.

 When CONFIG_PREEMPT_RT_FULL is enabled, spin_locks become mutexes, and
 one of these is the spin lock used in signal handling.

 Some of the debug code (int3) causes do_trap() to send a signal.
 This function calls a spin lock that has been converted to a mutex
 and has the possibility to sleep. If this happens, the above issue with
 the corrupted stack is possible.

 Instead of calling the signal right away, for PREEMPT_RT and x86_64,
 the signal information is stored in the task's task_struct and
 TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume
 code will send the signal when preemption is enabled.

 [ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT_FULL to
   ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ]


 Signed-off-by: Oleg Nesterov <oleg@redhat.com>
 Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 ---

  arch/x86/entry/common.c       |    7 +++++++
  arch/x86/include/asm/signal.h |   13 +++++++++++++
  include/linux/sched.h         |    4 ++++
  kernel/signal.c               |   37 +++++++++++++++++++++++++++++++++++--
  4 files changed, 59 insertions(+), 2 deletions(-)

 --- a/arch/x86/entry/common.c
 +++ b/arch/x86/entry/common.c
 @@ -151,6 +151,13 @@ static void exit_to_usermode_loop(struct
  		if (cached_flags & _TIF_NEED_RESCHED)
  			schedule();

 +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
 +		if (unlikely(current->forced_info.si_signo)) {
 +			struct task_struct *t = current;
 +			force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
 +			t->forced_info.si_signo = 0;
 +		}
 +#endif
  		if (cached_flags & _TIF_UPROBE)
  			uprobe_notify_resume(regs);

 --- a/arch/x86/include/asm/signal.h
 +++ b/arch/x86/include/asm/signal.h
 @@ -28,6 +28,19 @@ typedef struct {
  #define SA_IA32_ABI	0x02000000u
  #define SA_X32_ABI	0x01000000u

 +/*
 + * Because some traps use the IST stack, we must keep preemption
 + * disabled while calling do_trap(), but do_trap() may call
 + * force_sig_info() which will grab the signal spin_locks for the
 + * task, which in PREEMPT_RT_FULL are mutexes.  By defining
 + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
 + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
 + * trap.
 + */
 +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
 +#define ARCH_RT_DELAYS_SIGNAL_SEND
 +#endif
 +
  #ifndef CONFIG_COMPAT
  typedef sigset_t compat_sigset_t;
  #endif
 --- a/include/linux/sched.h
 +++ b/include/linux/sched.h
 @@ -805,6 +805,10 @@ struct task_struct {
  	/* Restored if set_restore_sigmask() was used: */
  	sigset_t			saved_sigmask;
  	struct sigpending		pending;
 +#ifdef CONFIG_PREEMPT_RT_FULL
 +	/* TODO: move me into ->restart_block ? */
 +	struct				siginfo forced_info;
 +#endif
  	unsigned long			sas_ss_sp;
  	size_t				sas_ss_size;
  	unsigned int			sas_ss_flags;
 --- a/kernel/signal.c
 +++ b/kernel/signal.c
 @@ -1185,8 +1185,8 @@ int do_send_sig_info(int sig, struct sig
   * We don't want to have recursive SIGSEGV's etc, for example,
   * that is why we also clear SIGNAL_UNKILLABLE.
   */
 -int
 -force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 +static int
 +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  {
  	unsigned long int flags;
  	int ret, blocked, ignored;
 @@ -1215,6 +1215,39 @@ force_sig_info(int sig, struct siginfo *
  	return ret;
  }

 +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 +{
 +/*
 + * On some archs, PREEMPT_RT has to delay sending a signal from a trap
 + * since it can not enable preemption, and the signal code's spin_locks
 + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
 + * send the signal on exit of the trap.
 + */
 +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
 +	if (in_atomic()) {
 +		if (WARN_ON_ONCE(t != current))
 +			return 0;
 +		if (WARN_ON_ONCE(t->forced_info.si_signo))
 +			return 0;
 +
 +		if (is_si_special(info)) {
 +			WARN_ON_ONCE(info != SEND_SIG_PRIV);
 +			t->forced_info.si_signo = sig;
 +			t->forced_info.si_errno = 0;
 +			t->forced_info.si_code = SI_KERNEL;
 +			t->forced_info.si_pid = 0;
 +			t->forced_info.si_uid = 0;
 +		} else {
 +			t->forced_info = *info;
 +		}
 +
 +		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
 +		return 0;
 +	}
 +#endif
 +	return do_force_sig_info(sig, info, t);
 +}
 +
  /*
   * Nuke all other threads in the group.
   */
	From: Oleg Nesterov <oleg@redhat.com>
	Date: Tue, 14 Jul 2015 14:26:34 +0200
	Subject: signal/x86: Delay calling signals in atomic

	On x86_64 we must disable preemption before we enable interrupts
	for stack faults, int3 and debugging, because the current task is using
	a per CPU debug stack defined by the IST. If we schedule out, another task
	can come in and use the same stack and cause the stack to be corrupted
	and crash the kernel on return.

	When CONFIG_PREEMPT_RT_FULL is enabled, spin_locks become mutexes, and
	one of these is the spin lock used in signal handling.

	Some of the debug code (int3) causes do_trap() to send a signal.
	This function calls a spin lock that has been converted to a mutex
	and has the possibility to sleep. If this happens, the above issue with
	the corrupted stack is possible.

	Instead of calling the signal right away, for PREEMPT_RT and x86_64,
	the signal information is stored in the task's task_struct and
	TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume
	code will send the signal when preemption is enabled.

	[ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT_FULL to
	ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ]


	Signed-off-by: Oleg Nesterov <oleg@redhat.com>
	Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
	Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
	---

	arch/x86/entry/common.c | 7 +++++++
	arch/x86/include/asm/signal.h | 13 +++++++++++++
	include/linux/sched.h | 4 ++++
	kernel/signal.c | 37 +++++++++++++++++++++++++++++++++++--
	4 files changed, 59 insertions(+), 2 deletions(-)

	--- a/arch/x86/entry/common.c
	+++ b/arch/x86/entry/common.c
	@@ -151,6 +151,13 @@ static void exit_to_usermode_loop(struct
	if (cached_flags & _TIF_NEED_RESCHED)
	schedule();

	+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
	+ if (unlikely(current->forced_info.si_signo)) {
	+ struct task_struct *t = current;
	+ force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
	+ t->forced_info.si_signo = 0;
	+ }
	+#endif
	if (cached_flags & _TIF_UPROBE)
	uprobe_notify_resume(regs);

	--- a/arch/x86/include/asm/signal.h
	+++ b/arch/x86/include/asm/signal.h
	@@ -28,6 +28,19 @@ typedef struct {
	#define SA_IA32_ABI 0x02000000u
	#define SA_X32_ABI 0x01000000u

	+/*
	+ * Because some traps use the IST stack, we must keep preemption
	+ * disabled while calling do_trap(), but do_trap() may call
	+ * force_sig_info() which will grab the signal spin_locks for the
	+ * task, which in PREEMPT_RT_FULL are mutexes. By defining
	+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
	+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
	+ * trap.
	+ */
	+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
	+#define ARCH_RT_DELAYS_SIGNAL_SEND
	+#endif
	+
	#ifndef CONFIG_COMPAT
	typedef sigset_t compat_sigset_t;
	#endif
	--- a/include/linux/sched.h
	+++ b/include/linux/sched.h
	@@ -805,6 +805,10 @@ struct task_struct {
	/* Restored if set_restore_sigmask() was used: */
	sigset_t saved_sigmask;
	struct sigpending pending;
	+#ifdef CONFIG_PREEMPT_RT_FULL
	+ /* TODO: move me into ->restart_block ? */
	+ struct siginfo forced_info;
	+#endif
	unsigned long sas_ss_sp;
	size_t sas_ss_size;
	unsigned int sas_ss_flags;
	--- a/kernel/signal.c
	+++ b/kernel/signal.c
	@@ -1185,8 +1185,8 @@ int do_send_sig_info(int sig, struct sig
	* We don't want to have recursive SIGSEGV's etc, for example,
	* that is why we also clear SIGNAL_UNKILLABLE.
	*/
	-int
	-force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
	+static int
	+do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
	{
	unsigned long int flags;
	int ret, blocked, ignored;
	@@ -1215,6 +1215,39 @@ force_sig_info(int sig, struct siginfo *
	return ret;
	}

	+int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
	+{
	+/*
	+ * On some archs, PREEMPT_RT has to delay sending a signal from a trap
	+ * since it can not enable preemption, and the signal code's spin_locks
	+ * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
	+ * send the signal on exit of the trap.
	+ */
	+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
	+ if (in_atomic()) {
	+ if (WARN_ON_ONCE(t != current))
	+ return 0;
	+ if (WARN_ON_ONCE(t->forced_info.si_signo))
	+ return 0;
	+
	+ if (is_si_special(info)) {
	+ WARN_ON_ONCE(info != SEND_SIG_PRIV);
	+ t->forced_info.si_signo = sig;
	+ t->forced_info.si_errno = 0;
	+ t->forced_info.si_code = SI_KERNEL;
	+ t->forced_info.si_pid = 0;
	+ t->forced_info.si_uid = 0;
	+ } else {
	+ t->forced_info = *info;
	+ }
	+
	+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
	+ return 0;
	+ }
	+#endif
	+ return do_force_sig_info(sig, info, t);
	+}
	+
	/*
	* Nuke all other threads in the group.
	*/