| From 616d24835eeafa8ef3466479db028abfdfc77531 Mon Sep 17 00:00:00 2001 |
| From: Brian Gerst <brgerst@gmail.com> |
| Date: Sat, 13 Aug 2016 12:38:20 -0400 |
| Subject: [PATCH] sched/x86: Pass kernel thread parameters in 'struct |
| fork_frame' |
| |
| commit 616d24835eeafa8ef3466479db028abfdfc77531 upstream. |
| |
| Instead of setting up a fake pt_regs context, put the kernel thread |
| function pointer and arg into the unused callee-restored registers |
| of 'struct fork_frame'. |
| |
| Signed-off-by: Brian Gerst <brgerst@gmail.com> |
| Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> |
| Cc: Andy Lutomirski <luto@kernel.org> |
| Cc: Borislav Petkov <bp@alien8.de> |
| Cc: Denys Vlasenko <dvlasenk@redhat.com> |
| Cc: H. Peter Anvin <hpa@zytor.com> |
| Cc: Linus Torvalds <torvalds@linux-foundation.org> |
| Cc: Peter Zijlstra <peterz@infradead.org> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Link: http://lkml.kernel.org/r/1471106302-10159-6-git-send-email-brgerst@gmail.com |
| Signed-off-by: Ingo Molnar <mingo@kernel.org> |
| |
| diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S |
| index bf8f221f9c94..b75a8bcd2d23 100644 |
| --- a/arch/x86/entry/entry_32.S |
| +++ b/arch/x86/entry/entry_32.S |
| @@ -240,35 +240,34 @@ END(__switch_to_asm) |
| * A newly forked process directly context switches into this address. |
| * |
| * eax: prev task we switched from |
| + * ebx: kernel thread func (NULL for user thread) |
| + * edi: kernel thread arg |
| */ |
| ENTRY(ret_from_fork) |
| pushl %eax |
| call schedule_tail |
| popl %eax |
| |
| + testl %ebx, %ebx |
| + jnz 1f /* kernel threads are uncommon */ |
| + |
| +2: |
| /* When we fork, we trace the syscall return in the child, too. */ |
| movl %esp, %eax |
| call syscall_return_slowpath |
| jmp restore_all |
| -END(ret_from_fork) |
| - |
| -ENTRY(ret_from_kernel_thread) |
| - pushl %eax |
| - call schedule_tail |
| - popl %eax |
| - movl PT_EBP(%esp), %eax |
| - call *PT_EBX(%esp) |
| - movl $0, PT_EAX(%esp) |
| |
| + /* kernel thread */ |
| +1: movl %edi, %eax |
| + call *%ebx |
| /* |
| - * Kernel threads return to userspace as if returning from a syscall. |
| - * We should check whether anything actually uses this path and, if so, |
| - * consider switching it over to ret_from_fork. |
| + * A kernel thread is allowed to return here after successfully |
| + * calling do_execve(). Exit to userspace to complete the execve() |
| + * syscall. |
| */ |
| - movl %esp, %eax |
| - call syscall_return_slowpath |
| - jmp restore_all |
| -ENDPROC(ret_from_kernel_thread) |
| + movl $0, PT_EAX(%esp) |
| + jmp 2b |
| +END(ret_from_fork) |
| |
| /* |
| * Return to user mode is not as complex as all this looks, |
| diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S |
| index c1af8acd366b..c0373d667674 100644 |
| --- a/arch/x86/entry/entry_64.S |
| +++ b/arch/x86/entry/entry_64.S |
| @@ -407,37 +407,34 @@ END(__switch_to_asm) |
| * A newly forked process directly context switches into this address. |
| * |
| * rax: prev task we switched from |
| + * rbx: kernel thread func (NULL for user thread) |
| + * r12: kernel thread arg |
| */ |
| ENTRY(ret_from_fork) |
| movq %rax, %rdi |
| call schedule_tail /* rdi: 'prev' task parameter */ |
| |
| - testb $3, CS(%rsp) /* from kernel_thread? */ |
| - jnz 1f |
| - |
| - /* |
| - * We came from kernel_thread. This code path is quite twisted, and |
| - * someone should clean it up. |
| - * |
| - * copy_thread_tls stashes the function pointer in RBX and the |
| - * parameter to be passed in RBP. The called function is permitted |
| - * to call do_execve and thereby jump to user mode. |
| - */ |
| - movq RBP(%rsp), %rdi |
| - call *RBX(%rsp) |
| - movl $0, RAX(%rsp) |
| - |
| - /* |
| - * Fall through as though we're exiting a syscall. This makes a |
| - * twisted sort of sense if we just called do_execve. |
| - */ |
| + testq %rbx, %rbx /* from kernel_thread? */ |
| + jnz 1f /* kernel threads are uncommon */ |
| |
| -1: |
| +2: |
| movq %rsp, %rdi |
| call syscall_return_slowpath /* returns with IRQs disabled */ |
| TRACE_IRQS_ON /* user mode is traced as IRQS on */ |
| SWAPGS |
| jmp restore_regs_and_iret |
| + |
| +1: |
| + /* kernel thread */ |
| + movq %r12, %rdi |
| + call *%rbx |
| + /* |
| + * A kernel thread is allowed to return here after successfully |
| + * calling do_execve(). Exit to userspace to complete the execve() |
| + * syscall. |
| + */ |
| + movq $0, RAX(%rsp) |
| + jmp 2b |
| END(ret_from_fork) |
| |
| /* |
| diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h |
| index 886d5ea09dba..5cb436acd463 100644 |
| --- a/arch/x86/include/asm/switch_to.h |
| +++ b/arch/x86/include/asm/switch_to.h |
| @@ -34,6 +34,8 @@ static inline void prepare_switch_to(struct task_struct *prev, |
| #endif |
| } |
| |
| +asmlinkage void ret_from_fork(void); |
| + |
| /* data that is pointed to by thread.sp */ |
| struct inactive_task_frame { |
| #ifdef CONFIG_X86_64 |
| diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c |
| index 4bedbc08e53c..18714a191b2d 100644 |
| --- a/arch/x86/kernel/process_32.c |
| +++ b/arch/x86/kernel/process_32.c |
| @@ -55,9 +55,6 @@ |
| #include <asm/switch_to.h> |
| #include <asm/vm86.h> |
| |
| -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
| -asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); |
| - |
| /* |
| * Return saved PC of a blocked thread. |
| */ |
| @@ -139,6 +136,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, |
| int err; |
| |
| frame->bp = 0; |
| + frame->ret_addr = (unsigned long) ret_from_fork; |
| p->thread.sp = (unsigned long) fork_frame; |
| p->thread.sp0 = (unsigned long) (childregs+1); |
| memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
| @@ -146,25 +144,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, |
| if (unlikely(p->flags & PF_KTHREAD)) { |
| /* kernel thread */ |
| memset(childregs, 0, sizeof(struct pt_regs)); |
| - frame->ret_addr = (unsigned long) ret_from_kernel_thread; |
| - task_user_gs(p) = __KERNEL_STACK_CANARY; |
| - childregs->ds = __USER_DS; |
| - childregs->es = __USER_DS; |
| - childregs->fs = __KERNEL_PERCPU; |
| - childregs->bx = sp; /* function */ |
| - childregs->bp = arg; |
| - childregs->orig_ax = -1; |
| - childregs->cs = __KERNEL_CS | get_kernel_rpl(); |
| - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; |
| + frame->bx = sp; /* function */ |
| + frame->di = arg; |
| p->thread.io_bitmap_ptr = NULL; |
| return 0; |
| } |
| + frame->bx = 0; |
| *childregs = *current_pt_regs(); |
| childregs->ax = 0; |
| if (sp) |
| childregs->sp = sp; |
| |
| - frame->ret_addr = (unsigned long) ret_from_fork; |
| task_user_gs(p) = get_user_gs(current_pt_regs()); |
| |
| p->thread.io_bitmap_ptr = NULL; |
| diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c |
| index 827eeed03e16..b812cd0d7889 100644 |
| --- a/arch/x86/kernel/process_64.c |
| +++ b/arch/x86/kernel/process_64.c |
| @@ -50,8 +50,6 @@ |
| #include <asm/switch_to.h> |
| #include <asm/xen/hypervisor.h> |
| |
| -asmlinkage extern void ret_from_fork(void); |
| - |
| __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); |
| |
| /* Prints also some state that isn't saved in the pt_regs */ |
| @@ -165,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, |
| if (unlikely(p->flags & PF_KTHREAD)) { |
| /* kernel thread */ |
| memset(childregs, 0, sizeof(struct pt_regs)); |
| - childregs->sp = (unsigned long)childregs; |
| - childregs->ss = __KERNEL_DS; |
| - childregs->bx = sp; /* function */ |
| - childregs->bp = arg; |
| - childregs->orig_ax = -1; |
| - childregs->cs = __KERNEL_CS | get_kernel_rpl(); |
| - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; |
| + frame->bx = sp; /* function */ |
| + frame->r12 = arg; |
| return 0; |
| } |
| + frame->bx = 0; |
| *childregs = *current_pt_regs(); |
| |
| childregs->ax = 0; |
| -- |
| 2.15.0 |
| |