| From 85063fac1f72419eec4349621fe829b07f9acb1e Mon Sep 17 00:00:00 2001 |
| From: Andy Lutomirski <luto@kernel.org> |
| Date: Mon, 12 Sep 2016 15:05:51 -0700 |
| Subject: [PATCH] x86/entry/64: Clean up and document espfix64 stack setup |
| |
| commit 85063fac1f72419eec4349621fe829b07f9acb1e upstream. |
| |
| The espfix64 setup code was a bit inscrutable and contained an |
| unnecessary push of RAX. Remove that push, update all the stack |
| offsets to match, and document the whole mess. |
| |
| Reported-By: Borislav Petkov <bp@alien8.de> |
| Signed-off-by: Andy Lutomirski <luto@kernel.org> |
| Reviewed-by: Borislav Petkov <bp@suse.de> |
| Cc: Borislav Petkov <bp@alien8.de> |
| Cc: Brian Gerst <brgerst@gmail.com> |
| Cc: Denys Vlasenko <dvlasenk@redhat.com> |
| Cc: H. Peter Anvin <hpa@zytor.com> |
| Cc: Josh Poimboeuf <jpoimboe@redhat.com> |
| Cc: Linus Torvalds <torvalds@linux-foundation.org> |
| Cc: Peter Zijlstra <peterz@infradead.org> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Link: http://lkml.kernel.org/r/e5459eb10cf1175c8b36b840bc425f210d045f35.1473717910.git.luto@kernel.org |
| Signed-off-by: Ingo Molnar <mingo@kernel.org> |
| |
| diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S |
| index c0373d667674..e7fba58f4d9c 100644 |
| --- a/arch/x86/entry/entry_64.S |
| +++ b/arch/x86/entry/entry_64.S |
| @@ -586,27 +586,69 @@ native_irq_return_iret: |
| |
| #ifdef CONFIG_X86_ESPFIX64 |
| native_irq_return_ldt: |
| - pushq %rax |
| - pushq %rdi |
| + /* |
| + * We are running with user GSBASE. All GPRs contain their user |
| + * values. We have a percpu ESPFIX stack that is eight slots |
| + * long (see ESPFIX_STACK_SIZE). espfix_waddr points to the bottom |
| + * of the ESPFIX stack. |
| + * |
| + * We clobber RAX and RDI in this code. We stash RDI on the |
| + * normal stack and RAX on the ESPFIX stack. |
| + * |
| + * The ESPFIX stack layout we set up looks like this: |
| + * |
| + * --- top of ESPFIX stack --- |
| + * SS |
| + * RSP |
| + * RFLAGS |
| + * CS |
| + * RIP <-- RSP points here when we're done |
| + * RAX <-- espfix_waddr points here |
| + * --- bottom of ESPFIX stack --- |
| + */ |
| + |
| + pushq %rdi /* Stash user RDI */ |
| SWAPGS |
| movq PER_CPU_VAR(espfix_waddr), %rdi |
| - movq %rax, (0*8)(%rdi) /* RAX */ |
| - movq (2*8)(%rsp), %rax /* RIP */ |
| + movq %rax, (0*8)(%rdi) /* user RAX */ |
| + movq (1*8)(%rsp), %rax /* user RIP */ |
| movq %rax, (1*8)(%rdi) |
| - movq (3*8)(%rsp), %rax /* CS */ |
| + movq (2*8)(%rsp), %rax /* user CS */ |
| movq %rax, (2*8)(%rdi) |
| - movq (4*8)(%rsp), %rax /* RFLAGS */ |
| + movq (3*8)(%rsp), %rax /* user RFLAGS */ |
| movq %rax, (3*8)(%rdi) |
| - movq (6*8)(%rsp), %rax /* SS */ |
| + movq (5*8)(%rsp), %rax /* user SS */ |
| movq %rax, (5*8)(%rdi) |
| - movq (5*8)(%rsp), %rax /* RSP */ |
| + movq (4*8)(%rsp), %rax /* user RSP */ |
| movq %rax, (4*8)(%rdi) |
| - andl $0xffff0000, %eax |
| - popq %rdi |
| + /* Now RAX == RSP. */ |
| + |
| + andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */ |
| + popq %rdi /* Restore user RDI */ |
| + |
| + /* |
| + * espfix_stack[31:16] == 0. The page tables are set up such that |
| + * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of |
| + * espfix_waddr for any X. That is, there are 65536 RO aliases of |
| + * the same page. Set up RSP so that RSP[31:16] contains the |
| + * respective 16 bits of the /userspace/ RSP and RSP nonetheless |
| + * still points to an RO alias of the ESPFIX stack. |
| + */ |
| orq PER_CPU_VAR(espfix_stack), %rax |
| SWAPGS |
| movq %rax, %rsp |
| - popq %rax |
| + |
| + /* |
| + * At this point, we cannot write to the stack any more, but we can |
| + * still read. |
| + */ |
| + popq %rax /* Restore user RAX */ |
| + |
| + /* |
| + * RSP now points to an ordinary IRET frame, except that the page |
| + * is read-only and RSP[31:16] are preloaded with the userspace |
| + * values. We can now IRET back to userspace. |
| + */ |
| jmp native_irq_return_iret |
| #endif |
| END(common_interrupt) |
| -- |
| 2.15.0 |
| |