| From foo@baz Wed Jan 3 20:37:21 CET 2018 |
| From: Hugh Dickins <hughd@google.com> |
| Date: Fri, 8 Sep 2017 19:26:30 -0700 |
| Subject: kaiser: PCID 0 for kernel and 128 for user |
| |
| From: Hugh Dickins <hughd@google.com> |
| |
| |
| Why was 4 chosen for kernel PCID and 6 for user PCID? |
| No good reason in a backport where PCIDs are only used for Kaiser. |
| |
| If we continue with those, then we shall need to add Andy Lutomirski's |
| 4.13 commit 6c690ee1039b ("x86/mm: Split read_cr3() into read_cr3_pa() |
| and __read_cr3()"), which deals with the problem of read_cr3() callers |
| finding stray bits in the cr3 that they expected to be page-aligned; |
| and for hibernation, his 4.14 commit f34902c5c6c0 ("x86/hibernate/64: |
| Mask off CR3's PCID bits in the saved CR3"). |
| |
| But if 0 is used for kernel PCID, then there's no need to add in those |
| commits - whenever the kernel looks, it sees 0 in the lower bits; and |
| 0 for kernel seems an obvious choice. |
| |
| And I naughtily propose 128 for user PCID. Because there's a place |
| in _SWITCH_TO_USER_CR3 where it takes note of the need for TLB FLUSH, |
| but needs to reset that to NOFLUSH for the next occasion. Currently |
| it does so with a "movb $(0x80)" into the high byte of the per-cpu |
| quadword, but that will cause a machine without PCID support to crash. |
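| Now, if %al just happened to have 0x80 in it at that point, on a |
| machine with PCID support, but 0 on a machine without PCID support, |
| then a "movb %al" could replace the "movb $(0x80)" and be right on both: |
| and making the user PCID 128 (0x80) is what puts that value in %al. |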
| |
| (That will go badly wrong once the pgd can be at a physical address |
| above 2^56, but even with 5-level paging, physical addresses only go |
| up to 2^52.) |
| |
| Signed-off-by: Hugh Dickins <hughd@google.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| arch/x86/include/asm/kaiser.h | 19 ++++++++++++------- |
| arch/x86/include/asm/pgtable_types.h | 7 ++++--- |
| arch/x86/mm/tlb.c | 3 +++ |
| 3 files changed, 19 insertions(+), 10 deletions(-) |
| |
| --- a/arch/x86/include/asm/kaiser.h |
| +++ b/arch/x86/include/asm/kaiser.h |
| @@ -29,14 +29,19 @@ orq X86_CR3_PCID_KERN_VAR, \reg |
| movq \reg, %cr3 |
| .endm |
| |
| -.macro _SWITCH_TO_USER_CR3 reg |
| +.macro _SWITCH_TO_USER_CR3 reg regb |
| +/* |
| + * regb must be the low byte portion of reg: because we have arranged |
| + * for the low byte of the user PCID to serve as the high byte of NOFLUSH |
| + * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are |
| + * not enabled): so that the one register can update both memory and cr3. |
| + */ |
| movq %cr3, \reg |
| andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg |
| orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg |
| js 9f |
| -// FLUSH this time, reset to NOFLUSH for next time |
| -// But if nopcid? Consider using 0x80 for user pcid? |
| -movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) |
| +/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */ |
| +movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) |
| 9: |
| movq \reg, %cr3 |
| .endm |
| @@ -49,7 +54,7 @@ popq %rax |
| |
| .macro SWITCH_USER_CR3 |
| pushq %rax |
| -_SWITCH_TO_USER_CR3 %rax |
| +_SWITCH_TO_USER_CR3 %rax %al |
| popq %rax |
| .endm |
| |
| @@ -61,7 +66,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b |
| |
| .macro SWITCH_USER_CR3_NO_STACK |
| movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) |
| -_SWITCH_TO_USER_CR3 %rax |
| +_SWITCH_TO_USER_CR3 %rax %al |
| movq PER_CPU_VAR(unsafe_stack_register_backup), %rax |
| .endm |
| |
| @@ -69,7 +74,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b |
| |
| .macro SWITCH_KERNEL_CR3 reg |
| .endm |
| -.macro SWITCH_USER_CR3 reg |
| +.macro SWITCH_USER_CR3 reg regb |
| .endm |
| .macro SWITCH_USER_CR3_NO_STACK |
| .endm |
| --- a/arch/x86/include/asm/pgtable_types.h |
| +++ b/arch/x86/include/asm/pgtable_types.h |
| @@ -146,16 +146,17 @@ |
| |
| /* Mask for all the PCID-related bits in CR3: */ |
| #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) |
| +#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) |
| + |
| #if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) |
| -#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL)) |
| -#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL)) |
| +/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */ |
| +#define X86_CR3_PCID_ASID_USER (_AC(0x80,UL)) |
| |
| #define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN) |
| #define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER) |
| #define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN) |
| #define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER) |
| #else |
| -#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) |
| #define X86_CR3_PCID_ASID_USER (_AC(0x0,UL)) |
| /* |
| * PCIDs are unsupported on 32-bit and none of these bits can be |
| --- a/arch/x86/mm/tlb.c |
| +++ b/arch/x86/mm/tlb.c |
| @@ -50,6 +50,9 @@ static void load_new_mm_cr3(pgd_t *pgdir |
| * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could |
| * do it here, but can only be used if X86_FEATURE_INVPCID is |
| * available - and many machines support pcid without invpcid. |
| + * |
| + * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0; |
| + * but keep that line in there in case something changes. |
| */ |
| new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; |
| kaiser_flush_tlb_on_return_to_user(); |