blob: 622d9e1ac740f1fb982b6227672faafb54e4fd3d [file] [log] [blame]
From: Hugh Dickins <hughd@google.com>
Date: Sun, 24 Sep 2017 16:59:49 -0700
Subject: kaiser: add "nokaiser" boot option, using ALTERNATIVE
Added "nokaiser" boot option: an early param like "noinvpcid".
Most places now check int kaiser_enabled (#defined 0 when not
CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S
and entry_64_compat.S are using the ALTERNATIVE technique, which
patches in the preferred instructions at runtime. That technique
is tied to x86 cpu features, so X86_FEATURE_KAISER fabricated
("" in its comment so "kaiser" not magicked into /proc/cpuinfo).
Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that,
but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when
nokaiser like when !CONFIG_KAISER, but not setting either when kaiser -
neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL
won't get set in some obscure corner, or something add PGE into CR4.
By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled,
all page table setup which uses pte_pfn() masks it out of the ptes.
It's slightly shameful that the same declaration versus definition of
kaiser_enabled appears in not one, not two, but in three header files
(asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h). I felt safer that way,
than with #including any of those in any of the others; and did not
feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes
them all, so we shall hear about it if they get out of synch.
Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER
from kaiser.c; removed the unused native_get_normal_pgd(); removed
the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some
comments. But more interestingly, set CR4.PSE in secondary_startup_64:
the manual is clear that it does not matter whether it's 0 or 1 when
4-level-pts are enabled, but I was distracted to find cr4 different on
BSP and auxiliaries - BSP alone was adding PSE, in init_memory_mapping().
(cherry picked from Change-Id: I8e5bec716944444359cbd19f6729311eff943e9a)
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
Documentation/kernel-parameters.txt | 2 ++
arch/x86/ia32/ia32entry.S | 2 ++
arch/x86/include/asm/cpufeature.h | 3 +++
arch/x86/include/asm/kaiser.h | 27 ++++++++++++++++++++-------
arch/x86/include/asm/pgtable.h | 19 +++++++++++++------
arch/x86/include/asm/pgtable_64.h | 13 ++++---------
arch/x86/include/asm/pgtable_types.h | 4 ----
arch/x86/include/asm/tlbflush.h | 35 +++++++++++++++++++++--------------
arch/x86/kernel/cpu/common.c | 30 +++++++++++++++++++++++++++++-
arch/x86/kernel/entry_64.S | 6 +++++-
arch/x86/kernel/espfix_64.c | 3 ++-
arch/x86/kernel/head_64.S | 4 ++--
arch/x86/mm/init.c | 2 +-
arch/x86/mm/init_64.c | 10 ++++++++++
arch/x86/mm/kaiser.c | 26 ++++++++++++++++++++++----
arch/x86/mm/pgtable.c | 8 ++------
arch/x86/mm/tlb.c | 4 +---
17 files changed, 139 insertions(+), 59 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4f43fd315ecd..28c23bdc1af4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1803,6 +1803,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nojitter [IA-64] Disables jitter checking for ITC timers.
+ nokaiser [X86-64] Disable KAISER isolation of kernel from user.
+
no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 7eb0d4792800..130db0702d9f 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -13,6 +13,8 @@
#include <asm/thread_info.h>
#include <asm/segment.h>
#include <asm/pgtable_types.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeature.h>
#include <asm/kaiser.h>
#include <asm/irqflags.h>
#include <linux/linkage.h>
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e65fb1220573..cb967c9db459 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -178,6 +178,9 @@
#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
#define X86_FEATURE_INVPCID_SINGLE (7*32+ 8) /* Effectively INVPCID && CR4.PCIDE=1 */
+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+#define X86_FEATURE_KAISER ( 7*32+31) /* "" CONFIG_KAISER w/o nokaiser */
+
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */
diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
index 6f4c8ef46881..2fe06d06241c 100644
--- a/arch/x86/include/asm/kaiser.h
+++ b/arch/x86/include/asm/kaiser.h
@@ -46,28 +46,33 @@ movq \reg, %cr3
.endm
.macro SWITCH_KERNEL_CR3
-pushq %rax
+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
_SWITCH_TO_KERNEL_CR3 %rax
popq %rax
+8:
.endm
.macro SWITCH_USER_CR3
-pushq %rax
+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
_SWITCH_TO_USER_CR3 %rax %al
popq %rax
+8:
.endm
.macro SWITCH_KERNEL_CR3_NO_STACK
-movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+ALTERNATIVE "jmp 8f", \
+ __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \
+ X86_FEATURE_KAISER
_SWITCH_TO_KERNEL_CR3 %rax
movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+8:
.endm
#else /* CONFIG_KAISER */
-.macro SWITCH_KERNEL_CR3 reg
+.macro SWITCH_KERNEL_CR3
.endm
-.macro SWITCH_USER_CR3 reg regb
+.macro SWITCH_USER_CR3
.endm
.macro SWITCH_KERNEL_CR3_NO_STACK
.endm
@@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+extern int kaiser_enabled;
+#else
+#define kaiser_enabled 0
+#endif /* CONFIG_KAISER */
+
+/*
+ * Kaiser function prototypes are needed even when CONFIG_KAISER is not set,
+ * so as to build with tests on kaiser_enabled instead of #ifdefs.
+ */
+
/**
* kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
* @addr: the start address of the range
@@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
*/
extern void kaiser_init(void);
-#endif /* CONFIG_KAISER */
-
#endif /* __ASSEMBLY */
#endif /* _ASM_X86_KAISER_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index b1c8b8d3b02a..a467c19e64b5 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -17,6 +17,11 @@
#ifndef __ASSEMBLY__
#include <asm/x86_init.h>
+#ifdef CONFIG_KAISER
+extern int kaiser_enabled;
+#else
+#define kaiser_enabled 0
+#endif
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -577,7 +582,7 @@ static inline int pgd_bad(pgd_t pgd)
* page table by accident; it will fault on the first
* instruction it tries to run. See native_set_pgd().
*/
- if (IS_ENABLED(CONFIG_KAISER))
+ if (kaiser_enabled)
ignore_flags |= _PAGE_NX;
return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
@@ -780,12 +785,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
*/
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
- memcpy(dst, src, count * sizeof(pgd_t));
+ memcpy(dst, src, count * sizeof(pgd_t));
#ifdef CONFIG_KAISER
- /* Clone the shadow pgd part as well */
- memcpy(native_get_shadow_pgd(dst),
- native_get_shadow_pgd(src),
- count * sizeof(pgd_t));
+ if (kaiser_enabled) {
+ /* Clone the shadow pgd part as well */
+ memcpy(native_get_shadow_pgd(dst),
+ native_get_shadow_pgd(src),
+ count * sizeof(pgd_t));
+ }
#endif
}
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index a3bf3de9893b..a4a2b700a4e6 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -110,13 +110,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
{
+#ifdef CONFIG_DEBUG_VM
+ /* linux/mmdebug.h may not have been included at this point */
+ BUG_ON(!kaiser_enabled);
+#endif
return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
}
-
-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
-{
- return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
-}
#else
static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
{
@@ -126,10 +125,6 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
{
return NULL;
}
-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
-{
- return pgdp;
-}
#endif /* CONFIG_KAISER */
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 6e1315068a62..3a3c6d014696 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -39,11 +39,7 @@
#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
-#ifdef CONFIG_KAISER
-#define _PAGE_GLOBAL (_AT(pteval_t, 0))
-#else
#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
-#endif
#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 288195901c8a..616eca18ed31 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -69,9 +69,11 @@ static inline void invpcid_flush_all_nonglobals(void)
* to avoid the need for asm/kaiser.h in unexpected places.
*/
#ifdef CONFIG_KAISER
+extern int kaiser_enabled;
extern void kaiser_setup_pcid(void);
extern void kaiser_flush_tlb_on_return_to_user(void);
#else
+#define kaiser_enabled 0
static inline void kaiser_setup_pcid(void)
{
}
@@ -96,7 +98,7 @@ static inline void __native_flush_tlb(void)
* back:
*/
preempt_disable();
- if (this_cpu_has(X86_FEATURE_PCID))
+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
kaiser_flush_tlb_on_return_to_user();
native_write_cr3(native_read_cr3());
preempt_enable();
@@ -104,13 +106,15 @@ static inline void __native_flush_tlb(void)
static inline void __native_flush_tlb_global(void)
{
-#ifdef CONFIG_KAISER
- /* Globals are not used at all */
- __native_flush_tlb();
-#else
unsigned long flags;
unsigned long cr4;
+ if (kaiser_enabled) {
+ /* Globals are not used at all */
+ __native_flush_tlb();
+ return;
+ }
+
if (this_cpu_has(X86_FEATURE_INVPCID)) {
/*
* Using INVPCID is considerably faster than a pair of writes
@@ -130,13 +134,16 @@ static inline void __native_flush_tlb_global(void)
raw_local_irq_save(flags);
cr4 = native_read_cr4();
- /* clear PGE */
- native_write_cr4(cr4 & ~X86_CR4_PGE);
- /* write old PGE again and flush TLBs */
- native_write_cr4(cr4);
+ if (cr4 & X86_CR4_PGE) {
+ /* clear PGE and flush TLB of all entries */
+ native_write_cr4(cr4 & ~X86_CR4_PGE);
+ /* restore PGE as it was before */
+ native_write_cr4(cr4);
+ } else {
+ native_write_cr3(native_read_cr3());
+ }
raw_local_irq_restore(flags);
-#endif
}
static inline void __native_flush_tlb_single(unsigned long addr)
@@ -151,7 +158,7 @@ static inline void __native_flush_tlb_single(unsigned long addr)
*/
if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
- if (this_cpu_has(X86_FEATURE_PCID))
+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
kaiser_flush_tlb_on_return_to_user();
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
return;
@@ -166,9 +173,9 @@ static inline void __native_flush_tlb_single(unsigned long addr)
* Make sure to do only a single invpcid when KAISER is
* disabled and we have only a single ASID.
*/
- if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
- invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
- invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
+ if (kaiser_enabled)
+ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
}
static inline void __flush_tlb_all(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b567c89fc628..dbad6b1ee60e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -171,6 +171,20 @@ static int __init x86_pcid_setup(char *s)
return 1;
}
__setup("nopcid", x86_pcid_setup);
+
+static int __init x86_nokaiser_setup(char *s)
+{
+ /* nokaiser doesn't accept parameters */
+ if (s)
+ return -EINVAL;
+#ifdef CONFIG_KAISER
+ kaiser_enabled = 0;
+ setup_clear_cpu_cap(X86_FEATURE_KAISER);
+ pr_info("nokaiser: KAISER feature disabled\n");
+#endif
+ return 0;
+}
+early_param("nokaiser", x86_nokaiser_setup);
#endif
static int __init x86_noinvpcid_setup(char *s)
@@ -313,7 +327,8 @@ static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
static void setup_pcid(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PCID)) {
- if (cpu_has(c, X86_FEATURE_PGE) && IS_ENABLED(CONFIG_X86_64)) {
+ if (IS_ENABLED(CONFIG_X86_64) &&
+ (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled)) {
/*
* Regardless of whether PCID is enumerated, the
* SDM says that it can't be enabled in 32-bit mode.
@@ -680,6 +695,10 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_power = cpuid_edx(0x80000007);
init_scattered_cpuid_features(c);
+#ifdef CONFIG_KAISER
+ if (kaiser_enabled)
+ set_cpu_cap(c, X86_FEATURE_KAISER);
+#endif
}
static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -1229,6 +1248,15 @@ void __cpuinit cpu_init(void)
int cpu;
int i;
+ if (!kaiser_enabled) {
+ /*
+ * secondary_startup_64() deferred setting PGE in cr4:
+ * init_memory_mapping() sets it on the boot cpu,
+ * but it needs to be set on each secondary cpu.
+ */
+ set_in_cr4(X86_CR4_PGE);
+ }
+
cpu = stack_smp_processor_id();
t = &per_cpu(init_tss, cpu);
oist = &per_cpu(orig_ist, cpu);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3a4356a2f156..eb6c31b6069c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,6 +56,8 @@
#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/pgtable_types.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeature.h>
#include <asm/kaiser.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -404,7 +406,7 @@ ENTRY(save_paranoid)
* unconditionally, but we need to find out whether the reverse
* should be done on return (conveyed to paranoid_exit in %ebx).
*/
- movq %cr3, %rax
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
testl $KAISER_SHADOW_PGD_OFFSET, %eax
jz 2f
orl $2, %ebx
@@ -1428,6 +1430,7 @@ ENTRY(paranoid_exit)
movq %r12, %rbx /* restore after paranoid_userspace */
TRACE_IRQS_IRETQ 0
#ifdef CONFIG_KAISER
+ /* No ALTERNATIVE for X86_FEATURE_KAISER: save_paranoid sets %ebx */
testl $2, %ebx /* SWITCH_USER_CR3 needed? */
jz paranoid_exit_no_switch
SWITCH_USER_CR3
@@ -1597,6 +1600,7 @@ ENTRY(nmi)
nmi_kernel:
movq %r12, %rbx /* restore after nmi_userspace */
#ifdef CONFIG_KAISER
+ /* No ALTERNATIVE for X86_FEATURE_KAISER: save_paranoid sets %ebx */
testl $2, %ebx /* SWITCH_USER_CR3 needed? */
jz nmi_exit_no_switch
SWITCH_USER_CR3
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 14cd73b0e634..a1944faa739c 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -135,9 +135,10 @@ void __init init_espfix_bsp(void)
* area to ensure it is mapped into the shadow user page
* tables.
*/
- if (IS_ENABLED(CONFIG_KAISER))
+ if (kaiser_enabled) {
set_pgd(native_get_shadow_pgd(pgd_p),
__pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
+ }
/* Randomize the locations */
init_espfix_random();
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6e697ac3fb54..28aef29c42de 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -166,8 +166,8 @@ ENTRY(secondary_startup_64)
/* Sanitize CPU configuration */
call verify_cpu
- /* Enable PAE mode and PGE */
- movl $(X86_CR4_PAE | X86_CR4_PGE), %eax
+ /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */
+ movl $(X86_CR4_PAE | X86_CR4_PSE), %eax
movq %rax, %cr4
/* Setup early boot stage 4 level pagetables. */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index dba71b25a546..f897b97ab897 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -161,7 +161,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
set_in_cr4(X86_CR4_PSE);
/* Enable PGE if available */
- if (cpu_has_pge) {
+ if (cpu_has_pge && !kaiser_enabled) {
set_in_cr4(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL;
}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 44b93da18401..1f1b8ed9b06f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -312,6 +312,16 @@ void __init cleanup_highmap(void)
continue;
if (vaddr < (unsigned long) _text || vaddr > end)
set_pmd(pmd, __pmd(0));
+ else if (kaiser_enabled) {
+ /*
+ * level2_kernel_pgt is initialized with _PAGE_GLOBAL:
+ * clear that now. This is not important, so long as
+ * CR4.PGE remains clear, but it removes an anomaly.
+ * Physical mapping setup below avoids _PAGE_GLOBAL
+ * by use of massage_pgprot() inside pfn_pte() etc.
+ */
+ set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL));
+ }
}
}
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
index fb16d79fc07a..bef8a415048b 100644
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -21,7 +21,9 @@ extern struct mm_struct init_mm;
#include <asm/pgalloc.h>
#include <asm/desc.h>
-#ifdef CONFIG_KAISER
+int kaiser_enabled __read_mostly = 1;
+EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
+
DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
/*
@@ -165,8 +167,8 @@ static pte_t *kaiser_pagetable_walk(unsigned long address)
return pte_offset_kernel(pmd, address);
}
-int kaiser_add_user_map(const void *__start_addr, unsigned long size,
- unsigned long flags)
+static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
+ unsigned long flags)
{
int ret = 0;
pte_t *pte;
@@ -175,6 +177,15 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size,
unsigned long end_addr = PAGE_ALIGN(start_addr + size);
unsigned long target_address;
+ /*
+ * It is convenient for callers to pass in __PAGE_KERNEL etc,
+ * and there is no actual harm from setting _PAGE_GLOBAL, so
+ * long as CR4.PGE is not set. But it is nonetheless troubling
+ * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser"
+ * requires that not to be #defined to 0): so mask it off here.
+ */
+ flags &= ~_PAGE_GLOBAL;
+
if (flags & _PAGE_USER)
BUG_ON(address < FIXADDR_START || end_addr >= FIXADDR_TOP);
@@ -264,6 +275,8 @@ void __init kaiser_init(void)
{
int cpu;
+ if (!kaiser_enabled)
+ return;
kaiser_init_all_pgds();
for_each_possible_cpu(cpu) {
@@ -303,6 +316,8 @@ void __init kaiser_init(void)
/* Add a mapping to the shadow mapping, and synchronize the mappings */
int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
{
+ if (!kaiser_enabled)
+ return 0;
return kaiser_add_user_map((const void *)addr, size, flags);
}
@@ -312,6 +327,8 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size)
unsigned long addr;
pte_t *pte;
+ if (!kaiser_enabled)
+ return;
for (addr = start; addr < end; addr += PAGE_SIZE) {
pte = kaiser_pagetable_walk(addr);
if (pte)
@@ -333,6 +350,8 @@ static inline bool is_userspace_pgd(pgd_t *pgdp)
pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
{
+ if (!kaiser_enabled)
+ return pgd;
/*
* Do we need to also populate the shadow pgd? Check _PAGE_USER to
* skip cases like kexec and EFI which make temporary low mappings.
@@ -389,4 +408,3 @@ void kaiser_flush_tlb_on_return_to_user(void)
X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
}
EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
-#endif /* CONFIG_KAISER */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 73285602c93f..0b246ef0fa48 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -253,16 +253,12 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
}
}
-#ifdef CONFIG_KAISER
/*
- * Instead of one pmd, we aquire two pmds. Being order-1, it is
+ * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
* both 8k in size and 8k-aligned. That lets us just flip bit 12
* in a pointer to swap between the two 4k halves.
*/
-#define PGD_ALLOCATION_ORDER 1
-#else
-#define PGD_ALLOCATION_ORDER 0
-#endif
+#define PGD_ALLOCATION_ORDER kaiser_enabled
static inline pgd_t *_pgd_alloc(void)
{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index bd0ff4eb74a0..34515ec300b8 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -21,8 +21,7 @@ static void load_new_mm_cr3(pgd_t *pgdir)
{
unsigned long new_mm_cr3 = __pa(pgdir);
-#ifdef CONFIG_KAISER
- if (this_cpu_has(X86_FEATURE_PCID)) {
+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
/*
* We reuse the same PCID for different tasks, so we must
* flush all the entries for the PCID out when we change tasks.
@@ -39,7 +38,6 @@ static void load_new_mm_cr3(pgd_t *pgdir)
new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
kaiser_flush_tlb_on_return_to_user();
}
-#endif /* CONFIG_KAISER */
/*
* Caution: many callers of this function expect