Patches taken directly from the stable-queue git repository

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
diff --git a/queue/array_index_nospec-sanitize-speculative-array-de-references.patch b/queue/array_index_nospec-sanitize-speculative-array-de-references.patch
new file mode 100644
index 0000000..99fe9a2
--- /dev/null
+++ b/queue/array_index_nospec-sanitize-speculative-array-de-references.patch
@@ -0,0 +1,117 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:22 -0800
+Subject: array_index_nospec: Sanitize speculative array de-references
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit f3804203306e098dae9ca51540fcd5eb700d7f40)
+
+array_index_nospec() is proposed as a generic mechanism to mitigate
+against Spectre-variant-1 attacks, i.e. an attack that bypasses boundary
+checks via speculative execution. The array_index_nospec()
+implementation is expected to be safe for current generation CPUs across
+multiple architectures (ARM, x86).
+
+Based on an original implementation by Linus Torvalds, tweaked to remove
+speculative flows by Alexei Starovoitov, and tweaked again by Linus to
+introduce an x86 assembly implementation for the mask generation.
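+
+As a rough worked example of the mask generation below (values made up
+here for illustration, assuming BITS_PER_LONG == 64):
+
+	unsigned long mask;
+
+	/* in bounds: index 5 of 16 -> (5 | 10) leaves bit 63 clear */
+	mask = ~(long)(5UL | (16UL - 1 - 5UL)) >> 63;	/* mask == ~0UL */
+
+	/* out of bounds: index 20 of 16 -> (16 - 1 - 20) wraps, bit 63 set */
+	mask = ~(long)(20UL | (16UL - 1 - 20UL)) >> 63;	/* mask == 0 */
+
+so that "index & mask" preserves an in-bounds index and clamps a
+speculative out-of-bounds index to 0.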
+
+Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
+Co-developed-by: Alexei Starovoitov <ast@kernel.org>
+Suggested-by: Cyril Novikov <cnovikov@lynx.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Russell King <linux@armlinux.org.uk>
+Cc: gregkh@linuxfoundation.org
+Cc: torvalds@linux-foundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727414229.33451.18411580953862676575.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/nospec.h |   72 +++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 72 insertions(+)
+ create mode 100644 include/linux/nospec.h
+
+--- /dev/null
++++ b/include/linux/nospec.h
+@@ -0,0 +1,72 @@
++// SPDX-License-Identifier: GPL-2.0
++// Copyright(c) 2018 Linus Torvalds. All rights reserved.
++// Copyright(c) 2018 Alexei Starovoitov. All rights reserved.
++// Copyright(c) 2018 Intel Corporation. All rights reserved.
++
++#ifndef _LINUX_NOSPEC_H
++#define _LINUX_NOSPEC_H
++
++/**
++ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * When @index is out of bounds (@index >= @size), the sign bit will be
++ * set.  Extend the sign bit to all bits and invert, giving a result of
++ * zero for an out of bounds index, or ~0 if within bounds [0, @size).
++ */
++#ifndef array_index_mask_nospec
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++						    unsigned long size)
++{
++	/*
++	 * Warn developers about inappropriate array_index_nospec() usage.
++	 *
++	 * Even if the CPU speculates past the WARN_ONCE branch, the
++	 * sign bit of @index is taken into account when generating the
++	 * mask.
++	 *
++	 * This warning is compiled out when the compiler can infer that
++	 * @index and @size are less than LONG_MAX.
++	 */
++	if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
++			"array_index_nospec() limited to range of [0, LONG_MAX]\n"))
++		return 0;
++
++	/*
++	 * Always calculate and emit the mask even if the compiler
++	 * thinks the mask is not needed. The compiler does not take
++	 * into account the value of @index under speculation.
++	 */
++	OPTIMIZER_HIDE_VAR(index);
++	return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
++}
++#endif
++
++/*
++ * array_index_nospec - sanitize an array index after a bounds check
++ *
++ * For a code sequence like:
++ *
++ *     if (index < size) {
++ *         index = array_index_nospec(index, size);
++ *         val = array[index];
++ *     }
++ *
++ * ...if the CPU speculates past the bounds check then
++ * array_index_nospec() will clamp the index within the range of [0,
++ * size).
++ */
++#define array_index_nospec(index, size)					\
++({									\
++	typeof(index) _i = (index);					\
++	typeof(size) _s = (size);					\
++	unsigned long _mask = array_index_mask_nospec(_i, _s);		\
++									\
++	BUILD_BUG_ON(sizeof(_i) > sizeof(long));			\
++	BUILD_BUG_ON(sizeof(_s) > sizeof(long));			\
++									\
++	_i &= _mask;							\
++	_i;								\
++})
++#endif /* _LINUX_NOSPEC_H */
diff --git a/queue/documentation-document-array_index_nospec.patch b/queue/documentation-document-array_index_nospec.patch
new file mode 100644
index 0000000..5bad41f
--- /dev/null
+++ b/queue/documentation-document-array_index_nospec.patch
@@ -0,0 +1,125 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: Mark Rutland <mark.rutland@arm.com>
+Date: Mon, 29 Jan 2018 17:02:16 -0800
+Subject: Documentation: Document array_index_nospec
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+
+(cherry picked from commit f84a56f73dddaeac1dba8045b007f742f61cd2da)
+
+Document the rationale and usage of the new array_index_nospec() helper.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Cc: linux-arch@vger.kernel.org
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: gregkh@linuxfoundation.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: torvalds@linux-foundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727413645.33451.15878817161436755393.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/speculation.txt |   90 ++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 90 insertions(+)
+ create mode 100644 Documentation/speculation.txt
+
+--- /dev/null
++++ b/Documentation/speculation.txt
+@@ -0,0 +1,90 @@
++This document explains potential effects of speculation, and how undesirable
++effects can be mitigated portably using common APIs.
++
++===========
++Speculation
++===========
++
++To improve performance and minimize average latencies, many contemporary CPUs
++employ speculative execution techniques such as branch prediction, performing
++work which may be discarded at a later stage.
++
++Typically speculative execution cannot be observed from architectural state,
++such as the contents of registers. However, in some cases it is possible to
++observe its impact on microarchitectural state, such as the presence or
++absence of data in caches. Such state may form side-channels which can be
++observed to extract secret information.
++
++For example, in the presence of branch prediction, it is possible for bounds
++checks to be ignored by code which is speculatively executed. Consider the
++following code:
++
++	int load_array(int *array, unsigned int index)
++	{
++		if (index >= MAX_ARRAY_ELEMS)
++			return 0;
++		else
++			return array[index];
++	}
++
++Which, on arm64, may be compiled to an assembly sequence such as:
++
++	CMP	<index>, #MAX_ARRAY_ELEMS
++	B.LT	less
++	MOV	<returnval>, #0
++	RET
++  less:
++	LDR	<returnval>, [<array>, <index>]
++	RET
++
++It is possible that a CPU mis-predicts the conditional branch, and
++speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This
++value will subsequently be discarded, but the speculated load may affect
++microarchitectural state which can be subsequently measured.
++
++More complex sequences involving multiple dependent memory accesses may
++result in sensitive information being leaked. Consider the following
++code, building on the prior example:
++
++	int load_dependent_arrays(int *arr1, int *arr2, int index)
++	{
+		int val1, val2;
++
++		val1 = load_array(arr1, index);
++		val2 = load_array(arr2, val1);
++
++		return val2;
++	}
++
++Under speculation, the first call to load_array() may return the value
++of an out-of-bounds address, while the second call will influence
++microarchitectural state dependent on this value. This may provide an
++arbitrary read primitive.
++
++====================================
++Mitigating speculation side-channels
++====================================
++
++The kernel provides a generic API to ensure that bounds checks are
++respected even under speculation. Architectures which are affected by
++speculation-based side-channels are expected to implement these
++primitives.
++
++The array_index_nospec() helper in <linux/nospec.h> can be used to
++prevent information from being leaked via side-channels.
++
++A call to array_index_nospec(index, size) returns a sanitized index
+value that is bounded to [0, size) even under CPU speculation
++conditions.
++
++This can be used to protect the earlier load_array() example:
++
++	int load_array(int *array, unsigned int index)
++	{
++		if (index >= MAX_ARRAY_ELEMS)
++			return 0;
++		else {
++			index = array_index_nospec(index, MAX_ARRAY_ELEMS);
++			return array[index];
++		}
++	}
diff --git a/queue/kaiser-add-nokaiser-boot-option-using-alternative.patch b/queue/kaiser-add-nokaiser-boot-option-using-alternative.patch
new file mode 100644
index 0000000..b8d1005
--- /dev/null
+++ b/queue/kaiser-add-nokaiser-boot-option-using-alternative.patch
@@ -0,0 +1,652 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 24 Sep 2017 16:59:49 -0700
+Subject: kaiser: add "nokaiser" boot option, using ALTERNATIVE
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Added "nokaiser" boot option: an early param like "noinvpcid".
+Most places now check int kaiser_enabled (#defined 0 when not
+CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S
+and entry_64_compat.S are using the ALTERNATIVE technique, which
+patches in the preferred instructions at runtime.  That technique
+is tied to x86 cpu features, so X86_FEATURE_KAISER is fabricated.
+
+Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that,
+but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when
+nokaiser like when !CONFIG_KAISER, but not setting either when kaiser -
+neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL
+won't get set in some obscure corner, or something adds PGE into CR4.
+By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled,
+all page table setup which uses pte_pfn() masks it out of the ptes.
+
+It's slightly shameful that the same declaration versus definition of
+kaiser_enabled appears in not one, not two, but in three header files
+(asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h).  I felt safer that way,
+than with #including any of those in any of the others; and did not
+feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes
+them all, so we shall hear about it if they get out of synch.
+
+Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER
+from kaiser.c; removed the unused native_get_normal_pgd(); removed
+the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some
+comments.  But more interestingly, set CR4.PSE in secondary_startup_64:
+the manual is clear that it does not matter whether it's 0 or 1 when
+4-level-pts are enabled, but I was distracted to find cr4 different on
+BSP and auxiliaries - BSP alone was adding PSE, in probe_page_size_mask().
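+
+In outline, the conversion pattern is (a sketch only: do_kaiser_work()
+is a made-up placeholder, the real hunks follow below):
+
+	/* C: was "#ifdef CONFIG_KAISER"; kaiser_enabled is #defined 0
+	 * when KAISER is not configured, so this compiles away */
+	if (kaiser_enabled)
+		do_kaiser_work();
+
+	/* asm: defaults to jumping past the CR3 switch, patched to the
+	 * real instruction when X86_FEATURE_KAISER is set at boot */
+	ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER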
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt      |    2 +
+ arch/x86/entry/entry_64.S                |   15 ++++++-----
+ arch/x86/include/asm/cpufeatures.h       |    3 ++
+ arch/x86/include/asm/kaiser.h            |   27 +++++++++++++++------
+ arch/x86/include/asm/pgtable.h           |   20 +++++++++++----
+ arch/x86/include/asm/pgtable_64.h        |   13 +++-------
+ arch/x86/include/asm/pgtable_types.h     |    4 ---
+ arch/x86/include/asm/tlbflush.h          |   39 +++++++++++++++++++------------
+ arch/x86/kernel/cpu/common.c             |   28 +++++++++++++++++++++-
+ arch/x86/kernel/espfix_64.c              |    3 +-
+ arch/x86/kernel/head_64.S                |    4 +--
+ arch/x86/mm/init.c                       |    2 -
+ arch/x86/mm/init_64.c                    |   10 +++++++
+ arch/x86/mm/kaiser.c                     |   26 +++++++++++++++++---
+ arch/x86/mm/pgtable.c                    |    8 +-----
+ arch/x86/mm/tlb.c                        |    4 ---
+ tools/arch/x86/include/asm/cpufeatures.h |    3 ++
+ 17 files changed, 146 insertions(+), 65 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2763,6 +2763,8 @@ bytes respectively. Such letter suffixes
+ 
+ 	nojitter	[IA-64] Disables jitter checking for ITC timers.
+ 
++	nokaiser	[X86-64] Disable KAISER isolation of kernel from user.
++
+ 	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver
+ 
+ 	no-kvmapf	[X86,KVM] Disable paravirtualized asynchronous page
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1079,7 +1079,7 @@ ENTRY(paranoid_entry)
+ 	 * unconditionally, but we need to find out whether the reverse
+ 	 * should be done on return (conveyed to paranoid_exit in %ebx).
+ 	 */
+-	movq	%cr3, %rax
++	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ 	testl	$KAISER_SHADOW_PGD_OFFSET, %eax
+ 	jz	2f
+ 	orl	$2, %ebx
+@@ -1111,6 +1111,7 @@ ENTRY(paranoid_exit)
+ 	TRACE_IRQS_OFF_DEBUG
+ 	TRACE_IRQS_IRETQ_DEBUG
+ #ifdef CONFIG_KAISER
++	/* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+ 	testl	$2, %ebx			/* SWITCH_USER_CR3 needed? */
+ 	jz	paranoid_exit_no_switch
+ 	SWITCH_USER_CR3
+@@ -1341,13 +1342,14 @@ ENTRY(nmi)
+ #ifdef CONFIG_KAISER
+ 	/* Unconditionally use kernel CR3 for do_nmi() */
+ 	/* %rax is saved above, so OK to clobber here */
+-	movq	%cr3, %rax
++	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ 	/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ 	orq	x86_cr3_pcid_noflush, %rax
+ 	pushq	%rax
+ 	/* mask off "user" bit of pgd address and 12 PCID bits: */
+ 	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ 	movq	%rax, %cr3
++2:
+ #endif
+ 	call	do_nmi
+ 
+@@ -1357,8 +1359,7 @@ ENTRY(nmi)
+ 	 * kernel code that needs user CR3, but do we ever return
+ 	 * to "user mode" where we need the kernel CR3?
+ 	 */
+-	popq	%rax
+-	mov	%rax, %cr3
++	ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+ #endif
+ 
+ 	/*
+@@ -1585,13 +1586,14 @@ end_repeat_nmi:
+ #ifdef CONFIG_KAISER
+ 	/* Unconditionally use kernel CR3 for do_nmi() */
+ 	/* %rax is saved above, so OK to clobber here */
+-	movq	%cr3, %rax
++	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ 	/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ 	orq	x86_cr3_pcid_noflush, %rax
+ 	pushq	%rax
+ 	/* mask off "user" bit of pgd address and 12 PCID bits: */
+ 	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ 	movq	%rax, %cr3
++2:
+ #endif
+ 
+ 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+@@ -1603,8 +1605,7 @@ end_repeat_nmi:
+ 	 * kernel code that needs user CR3, like just just before
+ 	 * a sysret.
+ 	 */
+-	popq	%rax
+-	mov	%rax, %cr3
++	ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+ #endif
+ 
+ 	testl	%ebx, %ebx			/* swapgs needed? */
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -198,6 +198,9 @@
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ 
++/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
++#define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
++
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+ #define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -46,28 +46,33 @@ movq \reg, %cr3
+ .endm
+ 
+ .macro SWITCH_KERNEL_CR3
+-pushq %rax
++ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+ _SWITCH_TO_KERNEL_CR3 %rax
+ popq %rax
++8:
+ .endm
+ 
+ .macro SWITCH_USER_CR3
+-pushq %rax
++ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+ _SWITCH_TO_USER_CR3 %rax %al
+ popq %rax
++8:
+ .endm
+ 
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+-movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++ALTERNATIVE "jmp 8f", \
++	__stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \
++	X86_FEATURE_KAISER
+ _SWITCH_TO_KERNEL_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++8:
+ .endm
+ 
+ #else /* CONFIG_KAISER */
+ 
+-.macro SWITCH_KERNEL_CR3 reg
++.macro SWITCH_KERNEL_CR3
+ .endm
+-.macro SWITCH_USER_CR3 reg regb
++.macro SWITCH_USER_CR3
+ .endm
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+ .endm
+@@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_p
+ 
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+ 
++extern int kaiser_enabled;
++#else
++#define kaiser_enabled	0
++#endif /* CONFIG_KAISER */
++
++/*
++ * Kaiser function prototypes are needed even when CONFIG_KAISER is not set,
++ * so as to build with tests on kaiser_enabled instead of #ifdefs.
++ */
++
+ /**
+  *  kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+  *  @addr: the start address of the range
+@@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsign
+  */
+ extern void kaiser_init(void);
+ 
+-#endif /* CONFIG_KAISER */
+-
+ #endif /* __ASSEMBLY */
+ 
+ #endif /* _ASM_X86_KAISER_H */
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -18,6 +18,12 @@
+ #ifndef __ASSEMBLY__
+ #include <asm/x86_init.h>
+ 
++#ifdef CONFIG_KAISER
++extern int kaiser_enabled;
++#else
++#define kaiser_enabled 0
++#endif
++
+ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+ void ptdump_walk_pgd_level_checkwx(void);
+ 
+@@ -697,7 +703,7 @@ static inline int pgd_bad(pgd_t pgd)
+ 	 * page table by accident; it will fault on the first
+ 	 * instruction it tries to run.  See native_set_pgd().
+ 	 */
+-	if (IS_ENABLED(CONFIG_KAISER))
++	if (kaiser_enabled)
+ 		ignore_flags |= _PAGE_NX;
+ 
+ 	return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
+@@ -913,12 +919,14 @@ static inline void pmdp_set_wrprotect(st
+  */
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+-       memcpy(dst, src, count * sizeof(pgd_t));
++	memcpy(dst, src, count * sizeof(pgd_t));
+ #ifdef CONFIG_KAISER
+-	/* Clone the shadow pgd part as well */
+-	memcpy(native_get_shadow_pgd(dst),
+-	       native_get_shadow_pgd(src),
+-	       count * sizeof(pgd_t));
++	if (kaiser_enabled) {
++		/* Clone the shadow pgd part as well */
++		memcpy(native_get_shadow_pgd(dst),
++			native_get_shadow_pgd(src),
++			count * sizeof(pgd_t));
++	}
+ #endif
+ }
+ 
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -111,13 +111,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t
+ 
+ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+ {
++#ifdef CONFIG_DEBUG_VM
++	/* linux/mmdebug.h may not have been included at this point */
++	BUG_ON(!kaiser_enabled);
++#endif
+ 	return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
+ }
+-
+-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+-{
+-	return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
+-}
+ #else
+ static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+@@ -128,10 +127,6 @@ static inline pgd_t *native_get_shadow_p
+ 	BUILD_BUG_ON(1);
+ 	return NULL;
+ }
+-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+-{
+-	return pgdp;
+-}
+ #endif /* CONFIG_KAISER */
+ 
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -45,11 +45,7 @@
+ #define _PAGE_ACCESSED	(_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+ #define _PAGE_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+ #define _PAGE_PSE	(_AT(pteval_t, 1) << _PAGE_BIT_PSE)
+-#ifdef CONFIG_KAISER
+-#define _PAGE_GLOBAL	(_AT(pteval_t, 0))
+-#else
+ #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+-#endif
+ #define _PAGE_SOFTW1	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_SOFTW2	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
+ #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -137,9 +137,11 @@ static inline void cr4_set_bits_and_upda
+  * to avoid the need for asm/kaiser.h in unexpected places.
+  */
+ #ifdef CONFIG_KAISER
++extern int kaiser_enabled;
+ extern void kaiser_setup_pcid(void);
+ extern void kaiser_flush_tlb_on_return_to_user(void);
+ #else
++#define kaiser_enabled 0
+ static inline void kaiser_setup_pcid(void)
+ {
+ }
+@@ -164,7 +166,7 @@ static inline void __native_flush_tlb(vo
+ 	 * back:
+ 	 */
+ 	preempt_disable();
+-	if (this_cpu_has(X86_FEATURE_PCID))
++	if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+ 		kaiser_flush_tlb_on_return_to_user();
+ 	native_write_cr3(native_read_cr3());
+ 	preempt_enable();
+@@ -175,20 +177,30 @@ static inline void __native_flush_tlb_gl
+ 	unsigned long cr4;
+ 
+ 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+-	/* clear PGE */
+-	native_write_cr4(cr4 & ~X86_CR4_PGE);
+-	/* write old PGE again and flush TLBs */
+-	native_write_cr4(cr4);
++	if (cr4 & X86_CR4_PGE) {
++		/* clear PGE and flush TLB of all entries */
++		native_write_cr4(cr4 & ~X86_CR4_PGE);
++		/* restore PGE as it was before */
++		native_write_cr4(cr4);
++	} else {
++		/*
++		 * x86_64 microcode update comes this way when CR4.PGE is not
++		 * enabled, and it's safer for all callers to allow this case.
++		 */
++		native_write_cr3(native_read_cr3());
++	}
+ }
+ 
+ static inline void __native_flush_tlb_global(void)
+ {
+-#ifdef CONFIG_KAISER
+-	/* Globals are not used at all */
+-	__native_flush_tlb();
+-#else
+ 	unsigned long flags;
+ 
++	if (kaiser_enabled) {
++		/* Globals are not used at all */
++		__native_flush_tlb();
++		return;
++	}
++
+ 	if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ 		/*
+ 		 * Using INVPCID is considerably faster than a pair of writes
+@@ -208,7 +220,6 @@ static inline void __native_flush_tlb_gl
+ 	raw_local_irq_save(flags);
+ 	__native_flush_tlb_global_irq_disabled();
+ 	raw_local_irq_restore(flags);
+-#endif
+ }
+ 
+ static inline void __native_flush_tlb_single(unsigned long addr)
+@@ -223,7 +234,7 @@ static inline void __native_flush_tlb_si
+ 	 */
+ 
+ 	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+-		if (this_cpu_has(X86_FEATURE_PCID))
++		if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+ 			kaiser_flush_tlb_on_return_to_user();
+ 		asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ 		return;
+@@ -238,9 +249,9 @@ static inline void __native_flush_tlb_si
+ 	 * Make sure to do only a single invpcid when KAISER is
+ 	 * disabled and we have only a single ASID.
+ 	 */
+-	if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
+-		invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+-	invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
++	if (kaiser_enabled)
++		invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
++	invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+ }
+ 
+ static inline void __flush_tlb_all(void)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -179,6 +179,20 @@ static int __init x86_pcid_setup(char *s
+ 	return 1;
+ }
+ __setup("nopcid", x86_pcid_setup);
++
++static int __init x86_nokaiser_setup(char *s)
++{
++	/* nokaiser doesn't accept parameters */
++	if (s)
++		return -EINVAL;
++#ifdef CONFIG_KAISER
++	kaiser_enabled = 0;
++	setup_clear_cpu_cap(X86_FEATURE_KAISER);
++	pr_info("nokaiser: KAISER feature disabled\n");
++#endif
++	return 0;
++}
++early_param("nokaiser", x86_nokaiser_setup);
+ #endif
+ 
+ static int __init x86_noinvpcid_setup(char *s)
+@@ -327,7 +341,7 @@ static __always_inline void setup_smap(s
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ 	if (cpu_has(c, X86_FEATURE_PCID)) {
+-		if (cpu_has(c, X86_FEATURE_PGE)) {
++		if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) {
+ 			cr4_set_bits(X86_CR4_PCIDE);
+ 			/*
+ 			 * INVPCID has two "groups" of types:
+@@ -799,6 +813,10 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ 		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+ 
+ 	init_scattered_cpuid_features(c);
++#ifdef CONFIG_KAISER
++	if (kaiser_enabled)
++		set_cpu_cap(c, X86_FEATURE_KAISER);
++#endif
+ }
+ 
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+@@ -1537,6 +1555,14 @@ void cpu_init(void)
+ 	 * try to read it.
+ 	 */
+ 	cr4_init_shadow();
++	if (!kaiser_enabled) {
++		/*
++		 * secondary_startup_64() deferred setting PGE in cr4:
++		 * probe_page_size_mask() sets it on the boot cpu,
++		 * but it needs to be set on each secondary cpu.
++		 */
++		cr4_set_bits(X86_CR4_PGE);
++	}
+ 
+ 	/*
+ 	 * Load microcode on this cpu if a valid microcode is available.
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -132,9 +132,10 @@ void __init init_espfix_bsp(void)
+ 	 * area to ensure it is mapped into the shadow user page
+ 	 * tables.
+ 	 */
+-	if (IS_ENABLED(CONFIG_KAISER))
++	if (kaiser_enabled) {
+ 		set_pgd(native_get_shadow_pgd(pgd_p),
+ 			__pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
++	}
+ 
+ 	/* Randomize the locations */
+ 	init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -190,8 +190,8 @@ ENTRY(secondary_startup_64)
+ 	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+ 1:
+ 
+-	/* Enable PAE mode and PGE */
+-	movl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx
++	/* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */
++	movl	$(X86_CR4_PAE | X86_CR4_PSE), %ecx
+ 	movq	%rcx, %cr4
+ 
+ 	/* Setup early boot stage 4 level pagetables. */
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -177,7 +177,7 @@ static void __init probe_page_size_mask(
+ 		cr4_set_bits_and_update_boot(X86_CR4_PSE);
+ 
+ 	/* Enable PGE if available */
+-	if (boot_cpu_has(X86_FEATURE_PGE)) {
++	if (boot_cpu_has(X86_FEATURE_PGE) && !kaiser_enabled) {
+ 		cr4_set_bits_and_update_boot(X86_CR4_PGE);
+ 		__supported_pte_mask |= _PAGE_GLOBAL;
+ 	} else
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -324,6 +324,16 @@ void __init cleanup_highmap(void)
+ 			continue;
+ 		if (vaddr < (unsigned long) _text || vaddr > end)
+ 			set_pmd(pmd, __pmd(0));
++		else if (kaiser_enabled) {
++			/*
++			 * level2_kernel_pgt is initialized with _PAGE_GLOBAL:
++			 * clear that now.  This is not important, so long as
++			 * CR4.PGE remains clear, but it removes an anomaly.
++			 * Physical mapping setup below avoids _PAGE_GLOBAL
++			 * by use of massage_pgprot() inside pfn_pte() etc.
++			 */
++			set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL));
++		}
+ 	}
+ }
+ 
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -16,7 +16,9 @@
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
+ 
+-#ifdef CONFIG_KAISER
++int kaiser_enabled __read_mostly = 1;
++EXPORT_SYMBOL(kaiser_enabled);	/* for inlined TLB flush functions */
++
+ __visible
+ DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ 
+@@ -167,8 +169,8 @@ static pte_t *kaiser_pagetable_walk(unsi
+ 	return pte_offset_kernel(pmd, address);
+ }
+ 
+-int kaiser_add_user_map(const void *__start_addr, unsigned long size,
+-			unsigned long flags)
++static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
++			       unsigned long flags)
+ {
+ 	int ret = 0;
+ 	pte_t *pte;
+@@ -177,6 +179,15 @@ int kaiser_add_user_map(const void *__st
+ 	unsigned long end_addr = PAGE_ALIGN(start_addr + size);
+ 	unsigned long target_address;
+ 
++	/*
++	 * It is convenient for callers to pass in __PAGE_KERNEL etc,
++	 * and there is no actual harm from setting _PAGE_GLOBAL, so
++	 * long as CR4.PGE is not set.  But it is nonetheless troubling
++	 * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser"
++	 * requires that not to be #defined to 0): so mask it off here.
++	 */
++	flags &= ~_PAGE_GLOBAL;
++
+ 	for (; address < end_addr; address += PAGE_SIZE) {
+ 		target_address = get_pa_from_mapping(address);
+ 		if (target_address == -1) {
+@@ -263,6 +274,8 @@ void __init kaiser_init(void)
+ {
+ 	int cpu;
+ 
++	if (!kaiser_enabled)
++		return;
+ 	kaiser_init_all_pgds();
+ 
+ 	for_each_possible_cpu(cpu) {
+@@ -311,6 +324,8 @@ void __init kaiser_init(void)
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+ {
++	if (!kaiser_enabled)
++		return 0;
+ 	return kaiser_add_user_map((const void *)addr, size, flags);
+ }
+ 
+@@ -322,6 +337,8 @@ void kaiser_remove_mapping(unsigned long
+ 	unsigned long addr, next;
+ 	pgd_t *pgd;
+ 
++	if (!kaiser_enabled)
++		return;
+ 	pgd = native_get_shadow_pgd(pgd_offset_k(start));
+ 	for (addr = start; addr < end; pgd++, addr = next) {
+ 		next = pgd_addr_end(addr, end);
+@@ -343,6 +360,8 @@ static inline bool is_userspace_pgd(pgd_
+ 
+ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
++	if (!kaiser_enabled)
++		return pgd;
+ 	/*
+ 	 * Do we need to also populate the shadow pgd?  Check _PAGE_USER to
+ 	 * skip cases like kexec and EFI which make temporary low mappings.
+@@ -399,4 +418,3 @@ void kaiser_flush_tlb_on_return_to_user(
+ 			X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+-#endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -345,16 +345,12 @@ static inline void _pgd_free(pgd_t *pgd)
+ }
+ #else
+ 
+-#ifdef CONFIG_KAISER
+ /*
+- * Instead of one pmd, we aquire two pmds.  Being order-1, it is
++ * Instead of one pgd, Kaiser acquires two pgds.  Being order-1, it is
+  * both 8k in size and 8k-aligned.  That lets us just flip bit 12
+  * in a pointer to swap between the two 4k halves.
+  */
+-#define PGD_ALLOCATION_ORDER 1
+-#else
+-#define PGD_ALLOCATION_ORDER 0
+-#endif
++#define PGD_ALLOCATION_ORDER	kaiser_enabled
+ 
+ static inline pgd_t *_pgd_alloc(void)
+ {
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -39,8 +39,7 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ 	unsigned long new_mm_cr3 = __pa(pgdir);
+ 
+-#ifdef CONFIG_KAISER
+-	if (this_cpu_has(X86_FEATURE_PCID)) {
++	if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
+ 		/*
+ 		 * We reuse the same PCID for different tasks, so we must
+ 		 * flush all the entries for the PCID out when we change tasks.
+@@ -57,7 +56,6 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ 		new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
+ 		kaiser_flush_tlb_on_return_to_user();
+ 	}
+-#endif /* CONFIG_KAISER */
+ 
+ 	/*
+ 	 * Caution: many callers of this function expect
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -197,6 +197,9 @@
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ 
++/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
++#define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
++
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+ #define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
diff --git a/queue/kaiser-align-addition-to-x86-mm-makefile.patch b/queue/kaiser-align-addition-to-x86-mm-makefile.patch
new file mode 100644
index 0000000..d8217dd
--- /dev/null
+++ b/queue/kaiser-align-addition-to-x86-mm-makefile.patch
@@ -0,0 +1,26 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 19:51:10 -0700
+Subject: kaiser: align addition to x86/mm/Makefile
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Use a tab, not spaces, so the entries line up properly; kaslr.o also.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/Makefile |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -37,5 +37,5 @@ obj-$(CONFIG_NUMA_EMU)		+= numa_emulatio
+ 
+ obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
+ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+-obj-$(CONFIG_KAISER) += kaiser.o
++obj-$(CONFIG_RANDOMIZE_MEMORY)	+= kaslr.o
++obj-$(CONFIG_KAISER)		+= kaiser.o
diff --git a/queue/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch b/queue/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch
new file mode 100644
index 0000000..2d9ee7e
--- /dev/null
+++ b/queue/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch
@@ -0,0 +1,86 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 4 Nov 2017 18:23:24 -0700
+Subject: kaiser: asm/tlbflush.h handle noPGE at lower level
+
+From: Hugh Dickins <hughd@google.com>
+
+
+I found asm/tlbflush.h too twisty, and think it safer not to avoid
+__native_flush_tlb_global_irq_disabled() in the kaiser_enabled case,
+but instead let it handle kaiser_enabled along with cr3: it can just
+use __native_flush_tlb() for that, no harm in re-disabling preemption.
+
+(This is not the same change as Kirill and Dave have suggested for
+upstream, flipping PGE in cr4: that's neat, but needs a cpu_has_pge
+check; cr3 is enough for kaiser, and thought to be cheaper than cr4.)
+
+Also delete the X86_FEATURE_INVPCID invpcid_flush_all_nonglobals()
+preference from __native_flush_tlb(): unlike the invpcid_flush_all()
+preference in __native_flush_tlb_global(), it's not seen in upstream
+4.14, and was recently reported to be surprisingly slow.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/tlbflush.h |   27 +++------------------------
+ 1 file changed, 3 insertions(+), 24 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -152,14 +152,6 @@ static inline void kaiser_flush_tlb_on_r
+ 
+ static inline void __native_flush_tlb(void)
+ {
+-	if (this_cpu_has(X86_FEATURE_INVPCID)) {
+-		/*
+-		 * Note, this works with CR4.PCIDE=0 or 1.
+-		 */
+-		invpcid_flush_all_nonglobals();
+-		return;
+-	}
+-
+ 	/*
+ 	 * If current->mm == NULL then we borrow a mm which may change during a
+ 	 * task switch and therefore we must not be preempted while we write CR3
+@@ -183,11 +175,8 @@ static inline void __native_flush_tlb_gl
+ 		/* restore PGE as it was before */
+ 		native_write_cr4(cr4);
+ 	} else {
+-		/*
+-		 * x86_64 microcode update comes this way when CR4.PGE is not
+-		 * enabled, and it's safer for all callers to allow this case.
+-		 */
+-		native_write_cr3(native_read_cr3());
++		/* do it with cr3, letting kaiser flush user PCID */
++		__native_flush_tlb();
+ 	}
+ }
+ 
+@@ -195,12 +184,6 @@ static inline void __native_flush_tlb_gl
+ {
+ 	unsigned long flags;
+ 
+-	if (kaiser_enabled) {
+-		/* Globals are not used at all */
+-		__native_flush_tlb();
+-		return;
+-	}
+-
+ 	if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ 		/*
+ 		 * Using INVPCID is considerably faster than a pair of writes
+@@ -256,11 +239,7 @@ static inline void __native_flush_tlb_si
+ 
+ static inline void __flush_tlb_all(void)
+ {
+-	if (boot_cpu_has(X86_FEATURE_PGE))
+-		__flush_tlb_global();
+-	else
+-		__flush_tlb();
+-
++	__flush_tlb_global();
+ 	/*
+ 	 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+ 	 * we'd end up flushing kernel translations for the current ASID but
diff --git a/queue/kaiser-cleanups-while-trying-for-gold-link.patch b/queue/kaiser-cleanups-while-trying-for-gold-link.patch
new file mode 100644
index 0000000..5e95f5b
--- /dev/null
+++ b/queue/kaiser-cleanups-while-trying-for-gold-link.patch
@@ -0,0 +1,134 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 21 Aug 2017 20:11:43 -0700
+Subject: kaiser: cleanups while trying for gold link
+
+From: Hugh Dickins <hughd@google.com>
+
+
+While trying to get our gold link to work, four cleanups:
+matched the gdt_page declaration to its definition;
+in fiddling unsuccessfully with PERCPU_INPUT(), lined up backslashes;
+lined up the backslashes according to convention in percpu-defs.h;
+deleted the unused irq_stack_pointer addition to irq_stack_union.
+
+Sad to report that aligning backslashes does not appear to help gold
+align to 8192: but while these did not help, they are worth keeping.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/desc.h       |    2 +-
+ arch/x86/include/asm/processor.h  |    5 -----
+ include/asm-generic/vmlinux.lds.h |   18 ++++++++----------
+ include/linux/percpu-defs.h       |   24 ++++++++++++------------
+ 4 files changed, 21 insertions(+), 28 deletions(-)
+
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -43,7 +43,7 @@ struct gdt_page {
+ 	struct desc_struct gdt[GDT_ENTRIES];
+ } __attribute__((aligned(PAGE_SIZE)));
+ 
+-DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
++DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page);
+ 
+ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+ {
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -335,11 +335,6 @@ union irq_stack_union {
+ 		char gs_base[40];
+ 		unsigned long stack_canary;
+ 	};
+-
+-	struct {
+-		char irq_stack_pointer[64];
+-		char unused[IRQ_STACK_SIZE - 64];
+-	};
+ };
+ 
+ DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -778,16 +778,14 @@
+  */
+ #define PERCPU_INPUT(cacheline)						\
+ 	VMLINUX_SYMBOL(__per_cpu_start) = .;				\
+-	\
+-	VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .;        \
+-	*(.data..percpu..first)           \
+-	. = ALIGN(cacheline);           \
+-	*(.data..percpu..user_mapped)            \
+-	*(.data..percpu..user_mapped..shared_aligned)        \
+-	. = ALIGN(PAGE_SIZE);           \
+-	*(.data..percpu..user_mapped..page_aligned)          \
+-	VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .;        \
+-	\
++	VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .;		\
++	*(.data..percpu..first)						\
++	. = ALIGN(cacheline);						\
++	*(.data..percpu..user_mapped)					\
++	*(.data..percpu..user_mapped..shared_aligned)			\
++	. = ALIGN(PAGE_SIZE);						\
++	*(.data..percpu..user_mapped..page_aligned)			\
++	VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .;			\
+ 	. = ALIGN(PAGE_SIZE);						\
+ 	*(.data..percpu..page_aligned)					\
+ 	. = ALIGN(cacheline);						\
+--- a/include/linux/percpu-defs.h
++++ b/include/linux/percpu-defs.h
+@@ -121,10 +121,10 @@
+ #define DEFINE_PER_CPU(type, name)					\
+ 	DEFINE_PER_CPU_SECTION(type, name, "")
+ 
+-#define DECLARE_PER_CPU_USER_MAPPED(type, name)         \
++#define DECLARE_PER_CPU_USER_MAPPED(type, name)				\
+ 	DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+ 
+-#define DEFINE_PER_CPU_USER_MAPPED(type, name)          \
++#define DEFINE_PER_CPU_USER_MAPPED(type, name)				\
+ 	DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+ 
+ /*
+@@ -156,11 +156,11 @@
+ 	DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
+ 	____cacheline_aligned_in_smp
+ 
+-#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name)			\
++#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name)		\
+ 	DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ 	____cacheline_aligned_in_smp
+ 
+-#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name)			\
++#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name)		\
+ 	DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ 	____cacheline_aligned_in_smp
+ 
+@@ -185,18 +185,18 @@
+ /*
+  * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
+  */
+-#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name)      \
+-  DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned")   \
+-  __aligned(PAGE_SIZE)
+-
+-#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name)       \
+-  DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned")    \
+-  __aligned(PAGE_SIZE)
++#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name)		\
++	DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++	__aligned(PAGE_SIZE)
++
++#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name)		\
++	DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++	__aligned(PAGE_SIZE)
+ 
+ /*
+  * Declaration/definition used for per-CPU variables that must be read mostly.
+  */
+-#define DECLARE_PER_CPU_READ_MOSTLY(type, name)			\
++#define DECLARE_PER_CPU_READ_MOSTLY(type, name)				\
+ 	DECLARE_PER_CPU_SECTION(type, name, "..read_mostly")
+ 
+ #define DEFINE_PER_CPU_READ_MOSTLY(type, name)				\
diff --git a/queue/kaiser-delete-kaiser_real_switch-option.patch b/queue/kaiser-delete-kaiser_real_switch-option.patch
new file mode 100644
index 0000000..bea2039
--- /dev/null
+++ b/queue/kaiser-delete-kaiser_real_switch-option.patch
@@ -0,0 +1,79 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 18:30:43 -0700
+Subject: kaiser: delete KAISER_REAL_SWITCH option
+
+From: Hugh Dickins <hughd@google.com>
+
+
+We fail to see what CONFIG_KAISER_REAL_SWITCH is for: it seems to be
+left over from early development, and now just obscures tricky parts
+of the code.  Delete it before adding PCIDs, or nokaiser boot option.
+
+(Or if there is some good reason to keep the option, then it needs
+a help text - and a "depends on KAISER", so that all those without
+KAISER are not asked the question.  But we'd much rather delete it.)
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S     |    4 ----
+ arch/x86/include/asm/kaiser.h |    4 ----
+ security/Kconfig              |    4 ----
+ 3 files changed, 12 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1317,9 +1317,7 @@ ENTRY(nmi)
+ 	/* %rax is saved above, so OK to clobber here */
+ 	movq	%cr3, %rax
+ 	pushq	%rax
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ 	andq	$(~KAISER_SHADOW_PGD_OFFSET), %rax
+-#endif
+ 	movq	%rax, %cr3
+ #endif
+ 	call	do_nmi
+@@ -1560,9 +1558,7 @@ end_repeat_nmi:
+ 	/* %rax is saved above, so OK to clobber here */
+ 	movq	%cr3, %rax
+ 	pushq	%rax
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ 	andq	$(~KAISER_SHADOW_PGD_OFFSET), %rax
+-#endif
+ 	movq	%rax, %cr3
+ #endif
+ 
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -21,17 +21,13 @@
+ 
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
+-#endif
+ movq \reg, %cr3
+ .endm
+ 
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+-#endif
+ movq \reg, %cr3
+ .endm
+ 
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -41,10 +41,6 @@ config KAISER
+ 
+ 	  If you are unsure how to answer this question, answer Y.
+ 
+-config KAISER_REAL_SWITCH
+-	bool "KAISER: actually switch page tables"
+-	default y
+-
+ config SECURITYFS
+ 	bool "Enable the securityfs filesystem"
+ 	help
diff --git a/queue/kaiser-disabled-on-xen-pv.patch b/queue/kaiser-disabled-on-xen-pv.patch
new file mode 100644
index 0000000..c306014
--- /dev/null
+++ b/queue/kaiser-disabled-on-xen-pv.patch
@@ -0,0 +1,42 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Tue, 2 Jan 2018 14:19:49 +0100
+Subject: kaiser: disabled on Xen PV
+
+From: Jiri Kosina <jkosina@suse.cz>
+
+
+Kaiser cannot be used on paravirtualized MMUs (it relies on raw reads and
+writes of CR3).  This does not work with KAISER, as the CR3 switch from
+and to the user space PGD would require mapping the whole XEN_PV
+machinery into both page tables.
+
+More importantly, enabling KAISER on Xen PV doesn't make too much sense, as PV
+guests use distinct %cr3 values for kernel and user already.
+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -263,6 +263,9 @@ void __init kaiser_check_boottime_disabl
+ 	char arg[5];
+ 	int ret;
+ 
++	if (boot_cpu_has(X86_FEATURE_XENPV))
++		goto silent_disable;
++
+ 	ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+ 	if (ret > 0) {
+ 		if (!strncmp(arg, "on", 2))
+@@ -290,6 +293,8 @@ enable:
+ 
+ disable:
+ 	pr_info("Kernel/User page tables isolation: disabled\n");
++
++silent_disable:
+ 	kaiser_enabled = 0;
+ 	setup_clear_cpu_cap(X86_FEATURE_KAISER);
+ }
diff --git a/queue/kaiser-do-not-set-_page_nx-on-pgd_none.patch b/queue/kaiser-do-not-set-_page_nx-on-pgd_none.patch
new file mode 100644
index 0000000..63a8639
--- /dev/null
+++ b/queue/kaiser-do-not-set-_page_nx-on-pgd_none.patch
@@ -0,0 +1,204 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 5 Sep 2017 12:05:01 -0700
+Subject: kaiser: do not set _PAGE_NX on pgd_none
+
+From: Hugh Dickins <hughd@google.com>
+
+
+native_pgd_clear() uses native_set_pgd(), so native_set_pgd() must
+avoid setting the _PAGE_NX bit on an otherwise pgd_none() entry:
+usually that just generated a warning on exit, but sometimes
+more mysterious and damaging failures (our production machines
+could not complete booting).
+
+The original fix to this just avoided adding _PAGE_NX to
+an empty entry; but eventually more problems surfaced with kexec,
+and EFI mapping expected to be a problem too.  So now instead
+change native_set_pgd() to update shadow only if _PAGE_USER:
+
+A few places (kernel/machine_kexec_64.c, platform/efi/efi_64.c for sure)
+use set_pgd() to set up a temporary internal virtual address space, with
+physical pages remapped at what Kaiser regards as userspace addresses:
+Kaiser then assumes a shadow pgd follows, which it will try to corrupt.
+
+This appears to be responsible for the recent kexec and kdump failures;
+though it's unclear how those did not manifest as a problem before.
+Ah, the shadow pgd will only be assumed to "follow" if the requested
+pgd is on an even-numbered page: so I suppose it was going wrong 50%
+of the time all along.
+
+What we need is a flag to set_pgd(), to tell it we're dealing with
+userspace.  Er, isn't that what the pgd's _PAGE_USER bit is saying?
+Add a test for that.  But we cannot do the same for pgd_clear()
+(which may be called to clear corrupted entries - set aside the
+question of "corrupt in which pgd?" until later), so there just
+rely on pgd_clear() not being called in the problematic cases -
+with a WARN_ON_ONCE() which should fire half the time if it is.
+
+But this is getting too big for an inline function: move it into
+arch/x86/mm/kaiser.c (which then demands a boot/compressed mod);
+and de-void and de-space native_get_shadow/normal_pgd() while here.
+
+Also make an unnecessary change to KASLR's init_trampoline(): it was
+using set_pgd() to assign a pgd-value to a global variable (not in a
+pg directory page), which was rather scary given Kaiser's previous
+set_pgd() implementation: not a problem now, but too scary to leave
+as was, it could easily blow up if we have to change set_pgd() again.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/misc.h   |    1 
+ arch/x86/include/asm/pgtable_64.h |   51 +++++++++-----------------------------
+ arch/x86/mm/kaiser.c              |   42 +++++++++++++++++++++++++++++++
+ arch/x86/mm/kaslr.c               |    4 +-
+ 4 files changed, 58 insertions(+), 40 deletions(-)
+
+--- a/arch/x86/boot/compressed/misc.h
++++ b/arch/x86/boot/compressed/misc.h
+@@ -9,6 +9,7 @@
+  */
+ #undef CONFIG_PARAVIRT
+ #undef CONFIG_PARAVIRT_SPINLOCKS
++#undef CONFIG_KAISER
+ #undef CONFIG_KASAN
+ 
+ #include <linux/linkage.h>
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -107,61 +107,36 @@ static inline void native_pud_clear(pud_
+ }
+ 
+ #ifdef CONFIG_KAISER
+-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
++
++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+ {
+-	return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
++	return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
+ }
+ 
+-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+ {
+-	return (pgd_t *)(void*)((unsigned long)(void*)pgdp &  ~(unsigned long)PAGE_SIZE);
++	return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
+ }
+ #else
+-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++	return pgd;
++}
++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+ {
+ 	BUILD_BUG_ON(1);
+ 	return NULL;
+ }
+-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+ {
+ 	return pgdp;
+ }
+ #endif /* CONFIG_KAISER */
+ 
+-/*
+- * Page table pages are page-aligned.  The lower half of the top
+- * level is used for userspace and the top half for the kernel.
+- * This returns true for user pages that need to get copied into
+- * both the user and kernel copies of the page tables, and false
+- * for kernel pages that should only be in the kernel copy.
+- */
+-static inline bool is_userspace_pgd(void *__ptr)
+-{
+-	unsigned long ptr = (unsigned long)__ptr;
+-
+-	return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2));
+-}
+-
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+-#ifdef CONFIG_KAISER
+-	pteval_t extra_kern_pgd_flags = 0;
+-	/* Do we need to also populate the shadow pgd? */
+-	if (is_userspace_pgd(pgdp)) {
+-		native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+-		/*
+-		 * Even if the entry is *mapping* userspace, ensure
+-		 * that userspace can not use it.  This way, if we
+-		 * get out to userspace running on the kernel CR3,
+-		 * userspace will crash instead of running.
+-		 */
+-		extra_kern_pgd_flags = _PAGE_NX;
+-	}
+-	pgdp->pgd = pgd.pgd;
+-	pgdp->pgd |= extra_kern_pgd_flags;
+-#else /* CONFIG_KAISER */
+-	*pgdp = pgd;
+-#endif
++	*pgdp = kaiser_set_shadow_pgd(pgdp, pgd);
+ }
+ 
+ static inline void native_pgd_clear(pgd_t *pgd)
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -302,4 +302,46 @@ void kaiser_remove_mapping(unsigned long
+ 		unmap_pud_range_nofree(pgd, addr, end);
+ 	}
+ }
++
++/*
++ * Page table pages are page-aligned.  The lower half of the top
++ * level is used for userspace and the top half for the kernel.
++ * This returns true for user pages that need to get copied into
++ * both the user and kernel copies of the page tables, and false
++ * for kernel pages that should only be in the kernel copy.
++ */
++static inline bool is_userspace_pgd(pgd_t *pgdp)
++{
++	return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2);
++}
++
++pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++	/*
++	 * Do we need to also populate the shadow pgd?  Check _PAGE_USER to
++	 * skip cases like kexec and EFI which make temporary low mappings.
++	 */
++	if (pgd.pgd & _PAGE_USER) {
++		if (is_userspace_pgd(pgdp)) {
++			native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++			/*
++			 * Even if the entry is *mapping* userspace, ensure
++			 * that userspace can not use it.  This way, if we
++			 * get out to userspace running on the kernel CR3,
++			 * userspace will crash instead of running.
++			 */
++			pgd.pgd |= _PAGE_NX;
++		}
++	} else if (!pgd.pgd) {
++		/*
++		 * pgd_clear() cannot check _PAGE_USER, and is even used to
++		 * clear corrupted pgd entries: so just rely on cases like
++		 * kexec and EFI never to be using pgd_clear().
++		 */
++		if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) &&
++		    is_userspace_pgd(pgdp))
++			native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++	}
++	return pgd;
++}
+ #endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/kaslr.c
++++ b/arch/x86/mm/kaslr.c
+@@ -189,6 +189,6 @@ void __meminit init_trampoline(void)
+ 		*pud_tramp = *pud;
+ 	}
+ 
+-	set_pgd(&trampoline_pgd_entry,
+-		__pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
++	/* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */
++	trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
+ }
diff --git a/queue/kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch b/queue/kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch
new file mode 100644
index 0000000..2fed5fc
--- /dev/null
+++ b/queue/kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch
@@ -0,0 +1,53 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 29 Oct 2017 11:36:19 -0700
+Subject: kaiser: drop is_atomic arg to kaiser_pagetable_walk()
+
+From: Hugh Dickins <hughd@google.com>
+
+
+I have not observed a might_sleep() warning from setup_fixmap_gdt()'s
+use of kaiser_add_mapping() in our tree (why not?), but like upstream
+we have not provided a way for that to pass is_atomic true down to
+kaiser_pagetable_walk(), and at startup it's far from a likely source
+of trouble: so just delete the walk's is_atomic arg and might_sleep().
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c |   10 ++--------
+ 1 file changed, 2 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -107,19 +107,13 @@ static inline unsigned long get_pa_from_
+  *
+  * Returns a pointer to a PTE on success, or NULL on failure.
+  */
+-static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
++static pte_t *kaiser_pagetable_walk(unsigned long address)
+ {
+ 	pmd_t *pmd;
+ 	pud_t *pud;
+ 	pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ 
+-	if (is_atomic) {
+-		gfp &= ~GFP_KERNEL;
+-		gfp |= __GFP_HIGH | __GFP_ATOMIC;
+-	} else
+-		might_sleep();
+-
+ 	if (pgd_none(*pgd)) {
+ 		WARN_ONCE(1, "All shadow pgds should have been populated");
+ 		return NULL;
+@@ -194,7 +188,7 @@ static int kaiser_add_user_map(const voi
+ 			ret = -EIO;
+ 			break;
+ 		}
+-		pte = kaiser_pagetable_walk(address, false);
++		pte = kaiser_pagetable_walk(address);
+ 		if (!pte) {
+ 			ret = -ENOMEM;
+ 			break;
diff --git a/queue/kaiser-enhanced-by-kernel-and-user-pcids.patch b/queue/kaiser-enhanced-by-kernel-and-user-pcids.patch
new file mode 100644
index 0000000..84c6fd8
--- /dev/null
+++ b/queue/kaiser-enhanced-by-kernel-and-user-pcids.patch
@@ -0,0 +1,403 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 30 Aug 2017 16:23:00 -0700
+Subject: kaiser: enhanced by kernel and user PCIDs
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Merged performance improvements to Kaiser, using distinct kernel
+and user Process Context Identifiers to minimize the TLB flushing.
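+
+In a simplified sketch (constant names as used in this series; kernel_pgd_pa
+stands in for the physical address of the kernel pgd, and the real code
+also handles the "flush on return to user" variants):
+
+	/* with CR4.PCIDE=1, CR3 bits 11:0 select the PCID (ASID), and
+	 * bit 63 (X86_CR3_PCID_NOFLUSH) suppresses the implicit flush */
+	kernel_cr3 = kernel_pgd_pa | X86_CR3_PCID_ASID_KERN;
+	user_cr3   = (kernel_pgd_pa | KAISER_SHADOW_PGD_OFFSET)
+			| X86_CR3_PCID_ASID_USER | X86_CR3_PCID_NOFLUSH;
+
+so the entry/exit CR3 switches no longer flush the TLB on every kernel
+entry and exit.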
+
+[This work actually all from Dave Hansen 2017-08-30:
+still omitting trackswitch mods, and KAISER_REAL_SWITCH deleted.]
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S                   |   10 ++++-
+ arch/x86/entry/entry_64_compat.S            |    1 
+ arch/x86/include/asm/cpufeatures.h          |    1 
+ arch/x86/include/asm/kaiser.h               |   15 ++++++-
+ arch/x86/include/asm/pgtable_types.h        |   26 +++++++++++++
+ arch/x86/include/asm/tlbflush.h             |   54 +++++++++++++++++++++++-----
+ arch/x86/include/uapi/asm/processor-flags.h |    3 +
+ arch/x86/kernel/cpu/common.c                |   34 +++++++++++++++++
+ arch/x86/kvm/x86.c                          |    3 +
+ arch/x86/mm/kaiser.c                        |    7 +++
+ arch/x86/mm/tlb.c                           |   46 ++++++++++++++++++++++-
+ 11 files changed, 182 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1317,7 +1317,10 @@ ENTRY(nmi)
+ 	/* %rax is saved above, so OK to clobber here */
+ 	movq	%cr3, %rax
+ 	pushq	%rax
+-	andq	$(~KAISER_SHADOW_PGD_OFFSET), %rax
++	/* mask off "user" bit of pgd address and 12 PCID bits: */
++	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++	/* Add back kernel PCID and "no flush" bit */
++	orq	X86_CR3_PCID_KERN_VAR, %rax
+ 	movq	%rax, %cr3
+ #endif
+ 	call	do_nmi
+@@ -1558,7 +1561,10 @@ end_repeat_nmi:
+ 	/* %rax is saved above, so OK to clobber here */
+ 	movq	%cr3, %rax
+ 	pushq	%rax
+-	andq	$(~KAISER_SHADOW_PGD_OFFSET), %rax
++	/* mask off "user" bit of pgd address and 12 PCID bits: */
++	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++	/* Add back kernel PCID and "no flush" bit */
++	orq	X86_CR3_PCID_KERN_VAR, %rax
+ 	movq	%rax, %cr3
+ #endif
+ 
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -13,6 +13,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/pgtable_types.h>
+ #include <asm/kaiser.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -189,6 +189,7 @@
+ 
+ #define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
+ #define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
++#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */
+ 
+ #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -1,5 +1,8 @@
+ #ifndef _ASM_X86_KAISER_H
+ #define _ASM_X86_KAISER_H
++
++#include <uapi/asm/processor-flags.h> /* For PCID constants */
++
+ /*
+  * This file includes the definitions for the KAISER feature.
+  * KAISER is a counter measure against x86_64 side channel attacks on
+@@ -21,13 +24,21 @@
+ 
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+-andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
++orq  X86_CR3_PCID_KERN_VAR, \reg
+ movq \reg, %cr3
+ .endm
+ 
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+-orq $(KAISER_SHADOW_PGD_OFFSET), \reg
++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
++/*
++ * This can obviously be one instruction by putting the
++ * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
++ * But, just leave it now for simplicity.
++ */
++orq  X86_CR3_PCID_USER_VAR, \reg
++orq  $(KAISER_SHADOW_PGD_OFFSET), \reg
+ movq \reg, %cr3
+ .endm
+ 
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -141,6 +141,32 @@
+ 			 _PAGE_SOFT_DIRTY)
+ #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+ 
++/* The ASID is the lower 12 bits of CR3 */
++#define X86_CR3_PCID_ASID_MASK  (_AC((1<<12)-1,UL))
++
++/* Mask for all the PCID-related bits in CR3: */
++#define X86_CR3_PCID_MASK       (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
++#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
++#define X86_CR3_PCID_ASID_KERN  (_AC(0x4,UL))
++#define X86_CR3_PCID_ASID_USER  (_AC(0x6,UL))
++
++#define X86_CR3_PCID_KERN_FLUSH		(X86_CR3_PCID_ASID_KERN)
++#define X86_CR3_PCID_USER_FLUSH		(X86_CR3_PCID_ASID_USER)
++#define X86_CR3_PCID_KERN_NOFLUSH	(X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
++#define X86_CR3_PCID_USER_NOFLUSH	(X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
++#else
++#define X86_CR3_PCID_ASID_KERN  (_AC(0x0,UL))
++#define X86_CR3_PCID_ASID_USER  (_AC(0x0,UL))
++/*
++ * PCIDs are unsupported on 32-bit and none of these bits can be
++ * set in CR3:
++ */
++#define X86_CR3_PCID_KERN_FLUSH		(0)
++#define X86_CR3_PCID_USER_FLUSH		(0)
++#define X86_CR3_PCID_KERN_NOFLUSH	(0)
++#define X86_CR3_PCID_USER_NOFLUSH	(0)
++#endif
++
+ /*
+  * The cache modes defined here are used to translate between pure SW usage
+  * and the HW defined cache mode bits and/or PAT entries.
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -13,7 +13,6 @@ static inline void __invpcid(unsigned lo
+ 			     unsigned long type)
+ {
+ 	struct { u64 d[2]; } desc = { { pcid, addr } };
+-
+ 	/*
+ 	 * The memory clobber is because the whole point is to invalidate
+ 	 * stale TLB entries and, especially if we're flushing global
+@@ -134,14 +133,25 @@ static inline void cr4_set_bits_and_upda
+ 
+ static inline void __native_flush_tlb(void)
+ {
++	if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
++		/*
++		 * If current->mm == NULL then we borrow a mm which may change during a
++		 * task switch and therefore we must not be preempted while we write CR3
++		 * back:
++		 */
++		preempt_disable();
++		native_write_cr3(native_read_cr3());
++		preempt_enable();
++		return;
++	}
+ 	/*
+-	 * If current->mm == NULL then we borrow a mm which may change during a
+-	 * task switch and therefore we must not be preempted while we write CR3
+-	 * back:
+-	 */
+-	preempt_disable();
+-	native_write_cr3(native_read_cr3());
+-	preempt_enable();
++	 * We are no longer using globals with KAISER, so a
++	 * "nonglobals" flush would work too. But, this is more
++	 * conservative.
++	 *
++	 * Note, this works with CR4.PCIDE=0 or 1.
++	 */
++	invpcid_flush_all();
+ }
+ 
+ static inline void __native_flush_tlb_global_irq_disabled(void)
+@@ -163,6 +173,8 @@ static inline void __native_flush_tlb_gl
+ 		/*
+ 		 * Using INVPCID is considerably faster than a pair of writes
+ 		 * to CR4 sandwiched inside an IRQ flag save/restore.
++		 *
++		 * Note, this works with CR4.PCIDE=0 or 1.
+ 		 */
+ 		invpcid_flush_all();
+ 		return;
+@@ -182,7 +194,31 @@ static inline void __native_flush_tlb_gl
+ 
+ static inline void __native_flush_tlb_single(unsigned long addr)
+ {
+-	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++	/*
++	 * SIMICS #GP's if you run INVPCID with type 2/3
++	 * and X86_CR4_PCIDE clear.  Shame!
++	 *
++	 * The ASIDs used below are hard-coded.  But, we must not
++	 * call invpcid(type=1/2) before CR4.PCIDE=1.  Just call
++	 * invpcid in the case we are called early.
++	 */
++	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
++		asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++		return;
++	}
++	/* Flush the address out of both PCIDs. */
++	/*
++	 * An optimization here might be to determine addresses
++	 * that are only kernel-mapped and only flush the kernel
++	 * ASID.  But, userspace flushes are probably much more
++	 * important performance-wise.
++	 *
++	 * Make sure to do only a single invpcid when KAISER is
++	 * disabled and we have only a single ASID.
++	 */
++	if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
++		invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
++	invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
+ }
+ 
+ static inline void __flush_tlb_all(void)
+--- a/arch/x86/include/uapi/asm/processor-flags.h
++++ b/arch/x86/include/uapi/asm/processor-flags.h
+@@ -77,7 +77,8 @@
+ #define X86_CR3_PWT		_BITUL(X86_CR3_PWT_BIT)
+ #define X86_CR3_PCD_BIT		4 /* Page Cache Disable */
+ #define X86_CR3_PCD		_BITUL(X86_CR3_PCD_BIT)
+-#define X86_CR3_PCID_MASK	_AC(0x00000fff,UL) /* PCID Mask */
++#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
++#define X86_CR3_PCID_NOFLUSH    _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
+ 
+ /*
+  * Intel CPU features in CR4
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -324,11 +324,45 @@ static __always_inline void setup_smap(s
+ 	}
+ }
+ 
++/*
++ * These can have bit 63 set, so we can not just use a plain "or"
++ * instruction to get their value or'd into CR3.  It would take
++ * another register.  So, we use a memory reference to these
++ * instead.
++ *
++ * This is also handy because systems that do not support
++ * PCIDs just end up or'ing a 0 into their CR3, which does
++ * no harm.
++ */
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
++
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ 	if (cpu_has(c, X86_FEATURE_PCID)) {
+ 		if (cpu_has(c, X86_FEATURE_PGE)) {
+ 			cr4_set_bits(X86_CR4_PCIDE);
++			/*
++			 * These variables are used by the entry/exit
++			 * code to change PCIDs.
++			 */
++#ifdef CONFIG_KAISER
++			X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
++			X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
++#endif
++			/*
++			 * INVPCID has two "groups" of types:
++			 * 1/2: Invalidate an individual address
++			 * 3/4: Invalidate all contexts
++			 *
++			 * 1/2 take a PCID, but 3/4 do not.  So, 3/4
++			 * ignore the PCID argument in the descriptor.
++			 * But, we have to be careful not to call 1/2
++			 * with an actual non-zero PCID in them before
++			 * we do the above cr4_set_bits().
++			 */
++			if (cpu_has(c, X86_FEATURE_INVPCID))
++				set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);
+ 		} else {
+ 			/*
+ 			 * flush_tlb_all(), as currently implemented, won't
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -773,7 +773,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, u
+ 			return 1;
+ 
+ 		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
+-		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
++		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) ||
++		    !is_long_mode(vcpu))
+ 			return 1;
+ 	}
+ 
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -239,6 +239,8 @@ static void __init kaiser_init_all_pgds(
+ } while (0)
+ 
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++extern unsigned long X86_CR3_PCID_KERN_VAR;
++extern unsigned long X86_CR3_PCID_USER_VAR;
+ /*
+  * If anything in here fails, we will likely die on one of the
+  * first kernel->user transitions and init will die.  But, we
+@@ -289,6 +291,11 @@ void __init kaiser_init(void)
+ 	kaiser_add_user_map_early(&debug_idt_table,
+ 				  sizeof(gate_desc) * NR_VECTORS,
+ 				  __PAGE_KERNEL);
++
++	kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
++				  __PAGE_KERNEL);
++	kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
++				  __PAGE_KERNEL);
+ }
+ 
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -34,6 +34,46 @@ struct flush_tlb_info {
+ 	unsigned long flush_end;
+ };
+ 
++static void load_new_mm_cr3(pgd_t *pgdir)
++{
++	unsigned long new_mm_cr3 = __pa(pgdir);
++
++	/*
++	 * KAISER, plus PCIDs needs some extra work here.  But,
++	 * if either of features is not present, we need no
++	 * PCIDs here and just do a normal, full TLB flush with
++	 * the write_cr3()
++	 */
++	if (!IS_ENABLED(CONFIG_KAISER) ||
++	    !cpu_feature_enabled(X86_FEATURE_PCID))
++		goto out_set_cr3;
++	/*
++	 * We reuse the same PCID for different tasks, so we must
++	 * flush all the entries for the PCID out when we change
++	 * tasks.
++	 */
++	new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
++
++	/*
++	 * The flush from load_cr3() may leave old TLB entries
++	 * for userspace in place.  We must flush that context
++	 * separately.  We can theoretically delay doing this
++	 * until we actually load up the userspace CR3, but
++	 * that's a bit tricky.  We have to have the "need to
++	 * flush userspace PCID" bit per-cpu and check it in the
++	 * exit-to-userspace paths.
++	 */
++	invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
++
++out_set_cr3:
++	/*
++	 * Caution: many callers of this function expect
++	 * that load_cr3() is serializing and orders TLB
++	 * fills with respect to the mm_cpumask writes.
++	 */
++	write_cr3(new_mm_cr3);
++}
++
+ /*
+  * We cannot call mmdrop() because we are in interrupt context,
+  * instead update mm->cpu_vm_mask.
+@@ -45,7 +85,7 @@ void leave_mm(int cpu)
+ 		BUG();
+ 	if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
+ 		cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
+-		load_cr3(swapper_pg_dir);
++		load_new_mm_cr3(swapper_pg_dir);
+ 		/*
+ 		 * This gets called in the idle path where RCU
+ 		 * functions differently.  Tracing normally
+@@ -120,7 +160,7 @@ void switch_mm_irqs_off(struct mm_struct
+ 		 * ordering guarantee we need.
+ 		 *
+ 		 */
+-		load_cr3(next->pgd);
++		load_new_mm_cr3(next->pgd);
+ 
+ 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ 
+@@ -167,7 +207,7 @@ void switch_mm_irqs_off(struct mm_struct
+ 			 * As above, load_cr3() is serializing and orders TLB
+ 			 * fills with respect to the mm_cpumask write.
+ 			 */
+-			load_cr3(next->pgd);
++			load_new_mm_cr3(next->pgd);
+ 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ 			load_mm_cr4(next);
+ 			load_mm_ldt(next);
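
For illustration, a minimal user-space sketch (not kernel code) of how the CR3 values above are composed: the shadow pgd sits one page above the kernel pgd (the KAISER_SHADOW_PGD_OFFSET bit), the low 12 bits of CR3 carry the PCID/ASID, and bit 63 is the "no flush" hint. The constants are illustrative copies of those added to pgtable_types.h; the helper names and the sample pgd address are made up. In the real entry code the NOFLUSH+ASID value is OR'd in from the X86_CR3_PCID_KERN_VAR/USER_VAR variables, which stay 0 when the CPU has no PCID support.

#include <stdint.h>
#include <stdio.h>

/* Illustrative copies of the constants introduced above. */
#define KAISER_SHADOW_PGD_OFFSET 0x1000ULL              /* shadow pgd is the next page */
#define X86_CR3_PCID_ASID_MASK   ((1ULL << 12) - 1)     /* CR3 bits 11:0 hold the ASID */
#define X86_CR3_PCID_NOFLUSH     (1ULL << 63)           /* keep TLB entries for this PCID */
#define X86_CR3_PCID_ASID_KERN   0x4ULL
#define X86_CR3_PCID_ASID_USER   0x6ULL

/* Hypothetical helper: what _SWITCH_TO_KERNEL_CR3 computes from the current CR3. */
static uint64_t to_kernel_cr3(uint64_t cr3)
{
	/* Mask off the "user" bit of the pgd address and the 12 PCID bits... */
	cr3 &= ~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET);
	/* ...then add back the kernel PCID and the "no flush" bit. */
	return cr3 | X86_CR3_PCID_ASID_KERN | X86_CR3_PCID_NOFLUSH;
}

/* Hypothetical helper: what _SWITCH_TO_USER_CR3 computes. */
static uint64_t to_user_cr3(uint64_t cr3)
{
	cr3 &= ~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET);
	return cr3 | KAISER_SHADOW_PGD_OFFSET |
	       X86_CR3_PCID_ASID_USER | X86_CR3_PCID_NOFLUSH;
}

int main(void)
{
	uint64_t pgd_pa = 0x1234000ULL;	/* made-up, page-aligned pgd physical address */

	printf("kernel CR3: %#llx\n", (unsigned long long)to_kernel_cr3(pgd_pa));
	printf("user   CR3: %#llx\n", (unsigned long long)to_user_cr3(pgd_pa));
	return 0;
}
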
diff --git a/queue/kaiser-enomem-if-kaiser_pagetable_walk-null.patch b/queue/kaiser-enomem-if-kaiser_pagetable_walk-null.patch
new file mode 100644
index 0000000..c2bd8bc
--- /dev/null
+++ b/queue/kaiser-enomem-if-kaiser_pagetable_walk-null.patch
@@ -0,0 +1,52 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 18:48:02 -0700
+Subject: kaiser: ENOMEM if kaiser_pagetable_walk() NULL
+
+From: Hugh Dickins <hughd@google.com>
+
+
+kaiser_add_user_map() took no notice when kaiser_pagetable_walk() failed.
+Also avoid the walk's might_sleep() when atomic (though atomic is at present unused).
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -98,11 +98,11 @@ static pte_t *kaiser_pagetable_walk(unsi
+ 	pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ 
+-	might_sleep();
+ 	if (is_atomic) {
+ 		gfp &= ~GFP_KERNEL;
+ 		gfp |= __GFP_HIGH | __GFP_ATOMIC;
+-	}
++	} else
++		might_sleep();
+ 
+ 	if (pgd_none(*pgd)) {
+ 		WARN_ONCE(1, "All shadow pgds should have been populated");
+@@ -159,13 +159,17 @@ int kaiser_add_user_map(const void *__st
+ 	unsigned long end_addr = PAGE_ALIGN(start_addr + size);
+ 	unsigned long target_address;
+ 
+-	for (;address < end_addr; address += PAGE_SIZE) {
++	for (; address < end_addr; address += PAGE_SIZE) {
+ 		target_address = get_pa_from_mapping(address);
+ 		if (target_address == -1) {
+ 			ret = -EIO;
+ 			break;
+ 		}
+ 		pte = kaiser_pagetable_walk(address, false);
++		if (!pte) {
++			ret = -ENOMEM;
++			break;
++		}
+ 		if (pte_none(*pte)) {
+ 			set_pte(pte, __pte(flags | target_address));
+ 		} else {
diff --git a/queue/kaiser-fix-build-and-fixme-in-alloc_ldt_struct.patch b/queue/kaiser-fix-build-and-fixme-in-alloc_ldt_struct.patch
new file mode 100644
index 0000000..92d279d
--- /dev/null
+++ b/queue/kaiser-fix-build-and-fixme-in-alloc_ldt_struct.patch
@@ -0,0 +1,53 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 17:09:44 -0700
+Subject: kaiser: fix build and FIXME in alloc_ldt_struct()
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Include linux/kaiser.h instead of asm/kaiser.h to build ldt.c without
+CONFIG_KAISER.  kaiser_add_mapping() already returns an error code,
+so fix the FIXME.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ldt.c |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -16,9 +16,9 @@
+ #include <linux/slab.h>
+ #include <linux/vmalloc.h>
+ #include <linux/uaccess.h>
++#include <linux/kaiser.h>
+ 
+ #include <asm/ldt.h>
+-#include <asm/kaiser.h>
+ #include <asm/desc.h>
+ #include <asm/mmu_context.h>
+ #include <asm/syscalls.h>
+@@ -49,7 +49,7 @@ static struct ldt_struct *alloc_ldt_stru
+ {
+ 	struct ldt_struct *new_ldt;
+ 	int alloc_size;
+-	int ret = 0;
++	int ret;
+ 
+ 	if (size > LDT_ENTRIES)
+ 		return NULL;
+@@ -77,10 +77,8 @@ static struct ldt_struct *alloc_ldt_stru
+ 		return NULL;
+ 	}
+ 
+-	// FIXME: make kaiser_add_mapping() return an error code
+-	// when it fails
+-	kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
+-			   __PAGE_KERNEL);
++	ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
++				 __PAGE_KERNEL);
+ 	if (ret) {
+ 		__free_ldt_struct(new_ldt);
+ 		return NULL;
diff --git a/queue/kaiser-fix-compile-error-without-vsyscall.patch b/queue/kaiser-fix-compile-error-without-vsyscall.patch
new file mode 100644
index 0000000..1c5369d
--- /dev/null
+++ b/queue/kaiser-fix-compile-error-without-vsyscall.patch
@@ -0,0 +1,46 @@
+From foo@baz Tue Feb 13 16:45:20 CET 2018
+Date: Tue, 13 Feb 2018 16:45:20 +0100
+To: Greg KH <gregkh@linuxfoundation.org>
+From: Hugh Dickins <hughd@google.com>
+Subject: kaiser: fix compile error without vsyscall
+
+From: Hugh Dickins <hughd@google.com>
+
+Tobias noticed a compile error on 4.4.115, and it's the same on 4.9.80:
+arch/x86/mm/kaiser.c: In function ‘kaiser_init’:
+arch/x86/mm/kaiser.c:348:8: error: ‘vsyscall_pgprot’ undeclared
+                                   (first use in this function)
+
+It turns out that his combination of kernel options does not build with
+KAISER: X86_VSYSCALL_EMULATION is not set on his system, while
+LEGACY_VSYSCALL is set to NONE (LEGACY_VSYSCALL_NONE=y). He got things
+compiling again by moving the 'extern unsigned long vsyscall_pgprot'
+declaration outside of the preprocessor conditional. This works because
+the optimizer removes the code that references it (vsyscall_enabled() is
+always false) - and that's how it was done in some older backports.
+
+Reported-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/vsyscall.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/vsyscall.h
++++ b/arch/x86/include/asm/vsyscall.h
+@@ -13,7 +13,6 @@ extern void map_vsyscall(void);
+  */
+ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+ extern bool vsyscall_enabled(void);
+-extern unsigned long vsyscall_pgprot;
+ #else
+ static inline void map_vsyscall(void) {}
+ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+@@ -22,5 +21,6 @@ static inline bool emulate_vsyscall(stru
+ }
+ static inline bool vsyscall_enabled(void) { return false; }
+ #endif
++extern unsigned long vsyscall_pgprot;
+ 
+ #endif /* _ASM_X86_VSYSCALL_H */
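
As a minimal sketch of why the move is safe (standalone C, names are stand-ins for the kernel's): with optimization enabled, the compiler folds the always-false predicate, drops the branch that references the extern symbol, and so the linker never needs a definition for it.

/* sketch.c - build with optimization, e.g. "cc -O2 sketch.c": it links even
 * though "nosym" is never defined anywhere, because the only reference to it
 * sits in a branch the optimizer removes.  This models kaiser_init()'s use of
 * vsyscall_pgprot when vsyscall_enabled() is compile-time false.
 */
#include <stdio.h>

extern unsigned long nosym;		/* declared, deliberately never defined */

static inline int feature_enabled(void)
{
	return 0;			/* stands in for vsyscall_enabled() */
}

int main(void)
{
	if (feature_enabled())		/* dead branch: removed at -O2 */
		printf("%lu\n", nosym);

	printf("feature off, symbol never referenced\n");
	return 0;
}
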
diff --git a/queue/kaiser-fix-intel_bts-perf-crashes.patch b/queue/kaiser-fix-intel_bts-perf-crashes.patch
new file mode 100644
index 0000000..5ea9a08
--- /dev/null
+++ b/queue/kaiser-fix-intel_bts-perf-crashes.patch
@@ -0,0 +1,132 @@
+From hughd@google.com  Mon Feb  5 04:59:18 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 29 Jan 2018 18:16:55 -0800
+Subject: kaiser: fix intel_bts perf crashes
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Hugh Dickins <hughd@google.com>, Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Andy Lutomirski <luto@amacapital.net>, Alexander Shishkin <alexander.shishkin@linux.intel.com>, Linus Torvalds <torvalds@linux-foundation.org>, Vince Weaver <vince@deater.net>, stable@vger.kernel.org, Jiri Kosina <jkosina@suse.cz>
+Message-ID: <20180130021655.229155-1-hughd@google.com>
+
+From: Hugh Dickins <hughd@google.com>
+
+Vince reported perf_fuzzer quickly locks up on 4.15-rc7 with PTI;
+Robert reported Bad RIP with KPTI and Intel BTS also on 4.15-rc7:
+honggfuzz -f /tmp/somedirectorywithatleastonefile \
+          --linux_perf_bts_edge -s -- /bin/true
+(honggfuzz from https://github.com/google/honggfuzz) crashed with
+BUG: unable to handle kernel paging request at ffff9d3215100000
+(then narrowed it down to
+perf record --per-thread -e intel_bts//u -- /bin/ls).
+
+The intel_bts driver does not use the 'normal' BTS buffer which is
+exposed through kaiser_add_mapping(), but instead uses the memory
+allocated for the perf AUX buffer.
+
+This obviously comes apart when using PTI, because then the kernel
+mapping, which includes that AUX buffer memory, disappears while
+switched to user page tables.
+
+Easily fixed in old-Kaiser backports, by applying kaiser_add_mapping()
+to those pages; perhaps not so easy for upstream, where 4.15-rc8 commit
+99a9dc98ba52 ("x86,perf: Disable intel_bts when PTI") disables for now.
+
+Slightly reorganized surrounding code in bts_buffer_setup_aux(),
+so it can better match bts_buffer_free_aux(): free_aux with an #ifdef
+to avoid the loop when PTI is off, but setup_aux needs to loop anyway
+(and kaiser_add_mapping() is cheap when PTI config is off or "pti=off").
+
+Reported-by: Vince Weaver <vincent.weaver@maine.edu>
+Reported-by: Robert Święcki <robert@swiecki.net>
+Analyzed-by: Peter Zijlstra <peterz@infradead.org>
+Analyzed-by: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Vince Weaver <vince@deater.net>
+Cc: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/bts.c |   44 +++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 33 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/events/intel/bts.c
++++ b/arch/x86/events/intel/bts.c
+@@ -22,6 +22,7 @@
+ #include <linux/debugfs.h>
+ #include <linux/device.h>
+ #include <linux/coredump.h>
++#include <linux/kaiser.h>
+ 
+ #include <asm-generic/sizes.h>
+ #include <asm/perf_event.h>
+@@ -77,6 +78,23 @@ static size_t buf_size(struct page *page
+ 	return 1 << (PAGE_SHIFT + page_private(page));
+ }
+ 
++static void bts_buffer_free_aux(void *data)
++{
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++	struct bts_buffer *buf = data;
++	int nbuf;
++
++	for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) {
++		struct page *page = buf->buf[nbuf].page;
++		void *kaddr = page_address(page);
++		size_t page_size = buf_size(page);
++
++		kaiser_remove_mapping((unsigned long)kaddr, page_size);
++	}
++#endif
++	kfree(data);
++}
++
+ static void *
+ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
+ {
+@@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pag
+ 	buf->real_size = size - size % BTS_RECORD_SIZE;
+ 
+ 	for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
+-		unsigned int __nr_pages;
++		void *kaddr = pages[pg];
++		size_t page_size;
++
++		page = virt_to_page(kaddr);
++		page_size = buf_size(page);
++
++		if (kaiser_add_mapping((unsigned long)kaddr,
++					page_size, __PAGE_KERNEL) < 0) {
++			buf->nr_bufs = nbuf;
++			bts_buffer_free_aux(buf);
++			return NULL;
++		}
+ 
+-		page = virt_to_page(pages[pg]);
+-		__nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
+ 		buf->buf[nbuf].page = page;
+ 		buf->buf[nbuf].offset = offset;
+ 		buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+-		buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
++		buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement;
+ 		pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
+ 		buf->buf[nbuf].size -= pad;
+ 
+-		pg += __nr_pages;
+-		offset += __nr_pages << PAGE_SHIFT;
++		pg += page_size >> PAGE_SHIFT;
++		offset += page_size;
+ 	}
+ 
+ 	return buf;
+ }
+ 
+-static void bts_buffer_free_aux(void *data)
+-{
+-	kfree(data);
+-}
+-
+ static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
+ {
+ 	return buf->buf[idx].offset + buf->buf[idx].displacement;
diff --git a/queue/kaiser-fix-perf-crashes.patch b/queue/kaiser-fix-perf-crashes.patch
new file mode 100644
index 0000000..6a9286c
--- /dev/null
+++ b/queue/kaiser-fix-perf-crashes.patch
@@ -0,0 +1,150 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 23 Aug 2017 14:21:14 -0700
+Subject: kaiser: fix perf crashes
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Avoid perf crashes: place debug_store in the user-mapped per-cpu area
+instead of allocating, and use the page allocator plus kaiser_add_mapping()
+to keep the BTS and PEBS buffers user-mapped (that is, present in the
+user mapping, though visible only to kernel and hardware).  The PEBS
+fixup buffer does not need this treatment.
+
+The need for a user-mapped struct debug_store showed up before doing
+any conscious perf testing: in a couple of kernel paging oopses on
+Westmere, implicating the debug_store offset of the per-cpu area.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/ds.c |   57 +++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 45 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -2,11 +2,15 @@
+ #include <linux/types.h>
+ #include <linux/slab.h>
+ 
++#include <asm/kaiser.h>
+ #include <asm/perf_event.h>
+ #include <asm/insn.h>
+ 
+ #include "../perf_event.h"
+ 
++static
++DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
++
+ /* The size of a BTS record in bytes: */
+ #define BTS_RECORD_SIZE		24
+ 
+@@ -268,6 +272,39 @@ void fini_debug_store_on_cpu(int cpu)
+ 
+ static DEFINE_PER_CPU(void *, insn_buffer);
+ 
++static void *dsalloc(size_t size, gfp_t flags, int node)
++{
++#ifdef CONFIG_KAISER
++	unsigned int order = get_order(size);
++	struct page *page;
++	unsigned long addr;
++
++	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
++	if (!page)
++		return NULL;
++	addr = (unsigned long)page_address(page);
++	if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) {
++		__free_pages(page, order);
++		addr = 0;
++	}
++	return (void *)addr;
++#else
++	return kmalloc_node(size, flags | __GFP_ZERO, node);
++#endif
++}
++
++static void dsfree(const void *buffer, size_t size)
++{
++#ifdef CONFIG_KAISER
++	if (!buffer)
++		return;
++	kaiser_remove_mapping((unsigned long)buffer, size);
++	free_pages((unsigned long)buffer, get_order(size));
++#else
++	kfree(buffer);
++#endif
++}
++
+ static int alloc_pebs_buffer(int cpu)
+ {
+ 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+@@ -278,7 +315,7 @@ static int alloc_pebs_buffer(int cpu)
+ 	if (!x86_pmu.pebs)
+ 		return 0;
+ 
+-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
++	buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+ 	if (unlikely(!buffer))
+ 		return -ENOMEM;
+ 
+@@ -289,7 +326,7 @@ static int alloc_pebs_buffer(int cpu)
+ 	if (x86_pmu.intel_cap.pebs_format < 2) {
+ 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ 		if (!ibuffer) {
+-			kfree(buffer);
++			dsfree(buffer, x86_pmu.pebs_buffer_size);
+ 			return -ENOMEM;
+ 		}
+ 		per_cpu(insn_buffer, cpu) = ibuffer;
+@@ -315,7 +352,8 @@ static void release_pebs_buffer(int cpu)
+ 	kfree(per_cpu(insn_buffer, cpu));
+ 	per_cpu(insn_buffer, cpu) = NULL;
+ 
+-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
++	dsfree((void *)(unsigned long)ds->pebs_buffer_base,
++			x86_pmu.pebs_buffer_size);
+ 	ds->pebs_buffer_base = 0;
+ }
+ 
+@@ -329,7 +367,7 @@ static int alloc_bts_buffer(int cpu)
+ 	if (!x86_pmu.bts)
+ 		return 0;
+ 
+-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
++	buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+ 	if (unlikely(!buffer)) {
+ 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
+ 		return -ENOMEM;
+@@ -355,19 +393,15 @@ static void release_bts_buffer(int cpu)
+ 	if (!ds || !x86_pmu.bts)
+ 		return;
+ 
+-	kfree((void *)(unsigned long)ds->bts_buffer_base);
++	dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
+ 	ds->bts_buffer_base = 0;
+ }
+ 
+ static int alloc_ds_buffer(int cpu)
+ {
+-	int node = cpu_to_node(cpu);
+-	struct debug_store *ds;
+-
+-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
+-	if (unlikely(!ds))
+-		return -ENOMEM;
++	struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);
+ 
++	memset(ds, 0, sizeof(*ds));
+ 	per_cpu(cpu_hw_events, cpu).ds = ds;
+ 
+ 	return 0;
+@@ -381,7 +415,6 @@ static void release_ds_buffer(int cpu)
+ 		return;
+ 
+ 	per_cpu(cpu_hw_events, cpu).ds = NULL;
+-	kfree(ds);
+ }
+ 
+ void release_ds_buffers(void)
diff --git a/queue/kaiser-fix-regs-to-do_nmi-ifndef-config_kaiser.patch b/queue/kaiser-fix-regs-to-do_nmi-ifndef-config_kaiser.patch
new file mode 100644
index 0000000..babde9a
--- /dev/null
+++ b/queue/kaiser-fix-regs-to-do_nmi-ifndef-config_kaiser.patch
@@ -0,0 +1,72 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 21 Sep 2017 20:39:56 -0700
+Subject: kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER
+
+From: Hugh Dickins <hughd@google.com>
+
+
+pjt has observed that nmi's second (nmi_from_kernel) call to do_nmi()
+adjusted the %rdi regs arg, rightly when CONFIG_KAISER, but wrongly
+when not CONFIG_KAISER.
+
+Although the minimal change is to add an #ifdef CONFIG_KAISER around
+the addq line, that looks cluttered, and I prefer how the first call
+to do_nmi() handled it: prepare args in %rdi and %rsi before getting
+into the CONFIG_KAISER block, since it does not touch them at all.
+
+And while we're here, move the "#ifdef CONFIG_KAISER" that follows each
+call, so that it encloses the "Unconditionally restore CR3" comment:
+matching how the "Unconditionally use kernel CR3" comment above is enclosed.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1323,12 +1323,13 @@ ENTRY(nmi)
+ 	movq	%rax, %cr3
+ #endif
+ 	call	do_nmi
++
++#ifdef CONFIG_KAISER
+ 	/*
+ 	 * Unconditionally restore CR3.  I know we return to
+ 	 * kernel code that needs user CR3, but do we ever return
+ 	 * to "user mode" where we need the kernel CR3?
+ 	 */
+-#ifdef CONFIG_KAISER
+ 	popq	%rax
+ 	mov	%rax, %cr3
+ #endif
+@@ -1552,6 +1553,8 @@ end_repeat_nmi:
+ 	SWAPGS
+ 	xorl	%ebx, %ebx
+ 1:
++	movq	%rsp, %rdi
++	movq	$-1, %rsi
+ #ifdef CONFIG_KAISER
+ 	/* Unconditionally use kernel CR3 for do_nmi() */
+ 	/* %rax is saved above, so OK to clobber here */
+@@ -1564,16 +1567,14 @@ end_repeat_nmi:
+ #endif
+ 
+ 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+-	movq	%rsp, %rdi
+-	addq	$8, %rdi /* point %rdi at ptregs, fixed up for CR3 */
+-	movq	$-1, %rsi
+ 	call	do_nmi
++
++#ifdef CONFIG_KAISER
+ 	/*
+ 	 * Unconditionally restore CR3.  We might be returning to
+ 	 * kernel code that needs user CR3, like just just before
+ 	 * a sysret.
+ 	 */
+-#ifdef CONFIG_KAISER
+ 	popq	%rax
+ 	mov	%rax, %cr3
+ #endif
diff --git a/queue/kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch b/queue/kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
new file mode 100644
index 0000000..6da60ee
--- /dev/null
+++ b/queue/kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
@@ -0,0 +1,33 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 4 Dec 2017 20:13:35 -0800
+Subject: kaiser: fix unlikely error in alloc_ldt_struct()
+
+From: Hugh Dickins <hughd@google.com>
+
+
+An error from kaiser_add_mapping() here is not at all likely, but
+Eric Biggers rightly points out that __free_ldt_struct() relies on
+new_ldt->size being initialized: move that up.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ldt.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -79,11 +79,11 @@ static struct ldt_struct *alloc_ldt_stru
+ 
+ 	ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
+ 				 __PAGE_KERNEL);
++	new_ldt->size = size;
+ 	if (ret) {
+ 		__free_ldt_struct(new_ldt);
+ 		return NULL;
+ 	}
+-	new_ldt->size = size;
+ 	return new_ldt;
+ }
+ 
diff --git a/queue/kaiser-kaiser-depends-on-smp.patch b/queue/kaiser-kaiser-depends-on-smp.patch
new file mode 100644
index 0000000..d9a4854
--- /dev/null
+++ b/queue/kaiser-kaiser-depends-on-smp.patch
@@ -0,0 +1,54 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 13 Sep 2017 14:03:10 -0700
+Subject: kaiser: KAISER depends on SMP
+
+From: Hugh Dickins <hughd@google.com>
+
+
+It is absurd that KAISER should depend on SMP, but apparently nobody
+has tried a UP build before: it breaks on an implicit declaration of
+the function 'per_cpu_offset' in arch/x86/mm/kaiser.c.
+
+Now, you would expect that to be trivially fixed up; but looking at
+the System.map when that block is #ifdef'ed out of kaiser_init(),
+I see that in a UP build __per_cpu_user_mapped_end is precisely at
+__per_cpu_user_mapped_start, and the items carefully gathered into
+that section for user-mapping on SMP are dispersed elsewhere on UP.
+
+So, some other kind of section assignment will be needed on UP,
+but implementing that is not a priority: just make KAISER depend
+on SMP for now.
+
+Also inserted a blank line before the option, tidied up the
+brief Kconfig help message, and added an "If unsure, Y".
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/Kconfig |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -30,14 +30,16 @@ config SECURITY
+ 	  model will be used.
+ 
+ 	  If you are unsure how to answer this question, answer N.
++
+ config KAISER
+ 	bool "Remove the kernel mapping in user mode"
+ 	default y
+-	depends on X86_64
+-	depends on !PARAVIRT
++	depends on X86_64 && SMP && !PARAVIRT
+ 	help
+-	  This enforces a strict kernel and user space isolation in order to close
+-	  hardware side channels on kernel address information.
++	  This enforces a strict kernel and user space isolation, in order
++	  to close hardware side channels on kernel address information.
++
++	  If you are unsure how to answer this question, answer Y.
+ 
+ config KAISER_REAL_SWITCH
+ 	bool "KAISER: actually switch page tables"
diff --git a/queue/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch b/queue/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch
new file mode 100644
index 0000000..75c1365
--- /dev/null
+++ b/queue/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch
@@ -0,0 +1,86 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 4 Nov 2017 18:43:06 -0700
+Subject: kaiser: kaiser_flush_tlb_on_return_to_user() check PCID
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Let kaiser_flush_tlb_on_return_to_user() do the X86_FEATURE_PCID
+check, instead of each caller doing it inline first: nobody needs
+to optimize for the noPCID case, it's clearer this way, and better
+suits later changes.  Replace those no-op X86_CR3_PCID_KERN_FLUSH lines
+by a BUILD_BUG_ON() in load_new_mm_cr3(), in case something changes.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/tlbflush.h |    4 ++--
+ arch/x86/mm/kaiser.c            |    6 +++---
+ arch/x86/mm/tlb.c               |    8 ++++----
+ 3 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -158,7 +158,7 @@ static inline void __native_flush_tlb(vo
+ 	 * back:
+ 	 */
+ 	preempt_disable();
+-	if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
++	if (kaiser_enabled)
+ 		kaiser_flush_tlb_on_return_to_user();
+ 	native_write_cr3(native_read_cr3());
+ 	preempt_enable();
+@@ -217,7 +217,7 @@ static inline void __native_flush_tlb_si
+ 	 */
+ 
+ 	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+-		if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
++		if (kaiser_enabled)
+ 			kaiser_flush_tlb_on_return_to_user();
+ 		asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ 		return;
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -435,12 +435,12 @@ void kaiser_setup_pcid(void)
+ 
+ /*
+  * Make a note that this cpu will need to flush USER tlb on return to user.
+- * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
+- * if cpu does not, then the NOFLUSH bit will never have been set.
++ * If cpu does not have PCID, then the NOFLUSH bit will never have been set.
+  */
+ void kaiser_flush_tlb_on_return_to_user(void)
+ {
+-	this_cpu_write(x86_cr3_pcid_user,
++	if (this_cpu_has(X86_FEATURE_PCID))
++		this_cpu_write(x86_cr3_pcid_user,
+ 			X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -39,7 +39,7 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ 	unsigned long new_mm_cr3 = __pa(pgdir);
+ 
+-	if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
++	if (kaiser_enabled) {
+ 		/*
+ 		 * We reuse the same PCID for different tasks, so we must
+ 		 * flush all the entries for the PCID out when we change tasks.
+@@ -50,10 +50,10 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ 		 * do it here, but can only be used if X86_FEATURE_INVPCID is
+ 		 * available - and many machines support pcid without invpcid.
+ 		 *
+-		 * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
+-		 * but keep that line in there in case something changes.
++		 * If X86_CR3_PCID_KERN_FLUSH actually added something, then it
++		 * would be needed in the write_cr3() below - if PCIDs enabled.
+ 		 */
+-		new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
++		BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH);
+ 		kaiser_flush_tlb_on_return_to_user();
+ 	}
+ 
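
A standalone sketch of the BUILD_BUG_ON() idea above (using C11 _Static_assert in place of the kernel's BUILD_BUG_ON, with made-up names): the assertion documents that the no-op OR was dropped only because the value is known to be zero, and turns any future change of that value into a build failure rather than a silently missing CR3 bit.

/* In the tree this patch applies to, the kernel ASID is 0, so the kernel
 * "flush" value is 0 and OR-ing it into CR3 would be a no-op.
 */
#define X86_CR3_PCID_KERN_FLUSH	0UL

/* Equivalent of BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH): refuse to build if
 * the value ever becomes non-zero, because build_new_cr3() below relies on
 * being allowed to omit it.
 */
_Static_assert(X86_CR3_PCID_KERN_FLUSH == 0,
	       "X86_CR3_PCID_KERN_FLUSH must stay zero");

static unsigned long build_new_cr3(unsigned long pgdir_pa)
{
	/* No "| X86_CR3_PCID_KERN_FLUSH" needed precisely because it is 0. */
	return pgdir_pa;
}

int main(void)
{
	return build_new_cr3(0x1234000UL) == 0x1234000UL ? 0 : 1;
}
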
diff --git a/queue/kaiser-kaiser_remove_mapping-move-along-the-pgd.patch b/queue/kaiser-kaiser_remove_mapping-move-along-the-pgd.patch
new file mode 100644
index 0000000..66fe640
--- /dev/null
+++ b/queue/kaiser-kaiser_remove_mapping-move-along-the-pgd.patch
@@ -0,0 +1,50 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 2 Oct 2017 10:57:24 -0700
+Subject: kaiser: kaiser_remove_mapping() move along the pgd
+
+From: Hugh Dickins <hughd@google.com>
+
+
+When removing the bogus comment from kaiser_remove_mapping(),
+I really ought to have checked the extent of its bogosity: as
+Neel points out, there is nothing to stop unmap_pud_range_nofree()
+from continuing beyond the end of a pud (and starting in the wrong
+position on the next).
+
+Fix kaiser_remove_mapping() to constrain the extent and advance the pgd
+pointer correctly: use the pgd_addr_end() macro as used throughout base
+mm (but don't assume page-rounded start and size in this case).
+
+But this bug was very unlikely to trigger in this backport: any buddy
+allocation is contained within a single pud extent, and we are not
+using vmapped stacks (and are only mapping one page of stack anyway),
+so the only way to hit this bug here would be when freeing a large
+modified ldt.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -319,11 +319,13 @@ void kaiser_remove_mapping(unsigned long
+ 	extern void unmap_pud_range_nofree(pgd_t *pgd,
+ 				unsigned long start, unsigned long end);
+ 	unsigned long end = start + size;
+-	unsigned long addr;
++	unsigned long addr, next;
++	pgd_t *pgd;
+ 
+-	for (addr = start; addr < end; addr += PGDIR_SIZE) {
+-		pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
+-		unmap_pud_range_nofree(pgd, addr, end);
++	pgd = native_get_shadow_pgd(pgd_offset_k(start));
++	for (addr = start; addr < end; pgd++, addr = next) {
++		next = pgd_addr_end(addr, end);
++		unmap_pud_range_nofree(pgd, addr, next);
+ 	}
+ }
+ 
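
A minimal standalone sketch of the iteration pattern adopted above: clamp each step at the next pgd boundary (or the end of the range, whichever comes first) and advance the pgd pointer in lockstep, instead of assuming the whole range lives under one pgd entry. The pgd_addr_end() below is a simplified copy of the kernel macro; the visit function is a stand-in for unmap_pud_range_nofree() and the addresses are made up.

#include <stdio.h>

#define PGDIR_SHIFT	39			/* x86_64 with 4-level paging */
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE - 1))

/* Simplified pgd_addr_end(): next pgd boundary after addr, capped at end. */
static unsigned long pgd_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;

	return (boundary - 1 < end - 1) ? boundary : end;
}

/* Stand-in for unmap_pud_range_nofree(): just report each per-pgd chunk. */
static void visit_range(int idx, unsigned long start, unsigned long end)
{
	printf("pgd slot %d: %#lx - %#lx\n", idx, start, end);
}

int main(void)
{
	/* A made-up range that straddles one pgd boundary. */
	unsigned long start = PGDIR_SIZE - 0x3000;
	unsigned long end   = start + 0x8000;
	unsigned long addr, next;
	int idx = 0;

	for (addr = start; addr < end; idx++, addr = next) {
		next = pgd_addr_end(addr, end);
		visit_range(idx, addr, next);	/* never crosses a pgd boundary */
	}
	return 0;
}
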
diff --git a/queue/kaiser-kernel-address-isolation.patch b/queue/kaiser-kernel-address-isolation.patch
new file mode 100644
index 0000000..079f4d6
--- /dev/null
+++ b/queue/kaiser-kernel-address-isolation.patch
@@ -0,0 +1,979 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Richard Fellner <richard.fellner@student.tugraz.at>
+Date: Thu, 4 May 2017 14:26:50 +0200
+Subject: KAISER: Kernel Address Isolation
+
+From: Richard Fellner <richard.fellner@student.tugraz.at>
+
+
+This patch introduces our implementation of KAISER (Kernel Address Isolation to
+have Side-channels Efficiently Removed), a kernel isolation technique to close
+hardware side channels on kernel address information.
+
+More information about the patch can be found on:
+
+        https://github.com/IAIK/KAISER
+
+From: Richard Fellner <richard.fellner@student.tugraz.at>
+From: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+Subject: [RFC, PATCH] x86_64: KAISER - do not map kernel in user mode
+Date: Thu, 4 May 2017 14:26:50 +0200
+Link: http://marc.info/?l=linux-kernel&m=149390087310405&w=2
+Kaiser-4.10-SHA1: c4b1831d44c6144d3762ccc72f0c4e71a0c713e5
+
+To: <linux-kernel@vger.kernel.org>
+To: <kernel-hardening@lists.openwall.com>
+Cc: <clementine.maurice@iaik.tugraz.at>
+Cc: <moritz.lipp@iaik.tugraz.at>
+Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+Cc: Richard Fellner <richard.fellner@student.tugraz.at>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: <kirill.shutemov@linux.intel.com>
+Cc: <anders.fogh@gdata-adan.de>
+
+After several recent works [1,2,3] KASLR on x86_64 was basically
+considered dead by many researchers. We have been working on an
+efficient but effective fix for this problem and found that not mapping
+the kernel space when running in user mode is the solution to this
+problem [4] (the corresponding paper [5] will be presented at ESSoS17).
+
+With this RFC patch we allow anybody to configure their kernel with the
+flag CONFIG_KAISER to add our defense mechanism.
+
+If there are any questions we would love to answer them.
+We also appreciate any comments!
+
+Cheers,
+Daniel (+ the KAISER team from Graz University of Technology)
+
+[1] http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf
+[2] https://www.blackhat.com/docs/us-16/materials/us-16-Fogh-Using-Undocumented-CPU-Behaviour-To-See-Into-Kernel-Mode-And-Break-KASLR-In-The-Process.pdf
+[3] https://www.blackhat.com/docs/us-16/materials/us-16-Jang-Breaking-Kernel-Address-Space-Layout-Randomization-KASLR-With-Intel-TSX.pdf
+[4] https://github.com/IAIK/KAISER
+[5] https://gruss.cc/files/kaiser.pdf
+
+[patch based also on
+https://raw.githubusercontent.com/IAIK/KAISER/master/KAISER/0001-KAISER-Kernel-Address-Isolation.patch]
+
+Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
+Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
+Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S            |   17 +++
+ arch/x86/entry/entry_64_compat.S     |    7 +
+ arch/x86/include/asm/hw_irq.h        |    2 
+ arch/x86/include/asm/kaiser.h        |  113 ++++++++++++++++++++++++
+ arch/x86/include/asm/pgtable.h       |    4 
+ arch/x86/include/asm/pgtable_64.h    |   21 ++++
+ arch/x86/include/asm/pgtable_types.h |   12 ++
+ arch/x86/include/asm/processor.h     |    7 +
+ arch/x86/kernel/cpu/common.c         |    4 
+ arch/x86/kernel/espfix_64.c          |    6 +
+ arch/x86/kernel/head_64.S            |   16 ++-
+ arch/x86/kernel/irqinit.c            |    2 
+ arch/x86/kernel/process.c            |    2 
+ arch/x86/mm/Makefile                 |    2 
+ arch/x86/mm/kaiser.c                 |  160 +++++++++++++++++++++++++++++++++++
+ arch/x86/mm/pageattr.c               |    2 
+ arch/x86/mm/pgtable.c                |   26 +++++
+ include/asm-generic/vmlinux.lds.h    |   11 ++
+ include/linux/percpu-defs.h          |   30 ++++++
+ init/main.c                          |    6 +
+ kernel/fork.c                        |    8 +
+ security/Kconfig                     |    7 +
+ 22 files changed, 449 insertions(+), 16 deletions(-)
+ create mode 100644 arch/x86/include/asm/kaiser.h
+ create mode 100644 arch/x86/mm/kaiser.c
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -36,6 +36,7 @@
+ #include <asm/smap.h>
+ #include <asm/pgtable_types.h>
+ #include <asm/export.h>
++#include <asm/kaiser.h>
+ #include <linux/err.h>
+ 
+ /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
+@@ -146,6 +147,7 @@ ENTRY(entry_SYSCALL_64)
+ 	 * it is too small to ever cause noticeable irq latency.
+ 	 */
+ 	SWAPGS_UNSAFE_STACK
++	SWITCH_KERNEL_CR3_NO_STACK
+ 	/*
+ 	 * A hypervisor implementation might want to use a label
+ 	 * after the swapgs, so that it can do the swapgs
+@@ -228,6 +230,7 @@ entry_SYSCALL_64_fastpath:
+ 	movq	RIP(%rsp), %rcx
+ 	movq	EFLAGS(%rsp), %r11
+ 	RESTORE_C_REGS_EXCEPT_RCX_R11
++	SWITCH_USER_CR3
+ 	movq	RSP(%rsp), %rsp
+ 	USERGS_SYSRET64
+ 
+@@ -323,10 +326,12 @@ return_from_SYSCALL_64:
+ syscall_return_via_sysret:
+ 	/* rcx and r11 are already restored (see code above) */
+ 	RESTORE_C_REGS_EXCEPT_RCX_R11
++	SWITCH_USER_CR3
+ 	movq	RSP(%rsp), %rsp
+ 	USERGS_SYSRET64
+ 
+ opportunistic_sysret_failed:
++	SWITCH_USER_CR3
+ 	SWAPGS
+ 	jmp	restore_c_regs_and_iret
+ END(entry_SYSCALL_64)
+@@ -424,6 +429,7 @@ ENTRY(ret_from_fork)
+ 	movq	%rsp, %rdi
+ 	call	syscall_return_slowpath	/* returns with IRQs disabled */
+ 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
++	SWITCH_USER_CR3
+ 	SWAPGS
+ 	jmp	restore_regs_and_iret
+ 
+@@ -478,6 +484,7 @@ END(irq_entries_start)
+ 	 * tracking that we're in kernel mode.
+ 	 */
+ 	SWAPGS
++	SWITCH_KERNEL_CR3
+ 
+ 	/*
+ 	 * We need to tell lockdep that IRQs are off.  We can't do this until
+@@ -535,6 +542,7 @@ GLOBAL(retint_user)
+ 	mov	%rsp,%rdi
+ 	call	prepare_exit_to_usermode
+ 	TRACE_IRQS_IRETQ
++	SWITCH_USER_CR3
+ 	SWAPGS
+ 	jmp	restore_regs_and_iret
+ 
+@@ -612,6 +620,7 @@ native_irq_return_ldt:
+ 
+ 	pushq	%rdi				/* Stash user RDI */
+ 	SWAPGS
++	SWITCH_KERNEL_CR3
+ 	movq	PER_CPU_VAR(espfix_waddr), %rdi
+ 	movq	%rax, (0*8)(%rdi)		/* user RAX */
+ 	movq	(1*8)(%rsp), %rax		/* user RIP */
+@@ -638,6 +647,7 @@ native_irq_return_ldt:
+ 	 * still points to an RO alias of the ESPFIX stack.
+ 	 */
+ 	orq	PER_CPU_VAR(espfix_stack), %rax
++	SWITCH_USER_CR3
+ 	SWAPGS
+ 	movq	%rax, %rsp
+ 
+@@ -1034,6 +1044,7 @@ ENTRY(paranoid_entry)
+ 	testl	%edx, %edx
+ 	js	1f				/* negative -> in kernel */
+ 	SWAPGS
++	SWITCH_KERNEL_CR3
+ 	xorl	%ebx, %ebx
+ 1:	ret
+ END(paranoid_entry)
+@@ -1056,6 +1067,7 @@ ENTRY(paranoid_exit)
+ 	testl	%ebx, %ebx			/* swapgs needed? */
+ 	jnz	paranoid_exit_no_swapgs
+ 	TRACE_IRQS_IRETQ
++	SWITCH_USER_CR3_NO_STACK
+ 	SWAPGS_UNSAFE_STACK
+ 	jmp	paranoid_exit_restore
+ paranoid_exit_no_swapgs:
+@@ -1084,6 +1096,7 @@ ENTRY(error_entry)
+ 	 * from user mode due to an IRET fault.
+ 	 */
+ 	SWAPGS
++	SWITCH_KERNEL_CR3
+ 
+ .Lerror_entry_from_usermode_after_swapgs:
+ 	/*
+@@ -1135,6 +1148,7 @@ ENTRY(error_entry)
+ 	 * Switch to kernel gsbase:
+ 	 */
+ 	SWAPGS
++	SWITCH_KERNEL_CR3
+ 
+ 	/*
+ 	 * Pretend that the exception came from user mode: set up pt_regs
+@@ -1235,6 +1249,7 @@ ENTRY(nmi)
+ 	 */
+ 
+ 	SWAPGS_UNSAFE_STACK
++	SWITCH_KERNEL_CR3_NO_STACK
+ 	cld
+ 	movq	%rsp, %rdx
+ 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+@@ -1275,6 +1290,7 @@ ENTRY(nmi)
+ 	 * work, because we don't want to enable interrupts.  Fortunately,
+ 	 * do_nmi doesn't modify pt_regs.
+ 	 */
++	SWITCH_USER_CR3
+ 	SWAPGS
+ 	jmp	restore_c_regs_and_iret
+ 
+@@ -1486,6 +1502,7 @@ end_repeat_nmi:
+ 	testl	%ebx, %ebx			/* swapgs needed? */
+ 	jnz	nmi_restore
+ nmi_swapgs:
++	SWITCH_USER_CR3_NO_STACK
+ 	SWAPGS_UNSAFE_STACK
+ nmi_restore:
+ 	RESTORE_EXTRA_REGS
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -13,6 +13,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/kaiser.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+ 
+@@ -48,6 +49,7 @@
+ ENTRY(entry_SYSENTER_compat)
+ 	/* Interrupts are off on entry. */
+ 	SWAPGS_UNSAFE_STACK
++	SWITCH_KERNEL_CR3_NO_STACK
+ 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ 
+ 	/*
+@@ -184,6 +186,7 @@ ENDPROC(entry_SYSENTER_compat)
+ ENTRY(entry_SYSCALL_compat)
+ 	/* Interrupts are off on entry. */
+ 	SWAPGS_UNSAFE_STACK
++	SWITCH_KERNEL_CR3_NO_STACK
+ 
+ 	/* Stash user ESP and switch to the kernel stack. */
+ 	movl	%esp, %r8d
+@@ -259,6 +262,7 @@ sysret32_from_system_call:
+ 	xorq	%r8, %r8
+ 	xorq	%r9, %r9
+ 	xorq	%r10, %r10
++	SWITCH_USER_CR3
+ 	movq	RSP-ORIG_RAX(%rsp), %rsp
+ 	swapgs
+ 	sysretl
+@@ -297,7 +301,7 @@ ENTRY(entry_INT80_compat)
+ 	PARAVIRT_ADJUST_EXCEPTION_FRAME
+ 	ASM_CLAC			/* Do this early to minimize exposure */
+ 	SWAPGS
+-
++	SWITCH_KERNEL_CR3_NO_STACK
+ 	/*
+ 	 * User tracing code (ptrace or signal handlers) might assume that
+ 	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
+@@ -338,6 +342,7 @@ ENTRY(entry_INT80_compat)
+ 
+ 	/* Go back to user mode. */
+ 	TRACE_IRQS_ON
++	SWITCH_USER_CR3_NO_STACK
+ 	SWAPGS
+ 	jmp	restore_regs_and_iret
+ END(entry_INT80_compat)
+--- a/arch/x86/include/asm/hw_irq.h
++++ b/arch/x86/include/asm/hw_irq.h
+@@ -178,7 +178,7 @@ extern char irq_entries_start[];
+ #define VECTOR_RETRIGGERED	((void *)~0UL)
+ 
+ typedef struct irq_desc* vector_irq_t[NR_VECTORS];
+-DECLARE_PER_CPU(vector_irq_t, vector_irq);
++DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);
+ 
+ #endif /* !ASSEMBLY_ */
+ 
+--- /dev/null
++++ b/arch/x86/include/asm/kaiser.h
+@@ -0,0 +1,113 @@
++#ifndef _ASM_X86_KAISER_H
++#define _ASM_X86_KAISER_H
++
++/* This file includes the definitions for the KAISER feature.
++ * KAISER is a countermeasure against x86_64 side channel attacks on the kernel virtual memory.
++ * It has a shadow pgd for every process: the shadow pgd has a minimal kernel set mapped,
++ * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled,
++ * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled.
++ * By this, the virtual memory caches are freed, and the user cannot attack the whole kernel memory.
++ *
++ * A minimal kernel mapping holds the parts needed to be mapped in user mode, such as the entry/exit
++ * functions of the user space, or the stacks.
++ */
++#ifdef __ASSEMBLY__
++#ifdef CONFIG_KAISER
++
++.macro _SWITCH_TO_KERNEL_CR3 reg
++movq %cr3, \reg
++andq $(~0x1000), \reg
++movq \reg, %cr3
++.endm
++
++.macro _SWITCH_TO_USER_CR3 reg
++movq %cr3, \reg
++orq $(0x1000), \reg
++movq \reg, %cr3
++.endm
++
++.macro SWITCH_KERNEL_CR3
++pushq %rax
++_SWITCH_TO_KERNEL_CR3 %rax
++popq %rax
++.endm
++
++.macro SWITCH_USER_CR3
++pushq %rax
++_SWITCH_TO_USER_CR3 %rax
++popq %rax
++.endm
++
++.macro SWITCH_KERNEL_CR3_NO_STACK
++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++_SWITCH_TO_KERNEL_CR3 %rax
++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++.endm
++
++
++.macro SWITCH_USER_CR3_NO_STACK
++
++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++_SWITCH_TO_USER_CR3 %rax
++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++
++.endm
++
++#else /* CONFIG_KAISER */
++
++.macro SWITCH_KERNEL_CR3 reg
++.endm
++.macro SWITCH_USER_CR3 reg
++.endm
++.macro SWITCH_USER_CR3_NO_STACK
++.endm
++.macro SWITCH_KERNEL_CR3_NO_STACK
++.endm
++
++#endif /* CONFIG_KAISER */
++#else /* __ASSEMBLY__ */
++
++
++#ifdef CONFIG_KAISER
++// Upon kernel/user mode switch, it may happen that
++// the address space has to be switched before the registers have been stored.
++// To change the address space, another register is needed.
++// A register therefore has to be stored/restored.
++//
++DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++#endif /* CONFIG_KAISER */
++
++/**
++ *  shadowmem_add_mapping - map a virtual memory part to the shadow mapping
++ *  @addr: the start address of the range
++ *  @size: the size of the range
++ *  @flags: The mapping flags of the pages
++ *
++ *  the mapping is done on a global scope, so no bigger synchronization has to be done.
++ *  the pages have to be manually unmapped again when they are not needed any longer.
++ */
++extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
++
++
++/**
++ *  shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping
++ *  @addr: the start address of the range
++ *  @size: the size of the range
++ */
++extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
++
++/**
++ *  shadowmem_initialize_mapping - Initialize the shadow mapping
++ *
++ *  most parts of the shadow mapping can be mapped upon boot time.
++ *  only the thread stacks have to be mapped on runtime.
++ *  the mapped regions are not unmapped at all.
++ */
++extern void kaiser_init(void);
++
++#endif
++
++
++
++#endif /* _ASM_X86_KAISER_H */
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -904,6 +904,10 @@ static inline void pmdp_set_wrprotect(st
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+        memcpy(dst, src, count * sizeof(pgd_t));
++#ifdef CONFIG_KAISER
++	// clone the shadow pgd part as well
++	memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t));
++#endif
+ }
+ 
+ #define PTE_SHIFT ilog2(PTRS_PER_PTE)
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -106,9 +106,30 @@ static inline void native_pud_clear(pud_
+ 	native_set_pud(pud, native_make_pud(0));
+ }
+ 
++#ifdef CONFIG_KAISER
++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) {
++	return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
++}
++
++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) {
++	return (pgd_t *)(void*)((unsigned long)(void*)pgdp &  ~(unsigned long)PAGE_SIZE);
++}
++#endif /* CONFIG_KAISER */
++
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
++#ifdef CONFIG_KAISER
++	// We know that a pgd is page aligned.
++	// Therefore the lower indices have to be mapped to user space.
++	// These pages are mapped to the shadow mapping.
++	if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) {
++		native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++	}
++
++	pgdp->pgd = pgd.pgd & ~_PAGE_USER;
++#else /* CONFIG_KAISER */
+ 	*pgdp = pgd;
++#endif
+ }
+ 
+ static inline void native_pgd_clear(pgd_t *pgd)
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -45,7 +45,11 @@
+ #define _PAGE_ACCESSED	(_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+ #define _PAGE_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+ #define _PAGE_PSE	(_AT(pteval_t, 1) << _PAGE_BIT_PSE)
+-#define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
++#ifdef CONFIG_KAISER
++#define _PAGE_GLOBAL	(_AT(pteval_t, 0))
++#else
++#define _PAGE_GLOBAL  (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
++#endif
+ #define _PAGE_SOFTW1	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_SOFTW2	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
+ #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+@@ -119,7 +123,11 @@
+ #define _PAGE_DEVMAP	(_AT(pteval_t, 0))
+ #endif
+ 
+-#define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
++#ifdef CONFIG_KAISER
++#define _PAGE_PROTNONE	(_AT(pteval_t, 0))
++#else
++#define _PAGE_PROTNONE  (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
++#endif
+ 
+ #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
+ 			 _PAGE_ACCESSED | _PAGE_DIRTY)
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -308,7 +308,7 @@ struct tss_struct {
+ 
+ } ____cacheline_aligned;
+ 
+-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
++DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
+ 
+ #ifdef CONFIG_X86_32
+ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+@@ -335,6 +335,11 @@ union irq_stack_union {
+ 		char gs_base[40];
+ 		unsigned long stack_canary;
+ 	};
++
++	struct {
++		char irq_stack_pointer[64];
++		char unused[IRQ_STACK_SIZE - 64];
++	};
+ };
+ 
+ DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -93,7 +93,7 @@ static const struct cpu_dev default_cpu
+ 
+ static const struct cpu_dev *this_cpu = &default_cpu;
+ 
+-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
+ #ifdef CONFIG_X86_64
+ 	/*
+ 	 * We need valid kernel segments for data and code in long mode too
+@@ -1365,7 +1365,7 @@ static const unsigned int exception_stac
+ 	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
+ };
+ 
+-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks
+ 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+ 
+ /* May not be marked __init: used by software suspend */
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -41,6 +41,7 @@
+ #include <asm/pgalloc.h>
+ #include <asm/setup.h>
+ #include <asm/espfix.h>
++#include <asm/kaiser.h>
+ 
+ /*
+  * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
+@@ -126,6 +127,11 @@ void __init init_espfix_bsp(void)
+ 	/* Install the espfix pud into the kernel page directory */
+ 	pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+ 	pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
++#ifdef CONFIG_KAISER
++	// add the esp stack pud to the shadow mapping here.
++	// This can be done directly, because the fixup stack has its own pud
++	set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page)));
++#endif
+ 
+ 	/* Randomize the locations */
+ 	init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -405,6 +405,14 @@ GLOBAL(early_recursion_flag)
+ 	.balign	PAGE_SIZE; \
+ GLOBAL(name)
+ 
++#ifdef CONFIG_KAISER
++#define NEXT_PGD_PAGE(name) \
++	.balign 2 * PAGE_SIZE; \
++GLOBAL(name)
++#else
++#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
++#endif
++
+ /* Automate the creation of 1 to 1 mapping pmd entries */
+ #define PMDS(START, PERM, COUNT)			\
+ 	i = 0 ;						\
+@@ -414,7 +422,7 @@ GLOBAL(name)
+ 	.endr
+ 
+ 	__INITDATA
+-NEXT_PAGE(early_level4_pgt)
++NEXT_PGD_PAGE(early_level4_pgt)
+ 	.fill	511,8,0
+ 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ 
+@@ -424,10 +432,10 @@ NEXT_PAGE(early_dynamic_pgts)
+ 	.data
+ 
+ #ifndef CONFIG_XEN
+-NEXT_PAGE(init_level4_pgt)
+-	.fill	512,8,0
++NEXT_PGD_PAGE(init_level4_pgt)
++	.fill	2*512,8,0
+ #else
+-NEXT_PAGE(init_level4_pgt)
++NEXT_PGD_PAGE(init_level4_pgt)
+ 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ 	.org    init_level4_pgt + L4_PAGE_OFFSET*8, 0
+ 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+--- a/arch/x86/kernel/irqinit.c
++++ b/arch/x86/kernel/irqinit.c
+@@ -51,7 +51,7 @@ static struct irqaction irq2 = {
+ 	.flags = IRQF_NO_THREAD,
+ };
+ 
+-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
++DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = {
+ 	[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
+ };
+ 
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -41,7 +41,7 @@
+  * section. Since TSS's are completely CPU-local, we want them
+  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+  */
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
++__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
+ 	.x86_tss = {
+ 		.sp0 = TOP_OF_INIT_STACK,
+ #ifdef CONFIG_X86_32
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU)		+= numa_emulatio
+ obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
+ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+-
++obj-$(CONFIG_KAISER) += kaiser.o
+--- /dev/null
++++ b/arch/x86/mm/kaiser.c
+@@ -0,0 +1,160 @@
++
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/string.h>
++#include <linux/types.h>
++#include <linux/bug.h>
++#include <linux/init.h>
++#include <linux/spinlock.h>
++#include <linux/mm.h>
++
++#include <linux/uaccess.h>
++#include <asm/pgtable.h>
++#include <asm/pgalloc.h>
++#include <asm/desc.h>
++#ifdef CONFIG_KAISER
++
++__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++/**
++ * Get the real ppn from an address in the kernel mapping.
++ * @param address The virtual address
++ * @return the physical address
++ */
++static inline unsigned long get_pa_from_mapping (unsigned long address)
++{
++	pgd_t *pgd;
++	pud_t *pud;
++	pmd_t *pmd;
++	pte_t *pte;
++
++	pgd = pgd_offset_k(address);
++	BUG_ON(pgd_none(*pgd) || pgd_large(*pgd));
++
++	pud = pud_offset(pgd, address);
++	BUG_ON(pud_none(*pud));
++
++	if (pud_large(*pud)) {
++		return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK);
++	}
++
++	pmd = pmd_offset(pud, address);
++	BUG_ON(pmd_none(*pmd));
++
++	if (pmd_large(*pmd)) {
++		return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK);
++	}
++
++	pte = pte_offset_kernel(pmd, address);
++	BUG_ON(pte_none(*pte));
++
++	return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK);
++}
++
++void _kaiser_copy (unsigned long start_addr, unsigned long size,
++					unsigned long flags)
++{
++	pgd_t *pgd;
++	pud_t *pud;
++	pmd_t *pmd;
++	pte_t *pte;
++	unsigned long address;
++	unsigned long end_addr = start_addr + size;
++	unsigned long target_address;
++
++	for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1));
++			address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) {
++		target_address = get_pa_from_mapping(address);
++
++		pgd = native_get_shadow_pgd(pgd_offset_k(address));
++
++		BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n");
++		BUG_ON(pgd_large(*pgd));
++
++		pud = pud_offset(pgd, address);
++		if (pud_none(*pud)) {
++			set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address))));
++		}
++		BUG_ON(pud_large(*pud));
++
++		pmd = pmd_offset(pud, address);
++		if (pmd_none(*pmd)) {
++			set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address))));
++		}
++		BUG_ON(pmd_large(*pmd));
++
++		pte = pte_offset_kernel(pmd, address);
++		if (pte_none(*pte)) {
++			set_pte(pte, __pte(flags | target_address));
++		} else {
++			BUG_ON(__pa(pte_page(*pte)) != target_address);
++		}
++	}
++}
++
++// at first, add a pud for every pgd entry in the shadowmem-kernel-part of the kernel mapping
++static inline void __init _kaiser_init(void)
++{
++	pgd_t *pgd;
++	int i = 0;
++
++	pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
++	for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
++		set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0))));
++	}
++}
++
++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++spinlock_t shadow_table_lock;
++void __init kaiser_init(void)
++{
++	int cpu;
++	spin_lock_init(&shadow_table_lock);
++
++	spin_lock(&shadow_table_lock);
++
++	_kaiser_init();
++
++	for_each_possible_cpu(cpu) {
++		// map the per cpu user variables
++		_kaiser_copy(
++				(unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)),
++				(unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start,
++				__PAGE_KERNEL);
++	}
++
++	// map the entry/exit text section, which is responsible for switching between user and kernel mode
++	_kaiser_copy(
++			(unsigned long) __entry_text_start,
++			(unsigned long) __entry_text_end - (unsigned long) __entry_text_start,
++			__PAGE_KERNEL_RX);
++
++	// the fixed map address of the idt_table
++	_kaiser_copy(
++			(unsigned long) idt_descr.address,
++			sizeof(gate_desc) * NR_VECTORS,
++			__PAGE_KERNEL_RO);
++
++	spin_unlock(&shadow_table_lock);
++}
++
++// add a mapping to the shadow-mapping, and synchronize the mappings
++void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++{
++	spin_lock(&shadow_table_lock);
++	_kaiser_copy(addr, size, flags);
++	spin_unlock(&shadow_table_lock);
++}
++
++extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end);
++void kaiser_remove_mapping(unsigned long start, unsigned long size)
++{
++	pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start));
++	spin_lock(&shadow_table_lock);
++	do {
++		unmap_pud_range(pgd, start, start + size);
++	} while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size)));
++	spin_unlock(&shadow_table_lock);
++}
++#endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -823,7 +823,7 @@ static void unmap_pmd_range(pud_t *pud,
+ 			pud_clear(pud);
+ }
+ 
+-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+ {
+ 	pud_t *pud = pud_offset(pgd, start);
+ 
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -346,12 +346,38 @@ static inline void _pgd_free(pgd_t *pgd)
+ #else
+ static inline pgd_t *_pgd_alloc(void)
+ {
++#ifdef CONFIG_KAISER
++	// Instead of one PML4, we acquire two PML4s and, thus, an 8kB-aligned
++	// memory block. Therefore, we have to allocate at least 3 pages. However,
++	// __get_free_pages returns 4 pages here. Hence, we store the base pointer
++	// at the beginning of the page of our 8kB-aligned memory block in order
++	// to correctly free it afterwards.
++
++	unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE));
++
++	if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages)
++	{
++		*((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages;
++		return (pgd_t *) pages;
++	}
++	else
++	{
++		*((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages;
++		return (pgd_t *) (pages + PAGE_SIZE);
++	}
++#else
+ 	return (pgd_t *)__get_free_page(PGALLOC_GFP);
++#endif
+ }
+ 
+ static inline void _pgd_free(pgd_t *pgd)
+ {
++#ifdef CONFIG_KAISER
++	unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE));
++	free_pages(pages, get_order(4*PAGE_SIZE));
++#else
+ 	free_page((unsigned long)pgd);
++#endif
+ }
+ #endif /* CONFIG_X86_PAE */
+ 
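Note (editor's illustration, not part of the patch): the _pgd_alloc() comment
above describes carving an 8kB-aligned page pair out of a larger allocation and
stashing the base pointer so the whole block can be freed later. A small
userspace sketch of that bookkeeping; it uses aligned_alloc() in place of
__get_free_pages(), and the demo_* names are hypothetical:

	#include <stdint.h>
	#include <stdlib.h>

	#define DEMO_PAGE_SIZE 4096UL

	/* pick the 8kB-aligned pair out of a 4-page block, remember the base */
	static void *demo_pgd_alloc(void)
	{
		char *pages = aligned_alloc(DEMO_PAGE_SIZE, 4 * DEMO_PAGE_SIZE);
		char *pgd;

		if (!pages)
			return NULL;
		if (((uintptr_t)pages % (2 * DEMO_PAGE_SIZE)) == 0)
			pgd = pages;			/* already 8kB-aligned */
		else
			pgd = pages + DEMO_PAGE_SIZE;	/* next page is */
		/* stash the allocation base two pages above the chosen pgd */
		*(void **)(pgd + 2 * DEMO_PAGE_SIZE) = pages;
		return pgd;
	}

	static void demo_pgd_free(void *pgd)
	{
		free(*(void **)((char *)pgd + 2 * DEMO_PAGE_SIZE));
	}

	int main(void)
	{
		void *pgd = demo_pgd_alloc();

		if (pgd)
			demo_pgd_free(pgd);
		return 0;
	}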
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -778,7 +778,16 @@
+  */
+ #define PERCPU_INPUT(cacheline)						\
+ 	VMLINUX_SYMBOL(__per_cpu_start) = .;				\
+-	*(.data..percpu..first)						\
++	\
++	VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .;        \
++	*(.data..percpu..first)           \
++	. = ALIGN(cacheline);           \
++	*(.data..percpu..user_mapped)            \
++	*(.data..percpu..user_mapped..shared_aligned)        \
++	. = ALIGN(PAGE_SIZE);           \
++	*(.data..percpu..user_mapped..page_aligned)          \
++	VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .;        \
++	\
+ 	. = ALIGN(PAGE_SIZE);						\
+ 	*(.data..percpu..page_aligned)					\
+ 	. = ALIGN(cacheline);						\
+--- a/include/linux/percpu-defs.h
++++ b/include/linux/percpu-defs.h
+@@ -35,6 +35,12 @@
+ 
+ #endif
+ 
++#ifdef CONFIG_KAISER
++#define USER_MAPPED_SECTION "..user_mapped"
++#else
++#define USER_MAPPED_SECTION ""
++#endif
++
+ /*
+  * Base implementations of per-CPU variable declarations and definitions, where
+  * the section in which the variable is to be placed is provided by the
+@@ -115,6 +121,12 @@
+ #define DEFINE_PER_CPU(type, name)					\
+ 	DEFINE_PER_CPU_SECTION(type, name, "")
+ 
++#define DECLARE_PER_CPU_USER_MAPPED(type, name)         \
++	DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
++
++#define DEFINE_PER_CPU_USER_MAPPED(type, name)          \
++	DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
++
+ /*
+  * Declaration/definition used for per-CPU variables that must come first in
+  * the set of variables.
+@@ -144,6 +156,14 @@
+ 	DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
+ 	____cacheline_aligned_in_smp
+ 
++#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name)			\
++	DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
++	____cacheline_aligned_in_smp
++
++#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name)			\
++	DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
++	____cacheline_aligned_in_smp
++
+ #define DECLARE_PER_CPU_ALIGNED(type, name)				\
+ 	DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION)	\
+ 	____cacheline_aligned
+@@ -162,6 +182,16 @@
+ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name)				\
+ 	DEFINE_PER_CPU_SECTION(type, name, "..page_aligned")		\
+ 	__aligned(PAGE_SIZE)
++/*
++ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
++ */
++#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name)      \
++  DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned")   \
++  __aligned(PAGE_SIZE)
++
++#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name)       \
++  DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned")    \
++  __aligned(PAGE_SIZE)
+ 
+ /*
+  * Declaration/definition used for per-CPU variables that must be read mostly.
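Note (editor's illustration, not part of the patch): a hypothetical use of the
DEFINE_PER_CPU_USER_MAPPED() macro added above, for a per-cpu variable that must
stay visible in the shadow (user) page tables, e.g. scratch state touched on the
entry path before CR3 is switched. This only builds inside a kernel tree with
the patch applied; the demo_* names are made up:

	#include <linux/percpu.h>

	DEFINE_PER_CPU_USER_MAPPED(unsigned long, demo_entry_scratch);

	static inline void demo_note_entry(unsigned long value)
	{
		this_cpu_write(demo_entry_scratch, value);
	}

With CONFIG_KAISER disabled, USER_MAPPED_SECTION is empty, so the definition
degrades to a plain DEFINE_PER_CPU() and no extra section is emitted.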
+--- a/init/main.c
++++ b/init/main.c
+@@ -86,6 +86,9 @@
+ #include <asm/setup.h>
+ #include <asm/sections.h>
+ #include <asm/cacheflush.h>
++#ifdef CONFIG_KAISER
++#include <asm/kaiser.h>
++#endif
+ 
+ static int kernel_init(void *);
+ 
+@@ -473,6 +476,9 @@ static void __init mm_init(void)
+ 	pgtable_init();
+ 	vmalloc_init();
+ 	ioremap_huge_init();
++#ifdef CONFIG_KAISER
++	kaiser_init();
++#endif
+ }
+ 
+ asmlinkage __visible void __init start_kernel(void)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -211,8 +211,12 @@ static unsigned long *alloc_thread_stack
+ #endif
+ }
+ 
++extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size);
+ static inline void free_thread_stack(struct task_struct *tsk)
+ {
++#ifdef CONFIG_KAISER
++	kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE);
++#endif
+ #ifdef CONFIG_VMAP_STACK
+ 	if (task_stack_vm_area(tsk)) {
+ 		unsigned long flags;
+@@ -468,6 +472,7 @@ void set_task_stack_end_magic(struct tas
+ 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
+ }
+ 
++extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ {
+ 	struct task_struct *tsk;
+@@ -495,6 +500,9 @@ static struct task_struct *dup_task_stru
+ 	 * functions again.
+ 	 */
+ 	tsk->stack = stack;
++#ifdef CONFIG_KAISER
++	kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
++#endif
+ #ifdef CONFIG_VMAP_STACK
+ 	tsk->stack_vm_area = stack_vm_area;
+ #endif
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -30,6 +30,13 @@ config SECURITY
+ 	  model will be used.
+ 
+ 	  If you are unsure how to answer this question, answer N.
++config KAISER
++	bool "Remove the kernel mapping in user mode"
++	depends on X86_64
++	depends on !PARAVIRT
++	help
++	  This enforces a strict kernel and user space isolation in order to close
++	  hardware side channels on kernel address information.
+ 
+ config SECURITYFS
+ 	bool "Enable the securityfs filesystem"
diff --git a/queue/kaiser-load_new_mm_cr3-let-switch_user_cr3-flush-user.patch b/queue/kaiser-load_new_mm_cr3-let-switch_user_cr3-flush-user.patch
new file mode 100644
index 0000000..3e82917
--- /dev/null
+++ b/queue/kaiser-load_new_mm_cr3-let-switch_user_cr3-flush-user.patch
@@ -0,0 +1,392 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 17 Aug 2017 15:00:37 -0700
+Subject: kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user
+
+From: Hugh Dickins <hughd@google.com>
+
+
+We have many machines (Westmere, Sandybridge, Ivybridge) supporting
+PCID but not INVPCID: on these load_new_mm_cr3() simply crashed.
+
+Flushing user context inside load_new_mm_cr3() without the use of
+invpcid is difficult: momentarily switch from kernel to user context
+and back to do so?  I'm not sure whether that can be safely done at
+all, and would risk polluting user context with kernel internals,
+and kernel context with stale user externals.
+
+Instead, follow the hint in the comment that was there: change
+X86_CR3_PCID_USER_VAR to be a per-cpu variable, then load_new_mm_cr3()
+can leave a note in it, for SWITCH_USER_CR3 on return to userspace to
+flush user context TLB, instead of default X86_CR3_PCID_USER_NOFLUSH.
+
+Which works well enough that there's no need to do it this way only
+when invpcid is unsupported: it's a good alternative to invpcid here.
+But there's a couple of inlines in asm/tlbflush.h that need to do the
+same trick, so it's best to localize all this per-cpu business in
+mm/kaiser.c: moving that part of the initialization from setup_pcid()
+to kaiser_setup_pcid(); with kaiser_flush_tlb_on_return_to_user() the
+function for noting an X86_CR3_PCID_USER_FLUSH.  And let's keep a
+KAISER_SHADOW_PGD_OFFSET in there, to avoid the extra OR on exit.
+
+I did try to make the feature tests in asm/tlbflush.h more consistent
+with each other: there seem to be far too many ways of performing such
+tests, and I don't have a good grasp of their differences.  At first
+I converted them all to be static_cpu_has(): but that proved to be a
+mistake, as the comment in __native_flush_tlb_single() hints; so then
+I reversed and made them all this_cpu_has().  Probably all gratuitous
+change, but that's the way it's working at present.
+
+I am slightly bothered by the way non-per-cpu X86_CR3_PCID_KERN_VAR
+gets re-initialized by each cpu (before and after these changes):
+no problem when (as usual) all cpus on a machine have the same
+features, but in principle incorrect.  However, my experiment
+to per-cpu-ify that one did not end well...
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kaiser.h   |   18 +++++++-----
+ arch/x86/include/asm/tlbflush.h |   56 +++++++++++++++++++++++++++-------------
+ arch/x86/kernel/cpu/common.c    |   22 ---------------
+ arch/x86/mm/kaiser.c            |   50 +++++++++++++++++++++++++++++++----
+ arch/x86/mm/tlb.c               |   46 ++++++++++++--------------------
+ 5 files changed, 113 insertions(+), 79 deletions(-)
+
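Note (editor's illustration, not part of the patch): the "leave a note for the
exit path" scheme described above, reduced to plain single-cpu C assuming a
64-bit unsigned long. The DEMO_* constants and names are hypothetical; the real
per-cpu variable and asm live in the hunks below:

	#include <stdio.h>

	#define DEMO_NOFLUSH		(1UL << 63)	/* CR3 bit 63: keep this PCID's TLB entries */
	#define DEMO_SHADOW_PGD_OFFSET	0x1000UL	/* second (user) page of the 8kB PGD */

	/* per-cpu in the real code; one cpu is enough for the sketch */
	static unsigned long demo_user_cr3_bits = DEMO_NOFLUSH | DEMO_SHADOW_PGD_OFFSET;

	/* kernel side: ask for a user-ASID flush on the next return to userspace */
	static void demo_flush_user_on_return(void)
	{
		demo_user_cr3_bits = DEMO_SHADOW_PGD_OFFSET;	/* NOFLUSH cleared */
	}

	/* exit path: fold the note into the user CR3, then re-arm NOFLUSH */
	static unsigned long demo_user_cr3(unsigned long kern_pgd_pa)
	{
		unsigned long cr3 = kern_pgd_pa | demo_user_cr3_bits;

		demo_user_cr3_bits = DEMO_NOFLUSH | DEMO_SHADOW_PGD_OFFSET;
		return cr3;
	}

	int main(void)
	{
		printf("%#lx\n", demo_user_cr3(0x100000UL));	/* NOFLUSH set: no flush */
		demo_flush_user_on_return();
		printf("%#lx\n", demo_user_cr3(0x100000UL));	/* NOFLUSH clear: flush once */
		return 0;
	}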
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -32,13 +32,12 @@ movq \reg, %cr3
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-/*
+- * This can obviously be one instruction by putting the
+- * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
+- * But, just leave it now for simplicity.
+- */
+-orq  X86_CR3_PCID_USER_VAR, \reg
+-orq  $(KAISER_SHADOW_PGD_OFFSET), \reg
++orq  PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
++js   9f
++// FLUSH this time, reset to NOFLUSH for next time
++// But if nopcid?  Consider using 0x80 for user pcid?
++movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
++9:
+ movq \reg, %cr3
+ .endm
+ 
+@@ -90,6 +89,11 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ 
++extern unsigned long X86_CR3_PCID_KERN_VAR;
++DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
++
++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++
+ /**
+  *  kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+  *  @addr: the start address of the range
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -13,6 +13,7 @@ static inline void __invpcid(unsigned lo
+ 			     unsigned long type)
+ {
+ 	struct { u64 d[2]; } desc = { { pcid, addr } };
++
+ 	/*
+ 	 * The memory clobber is because the whole point is to invalidate
+ 	 * stale TLB entries and, especially if we're flushing global
+@@ -131,27 +132,42 @@ static inline void cr4_set_bits_and_upda
+ 	cr4_set_bits(mask);
+ }
+ 
++/*
++ * Declare a couple of kaiser interfaces here for convenience,
++ * to avoid the need for asm/kaiser.h in unexpected places.
++ */
++#ifdef CONFIG_KAISER
++extern void kaiser_setup_pcid(void);
++extern void kaiser_flush_tlb_on_return_to_user(void);
++#else
++static inline void kaiser_setup_pcid(void)
++{
++}
++static inline void kaiser_flush_tlb_on_return_to_user(void)
++{
++}
++#endif
++
+ static inline void __native_flush_tlb(void)
+ {
+-	if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
++	if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ 		/*
+-		 * If current->mm == NULL then we borrow a mm which may change during a
+-		 * task switch and therefore we must not be preempted while we write CR3
+-		 * back:
++		 * Note, this works with CR4.PCIDE=0 or 1.
+ 		 */
+-		preempt_disable();
+-		native_write_cr3(native_read_cr3());
+-		preempt_enable();
++		invpcid_flush_all_nonglobals();
+ 		return;
+ 	}
++
+ 	/*
+-	 * We are no longer using globals with KAISER, so a
+-	 * "nonglobals" flush would work too. But, this is more
+-	 * conservative.
+-	 *
+-	 * Note, this works with CR4.PCIDE=0 or 1.
++	 * If current->mm == NULL then we borrow a mm which may change during a
++	 * task switch and therefore we must not be preempted while we write CR3
++	 * back:
+ 	 */
+-	invpcid_flush_all();
++	preempt_disable();
++	if (this_cpu_has(X86_FEATURE_PCID))
++		kaiser_flush_tlb_on_return_to_user();
++	native_write_cr3(native_read_cr3());
++	preempt_enable();
+ }
+ 
+ static inline void __native_flush_tlb_global_irq_disabled(void)
+@@ -167,9 +183,13 @@ static inline void __native_flush_tlb_gl
+ 
+ static inline void __native_flush_tlb_global(void)
+ {
++#ifdef CONFIG_KAISER
++	/* Globals are not used at all */
++	__native_flush_tlb();
++#else
+ 	unsigned long flags;
+ 
+-	if (static_cpu_has(X86_FEATURE_INVPCID)) {
++	if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ 		/*
+ 		 * Using INVPCID is considerably faster than a pair of writes
+ 		 * to CR4 sandwiched inside an IRQ flag save/restore.
+@@ -186,10 +206,9 @@ static inline void __native_flush_tlb_gl
+ 	 * be called from deep inside debugging code.)
+ 	 */
+ 	raw_local_irq_save(flags);
+-
+ 	__native_flush_tlb_global_irq_disabled();
+-
+ 	raw_local_irq_restore(flags);
++#endif
+ }
+ 
+ static inline void __native_flush_tlb_single(unsigned long addr)
+@@ -200,9 +219,12 @@ static inline void __native_flush_tlb_si
+ 	 *
+ 	 * The ASIDs used below are hard-coded.  But, we must not
+ 	 * call invpcid(type=1/2) before CR4.PCIDE=1.  Just call
+-	 * invpcid in the case we are called early.
++	 * invlpg in the case we are called early.
+ 	 */
++
+ 	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
++		if (this_cpu_has(X86_FEATURE_PCID))
++			kaiser_flush_tlb_on_return_to_user();
+ 		asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ 		return;
+ 	}
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -324,33 +324,12 @@ static __always_inline void setup_smap(s
+ 	}
+ }
+ 
+-/*
+- * These can have bit 63 set, so we can not just use a plain "or"
+- * instruction to get their value or'd into CR3.  It would take
+- * another register.  So, we use a memory reference to these
+- * instead.
+- *
+- * This is also handy because systems that do not support
+- * PCIDs just end up or'ing a 0 into their CR3, which does
+- * no harm.
+- */
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
+-
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ 	if (cpu_has(c, X86_FEATURE_PCID)) {
+ 		if (cpu_has(c, X86_FEATURE_PGE)) {
+ 			cr4_set_bits(X86_CR4_PCIDE);
+ 			/*
+-			 * These variables are used by the entry/exit
+-			 * code to change PCIDs.
+-			 */
+-#ifdef CONFIG_KAISER
+-			X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
+-			X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
+-#endif
+-			/*
+ 			 * INVPCID has two "groups" of types:
+ 			 * 1/2: Invalidate an individual address
+ 			 * 3/4: Invalidate all contexts
+@@ -375,6 +354,7 @@ static void setup_pcid(struct cpuinfo_x8
+ 			clear_cpu_cap(c, X86_FEATURE_PCID);
+ 		}
+ 	}
++	kaiser_setup_pcid();
+ }
+ 
+ /*
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -11,12 +11,26 @@
+ #include <linux/uaccess.h>
+ 
+ #include <asm/kaiser.h>
++#include <asm/tlbflush.h>	/* to verify its kaiser declarations */
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
++
+ #ifdef CONFIG_KAISER
++__visible
++DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++/*
++ * These can have bit 63 set, so we can not just use a plain "or"
++ * instruction to get their value or'd into CR3.  It would take
++ * another register.  So, we use a memory reference to these instead.
++ *
++ * This is also handy because systems that do not support PCIDs
++ * just end up or'ing a 0 into their CR3, which does no harm.
++ */
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR;
++DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
+ 
+-__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ /*
+  * At runtime, the only things we map are some things for CPU
+  * hotplug, and stacks for new processes.  No two CPUs will ever
+@@ -238,9 +252,6 @@ static void __init kaiser_init_all_pgds(
+ 	WARN_ON(__ret);							\
+ } while (0)
+ 
+-extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+-extern unsigned long X86_CR3_PCID_KERN_VAR;
+-extern unsigned long X86_CR3_PCID_USER_VAR;
+ /*
+  * If anything in here fails, we will likely die on one of the
+  * first kernel->user transitions and init will die.  But, we
+@@ -294,8 +305,6 @@ void __init kaiser_init(void)
+ 
+ 	kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
+ 				  __PAGE_KERNEL);
+-	kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
+-				  __PAGE_KERNEL);
+ }
+ 
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+@@ -358,4 +367,33 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp,
+ 	}
+ 	return pgd;
+ }
++
++void kaiser_setup_pcid(void)
++{
++	unsigned long kern_cr3 = 0;
++	unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
++
++	if (this_cpu_has(X86_FEATURE_PCID)) {
++		kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
++		user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
++	}
++	/*
++	 * These variables are used by the entry/exit
++	 * code to change PCID and pgd and TLB flushing.
++	 */
++	X86_CR3_PCID_KERN_VAR = kern_cr3;
++	this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3);
++}
++
++/*
++ * Make a note that this cpu will need to flush USER tlb on return to user.
++ * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
++ * if cpu does not, then the NOFLUSH bit will never have been set.
++ */
++void kaiser_flush_tlb_on_return_to_user(void)
++{
++	this_cpu_write(X86_CR3_PCID_USER_VAR,
++			X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
++}
++EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+ #endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -6,13 +6,14 @@
+ #include <linux/interrupt.h>
+ #include <linux/export.h>
+ #include <linux/cpu.h>
++#include <linux/debugfs.h>
+ 
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
+ #include <asm/cache.h>
+ #include <asm/apic.h>
+ #include <asm/uv/uv.h>
+-#include <linux/debugfs.h>
++#include <asm/kaiser.h>
+ 
+ /*
+  *	TLB flushing, formerly SMP-only
+@@ -38,34 +39,23 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ 	unsigned long new_mm_cr3 = __pa(pgdir);
+ 
+-	/*
+-	 * KAISER, plus PCIDs needs some extra work here.  But,
+-	 * if either of features is not present, we need no
+-	 * PCIDs here and just do a normal, full TLB flush with
+-	 * the write_cr3()
+-	 */
+-	if (!IS_ENABLED(CONFIG_KAISER) ||
+-	    !cpu_feature_enabled(X86_FEATURE_PCID))
+-		goto out_set_cr3;
+-	/*
+-	 * We reuse the same PCID for different tasks, so we must
+-	 * flush all the entires for the PCID out when we change
+-	 * tasks.
+-	 */
+-	new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
+-
+-	/*
+-	 * The flush from load_cr3() may leave old TLB entries
+-	 * for userspace in place.  We must flush that context
+-	 * separately.  We can theoretically delay doing this
+-	 * until we actually load up the userspace CR3, but
+-	 * that's a bit tricky.  We have to have the "need to
+-	 * flush userspace PCID" bit per-cpu and check it in the
+-	 * exit-to-userspace paths.
+-	 */
+-	invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
++#ifdef CONFIG_KAISER
++	if (this_cpu_has(X86_FEATURE_PCID)) {
++		/*
++		 * We reuse the same PCID for different tasks, so we must
++		 * flush all the entries for the PCID out when we change tasks.
++		 * Flush KERN below, flush USER when returning to userspace in
++		 * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro.
++		 *
++		 * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
++		 * do it here, but can only be used if X86_FEATURE_INVPCID is
++		 * available - and many machines support pcid without invpcid.
++		 */
++		new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
++		kaiser_flush_tlb_on_return_to_user();
++	}
++#endif /* CONFIG_KAISER */
+ 
+-out_set_cr3:
+ 	/*
+ 	 * Caution: many callers of this function expect
+ 	 * that load_cr3() is serializing and orders TLB
diff --git a/queue/kaiser-merged-update.patch b/queue/kaiser-merged-update.patch
new file mode 100644
index 0000000..8a0e3fe
--- /dev/null
+++ b/queue/kaiser-merged-update.patch
@@ -0,0 +1,1298 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Wed, 30 Aug 2017 16:23:00 -0700
+Subject: kaiser: merged update
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+
+Merged fixes and cleanups, rebased to 4.9.51 tree (no 5-level paging).
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S            |  105 ++++++++++-
+ arch/x86/include/asm/kaiser.h        |   43 ++--
+ arch/x86/include/asm/pgtable.h       |   18 +
+ arch/x86/include/asm/pgtable_64.h    |   48 ++++-
+ arch/x86/include/asm/pgtable_types.h |    6 
+ arch/x86/kernel/espfix_64.c          |   13 -
+ arch/x86/kernel/head_64.S            |   19 +-
+ arch/x86/kernel/ldt.c                |   27 ++
+ arch/x86/kernel/tracepoint.c         |    2 
+ arch/x86/mm/kaiser.c                 |  317 +++++++++++++++++++++++++----------
+ arch/x86/mm/pageattr.c               |   63 +++++-
+ arch/x86/mm/pgtable.c                |   40 +---
+ include/linux/kaiser.h               |   26 ++
+ kernel/fork.c                        |    9 
+ security/Kconfig                     |    5 
+ 15 files changed, 551 insertions(+), 190 deletions(-)
+ create mode 100644 include/linux/kaiser.h
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -230,6 +230,13 @@ entry_SYSCALL_64_fastpath:
+ 	movq	RIP(%rsp), %rcx
+ 	movq	EFLAGS(%rsp), %r11
+ 	RESTORE_C_REGS_EXCEPT_RCX_R11
++	/*
++	 * This opens a window where we have a user CR3, but are
++	 * running in the kernel.  This makes using the CS
++	 * register useless for telling whether or not we need to
++	 * switch CR3 in NMIs.  Normal interrupts are OK because
++	 * they are off here.
++	 */
+ 	SWITCH_USER_CR3
+ 	movq	RSP(%rsp), %rsp
+ 	USERGS_SYSRET64
+@@ -326,11 +333,25 @@ return_from_SYSCALL_64:
+ syscall_return_via_sysret:
+ 	/* rcx and r11 are already restored (see code above) */
+ 	RESTORE_C_REGS_EXCEPT_RCX_R11
++	/*
++	 * This opens a window where we have a user CR3, but are
++	 * running in the kernel.  This makes using the CS
++	 * register useless for telling whether or not we need to
++	 * switch CR3 in NMIs.  Normal interrupts are OK because
++	 * they are off here.
++	 */
+ 	SWITCH_USER_CR3
+ 	movq	RSP(%rsp), %rsp
+ 	USERGS_SYSRET64
+ 
+ opportunistic_sysret_failed:
++	/*
++	 * This opens a window where we have a user CR3, but are
++	 * running in the kernel.  This makes using the CS
++	 * register useless for telling whether or not we need to
++	 * switch CR3 in NMIs.  Normal interrupts are OK because
++	 * they are off here.
++	 */
+ 	SWITCH_USER_CR3
+ 	SWAPGS
+ 	jmp	restore_c_regs_and_iret
+@@ -1087,6 +1108,13 @@ ENTRY(error_entry)
+ 	cld
+ 	SAVE_C_REGS 8
+ 	SAVE_EXTRA_REGS 8
++	/*
++	 * error_entry() always returns with a kernel gsbase and
++	 * CR3.  We must also have a kernel CR3/gsbase before
++	 * calling TRACE_IRQS_*.  Just unconditionally switch to
++	 * the kernel CR3 here.
++	 */
++	SWITCH_KERNEL_CR3
+ 	xorl	%ebx, %ebx
+ 	testb	$3, CS+8(%rsp)
+ 	jz	.Lerror_kernelspace
+@@ -1096,7 +1124,6 @@ ENTRY(error_entry)
+ 	 * from user mode due to an IRET fault.
+ 	 */
+ 	SWAPGS
+-	SWITCH_KERNEL_CR3
+ 
+ .Lerror_entry_from_usermode_after_swapgs:
+ 	/*
+@@ -1148,7 +1175,6 @@ ENTRY(error_entry)
+ 	 * Switch to kernel gsbase:
+ 	 */
+ 	SWAPGS
+-	SWITCH_KERNEL_CR3
+ 
+ 	/*
+ 	 * Pretend that the exception came from user mode: set up pt_regs
+@@ -1249,7 +1275,10 @@ ENTRY(nmi)
+ 	 */
+ 
+ 	SWAPGS_UNSAFE_STACK
+-	SWITCH_KERNEL_CR3_NO_STACK
++	/*
++	 * percpu variables are mapped with user CR3, so no need
++	 * to switch CR3 here.
++	 */
+ 	cld
+ 	movq	%rsp, %rdx
+ 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+@@ -1283,14 +1312,33 @@ ENTRY(nmi)
+ 
+ 	movq	%rsp, %rdi
+ 	movq	$-1, %rsi
++#ifdef CONFIG_KAISER
++	/* Unconditionally use kernel CR3 for do_nmi() */
++	/* %rax is saved above, so OK to clobber here */
++	movq	%cr3, %rax
++	pushq	%rax
++#ifdef CONFIG_KAISER_REAL_SWITCH
++	andq	$(~0x1000), %rax
++#endif
++	movq	%rax, %cr3
++#endif
+ 	call	do_nmi
++	/*
++	 * Unconditionally restore CR3.  I know we return to
++	 * kernel code that needs user CR3, but do we ever return
++	 * to "user mode" where we need the kernel CR3?
++	 */
++#ifdef CONFIG_KAISER
++	popq	%rax
++	mov	%rax, %cr3
++#endif
+ 
+ 	/*
+ 	 * Return back to user mode.  We must *not* do the normal exit
+-	 * work, because we don't want to enable interrupts.  Fortunately,
+-	 * do_nmi doesn't modify pt_regs.
++	 * work, because we don't want to enable interrupts.  Do not
++	 * switch to user CR3: we might be going back to kernel code
++	 * that had a user CR3 set.
+ 	 */
+-	SWITCH_USER_CR3
+ 	SWAPGS
+ 	jmp	restore_c_regs_and_iret
+ 
+@@ -1486,23 +1534,54 @@ end_repeat_nmi:
+ 	ALLOC_PT_GPREGS_ON_STACK
+ 
+ 	/*
+-	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
+-	 * as we should not be calling schedule in NMI context.
+-	 * Even with normal interrupts enabled. An NMI should not be
+-	 * setting NEED_RESCHED or anything that normal interrupts and
+-	 * exceptions might do.
++	 * Use the same approach as paranoid_entry to handle SWAPGS, but
++	 * without CR3 handling since we do that differently in NMIs.  No
++	 * need to use paranoid_exit as we should not be calling schedule
++	 * in NMI context.  Even with normal interrupts enabled. An NMI
++	 * should not be setting NEED_RESCHED or anything that normal
++	 * interrupts and exceptions might do.
+ 	 */
+-	call	paranoid_entry
++	cld
++	SAVE_C_REGS
++	SAVE_EXTRA_REGS
++	movl	$1, %ebx
++	movl	$MSR_GS_BASE, %ecx
++	rdmsr
++	testl	%edx, %edx
++	js	1f				/* negative -> in kernel */
++	SWAPGS
++	xorl	%ebx, %ebx
++1:
++#ifdef CONFIG_KAISER
++	/* Unconditionally use kernel CR3 for do_nmi() */
++	/* %rax is saved above, so OK to clobber here */
++	movq	%cr3, %rax
++	pushq	%rax
++#ifdef CONFIG_KAISER_REAL_SWITCH
++	andq	$(~0x1000), %rax
++#endif
++	movq	%rax, %cr3
++#endif
+ 
+ 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ 	movq	%rsp, %rdi
++	addq	$8, %rdi /* point %rdi at ptregs, fixed up for CR3 */
+ 	movq	$-1, %rsi
+ 	call	do_nmi
++	/*
++	 * Unconditionally restore CR3.  We might be returning to
++	 * kernel code that needs user CR3, like just before
++	 * a sysret.
++	 */
++#ifdef CONFIG_KAISER
++	popq	%rax
++	mov	%rax, %cr3
++#endif
+ 
+ 	testl	%ebx, %ebx			/* swapgs needed? */
+ 	jnz	nmi_restore
+ nmi_swapgs:
+-	SWITCH_USER_CR3_NO_STACK
++	/* We fixed up CR3 above, so no need to switch it here */
+ 	SWAPGS_UNSAFE_STACK
+ nmi_restore:
+ 	RESTORE_EXTRA_REGS
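Note (editor's illustration, not part of the patch): the NMI handling above
saves the incoming CR3, forces the kernel half of the 8kB PGD for do_nmi(), and
restores exactly what was live before. The same flow in plain C, with demo_*
stand-ins rather than real kernel APIs:

	#include <stdio.h>

	static unsigned long demo_cr3 = 0x200000UL | 0x1000UL;	/* pretend: user CR3 */

	static void demo_do_nmi(void)
	{
		printf("do_nmi runs with cr3=%#lx\n", demo_cr3);
	}

	static void demo_nmi(void)
	{
		unsigned long saved = demo_cr3;

		demo_cr3 = saved & ~0x1000UL;	/* unconditionally kernel CR3 */
		demo_do_nmi();
		demo_cr3 = saved;		/* back to whatever was interrupted */
	}

	int main(void)
	{
		demo_nmi();
		printf("after nmi cr3=%#lx\n", demo_cr3);
		return 0;
	}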
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -16,13 +16,17 @@
+ 
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
++#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~0x1000), \reg
++#endif
+ movq \reg, %cr3
+ .endm
+ 
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
++#ifdef CONFIG_KAISER_REAL_SWITCH
+ orq $(0x1000), \reg
++#endif
+ movq \reg, %cr3
+ .endm
+ 
+@@ -65,48 +69,53 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ .endm
+ 
+ #endif /* CONFIG_KAISER */
++
+ #else /* __ASSEMBLY__ */
+ 
+ 
+ #ifdef CONFIG_KAISER
+-// Upon kernel/user mode switch, it may happen that
+-// the address space has to be switched before the registers have been stored.
+-// To change the address space, another register is needed.
+-// A register therefore has to be stored/restored.
+-//
+-DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++/*
++ * Upon kernel/user mode switch, it may happen that the address
++ * space has to be switched before the registers have been
++ * stored.  To change the address space, another register is
++ * needed.  A register therefore has to be stored/restored.
++*/
+ 
+-#endif /* CONFIG_KAISER */
++DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ 
+ /**
+- *  shadowmem_add_mapping - map a virtual memory part to the shadow mapping
++ *  kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+  *  @addr: the start address of the range
+  *  @size: the size of the range
+  *  @flags: The mapping flags of the pages
+  *
+- *  the mapping is done on a global scope, so no bigger synchronization has to be done.
+- *  the pages have to be manually unmapped again when they are not needed any longer.
++ *  The mapping is done on a global scope, so no bigger
++ *  synchronization has to be done.  the pages have to be
++ *  manually unmapped again when they are not needed any longer.
+  */
+-extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
++extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+ 
+ 
+ /**
+- *  shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping
++ *  kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
+  *  @addr: the start address of the range
+  *  @size: the size of the range
+  */
+ extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+ 
+ /**
+- *  shadowmem_initialize_mapping - Initialize the shadow mapping
++ *  kaiser_initialize_mapping - Initialize the shadow mapping
+  *
+- *  most parts of the shadow mapping can be mapped upon boot time.
+- *  only the thread stacks have to be mapped on runtime.
+- *  the mapped regions are not unmapped at all.
++ *  Most parts of the shadow mapping can be mapped upon boot
++ *  time.  Only per-process things like the thread stacks
++ *  or a new LDT have to be mapped at runtime.  These boot-
++ *  time mappings are permanent and never unmapped.
+  */
+ extern void kaiser_init(void);
+ 
+-#endif
++#endif /* CONFIG_KAISER */
++
++#endif /* __ASSEMBLY */
+ 
+ 
+ 
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -690,7 +690,17 @@ static inline pud_t *pud_offset(pgd_t *p
+ 
+ static inline int pgd_bad(pgd_t pgd)
+ {
+-	return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
++	pgdval_t ignore_flags = _PAGE_USER;
++	/*
++	 * We set NX on KAISER pgds that map userspace memory so
++	 * that userspace can not meaningfully use the kernel
++	 * page table by accident; it will fault on the first
++	 * instruction it tries to run.  See native_set_pgd().
++	 */
++	if (IS_ENABLED(CONFIG_KAISER))
++		ignore_flags |= _PAGE_NX;
++
++	return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
+ }
+ 
+ static inline int pgd_none(pgd_t pgd)
+@@ -905,8 +915,10 @@ static inline void clone_pgd_range(pgd_t
+ {
+        memcpy(dst, src, count * sizeof(pgd_t));
+ #ifdef CONFIG_KAISER
+-	// clone the shadow pgd part as well
+-	memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t));
++	/* Clone the shadow pgd part as well */
++	memcpy(native_get_shadow_pgd(dst),
++	       native_get_shadow_pgd(src),
++	       count * sizeof(pgd_t));
+ #endif
+ }
+ 
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -107,26 +107,58 @@ static inline void native_pud_clear(pud_
+ }
+ 
+ #ifdef CONFIG_KAISER
+-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) {
++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++{
+ 	return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
+ }
+ 
+-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) {
++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++{
+ 	return (pgd_t *)(void*)((unsigned long)(void*)pgdp &  ~(unsigned long)PAGE_SIZE);
+ }
++#else
++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++{
++	BUILD_BUG_ON(1);
++	return NULL;
++}
++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++{
++	return pgdp;
++}
+ #endif /* CONFIG_KAISER */
+ 
++/*
++ * Page table pages are page-aligned.  The lower half of the top
++ * level is used for userspace and the top half for the kernel.
++ * This returns true for user pages that need to get copied into
++ * both the user and kernel copies of the page tables, and false
++ * for kernel pages that should only be in the kernel copy.
++ */
++static inline bool is_userspace_pgd(void *__ptr)
++{
++	unsigned long ptr = (unsigned long)__ptr;
++
++	return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2));
++}
++
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+ #ifdef CONFIG_KAISER
+-	// We know that a pgd is page aligned.
+-	// Therefore the lower indices have to be mapped to user space.
+-	// These pages are mapped to the shadow mapping.
+-	if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) {
++	pteval_t extra_kern_pgd_flags = 0;
++	/* Do we need to also populate the shadow pgd? */
++	if (is_userspace_pgd(pgdp)) {
+ 		native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++		/*
++		 * Even if the entry is *mapping* userspace, ensure
++		 * that userspace can not use it.  This way, if we
++		 * get out to userspace running on the kernel CR3,
++		 * userspace will crash instead of running.
++		 */
++		extra_kern_pgd_flags = _PAGE_NX;
+ 	}
+-
+-	pgdp->pgd = pgd.pgd & ~_PAGE_USER;
++	pgdp->pgd = pgd.pgd;
++	pgdp->pgd |= extra_kern_pgd_flags;
+ #else /* CONFIG_KAISER */
+ 	*pgdp = pgd;
+ #endif
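Note (editor's illustration, not part of the patch): a PGD page holds 512
eight-byte entries and the first 256 of them map the lower (user) half of the
address space, which is why is_userspace_pgd() above only looks at the byte
offset within the page. A standalone sketch with hypothetical demo_* names:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_PAGE_SIZE 4096UL

	static bool demo_is_userspace_pgd(const void *entry)
	{
		return ((uintptr_t)entry % DEMO_PAGE_SIZE) < (DEMO_PAGE_SIZE / 2);
	}

	int main(void)
	{
		uintptr_t pgd_page = 0x300000;	/* pretend: page-aligned PGD */

		printf("entry   0 -> %d\n", demo_is_userspace_pgd((void *)(pgd_page + 0 * 8)));
		printf("entry 255 -> %d\n", demo_is_userspace_pgd((void *)(pgd_page + 255 * 8)));
		printf("entry 256 -> %d\n", demo_is_userspace_pgd((void *)(pgd_page + 256 * 8)));
		return 0;
	}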
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -48,7 +48,7 @@
+ #ifdef CONFIG_KAISER
+ #define _PAGE_GLOBAL	(_AT(pteval_t, 0))
+ #else
+-#define _PAGE_GLOBAL  (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
++#define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+ #endif
+ #define _PAGE_SOFTW1	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_SOFTW2	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
+@@ -123,11 +123,7 @@
+ #define _PAGE_DEVMAP	(_AT(pteval_t, 0))
+ #endif
+ 
+-#ifdef CONFIG_KAISER
+-#define _PAGE_PROTNONE	(_AT(pteval_t, 0))
+-#else
+ #define _PAGE_PROTNONE  (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+-#endif
+ 
+ #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
+ 			 _PAGE_ACCESSED | _PAGE_DIRTY)
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -127,11 +127,14 @@ void __init init_espfix_bsp(void)
+ 	/* Install the espfix pud into the kernel page directory */
+ 	pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+ 	pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
+-#ifdef CONFIG_KAISER
+-	// add the esp stack pud to the shadow mapping here.
+-	// This can be done directly, because the fixup stack has its own pud
+-	set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page)));
+-#endif
++	/*
++	 * Just copy the top-level PGD that is mapping the espfix
++	 * area to ensure it is mapped into the shadow user page
++	 * tables.
++	 */
++	if (IS_ENABLED(CONFIG_KAISER))
++		set_pgd(native_get_shadow_pgd(pgd_p),
++			__pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
+ 
+ 	/* Randomize the locations */
+ 	init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -406,11 +406,24 @@ GLOBAL(early_recursion_flag)
+ GLOBAL(name)
+ 
+ #ifdef CONFIG_KAISER
++/*
++ * Each PGD needs to be 8k long and 8k aligned.  We do not
++ * ever go out to userspace with these, so we do not
++ * strictly *need* the second page, but this allows us to
++ * have a single set_pgd() implementation that does not
++ * need to worry about whether it has 4k or 8k to work
++ * with.
++ *
++ * This ensures PGDs are 8k long:
++ */
++#define KAISER_USER_PGD_FILL	512
++/* This ensures they are 8k-aligned: */
+ #define NEXT_PGD_PAGE(name) \
+ 	.balign 2 * PAGE_SIZE; \
+ GLOBAL(name)
+ #else
+ #define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
++#define KAISER_USER_PGD_FILL	0
+ #endif
+ 
+ /* Automate the creation of 1 to 1 mapping pmd entries */
+@@ -425,6 +438,7 @@ GLOBAL(name)
+ NEXT_PGD_PAGE(early_level4_pgt)
+ 	.fill	511,8,0
+ 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
++	.fill	KAISER_USER_PGD_FILL,8,0
+ 
+ NEXT_PAGE(early_dynamic_pgts)
+ 	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
+@@ -433,7 +447,8 @@ NEXT_PAGE(early_dynamic_pgts)
+ 
+ #ifndef CONFIG_XEN
+ NEXT_PGD_PAGE(init_level4_pgt)
+-	.fill	2*512,8,0
++	.fill	512,8,0
++	.fill	KAISER_USER_PGD_FILL,8,0
+ #else
+ NEXT_PGD_PAGE(init_level4_pgt)
+ 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+@@ -442,6 +457,7 @@ NEXT_PGD_PAGE(init_level4_pgt)
+ 	.org    init_level4_pgt + L4_START_KERNEL*8, 0
+ 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ 	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
++	.fill	KAISER_USER_PGD_FILL,8,0
+ 
+ NEXT_PAGE(level3_ident_pgt)
+ 	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+@@ -452,6 +468,7 @@ NEXT_PAGE(level2_ident_pgt)
+ 	 */
+ 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+ #endif
++	.fill	KAISER_USER_PGD_FILL,8,0
+ 
+ NEXT_PAGE(level3_kernel_pgt)
+ 	.fill	L3_START_KERNEL,8,0
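Note (editor's arithmetic, not part of the patch): KAISER_USER_PGD_FILL is 512
entries of 8 bytes each, i.e. 512 * 8 = 4096 bytes = one extra page, so each
".fill KAISER_USER_PGD_FILL,8,0" above pads a 4kB PGD out to 8kB, and the
".balign 2 * PAGE_SIZE" in NEXT_PGD_PAGE() keeps the pair 8kB-aligned so the
PAGE_SIZE pointer trick in pgtable_64.h works.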
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -18,6 +18,7 @@
+ #include <linux/uaccess.h>
+ 
+ #include <asm/ldt.h>
++#include <asm/kaiser.h>
+ #include <asm/desc.h>
+ #include <asm/mmu_context.h>
+ #include <asm/syscalls.h>
+@@ -34,11 +35,21 @@ static void flush_ldt(void *current_mm)
+ 	set_ldt(pc->ldt->entries, pc->ldt->size);
+ }
+ 
++static void __free_ldt_struct(struct ldt_struct *ldt)
++{
++	if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
++		vfree(ldt->entries);
++	else
++		free_page((unsigned long)ldt->entries);
++	kfree(ldt);
++}
++
+ /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
+ static struct ldt_struct *alloc_ldt_struct(int size)
+ {
+ 	struct ldt_struct *new_ldt;
+ 	int alloc_size;
++	int ret = 0;
+ 
+ 	if (size > LDT_ENTRIES)
+ 		return NULL;
+@@ -66,6 +77,14 @@ static struct ldt_struct *alloc_ldt_stru
+ 		return NULL;
+ 	}
+ 
++	// FIXME: make kaiser_add_mapping() return an error code
++	// when it fails
++	kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
++			   __PAGE_KERNEL);
++	if (ret) {
++		__free_ldt_struct(new_ldt);
++		return NULL;
++	}
+ 	new_ldt->size = size;
+ 	return new_ldt;
+ }
+@@ -92,12 +111,10 @@ static void free_ldt_struct(struct ldt_s
+ 	if (likely(!ldt))
+ 		return;
+ 
++	kaiser_remove_mapping((unsigned long)ldt->entries,
++			      ldt->size * LDT_ENTRY_SIZE);
+ 	paravirt_free_ldt(ldt->entries, ldt->size);
+-	if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+-		vfree(ldt->entries);
+-	else
+-		free_page((unsigned long)ldt->entries);
+-	kfree(ldt);
++	__free_ldt_struct(ldt);
+ }
+ 
+ /*
+--- a/arch/x86/kernel/tracepoint.c
++++ b/arch/x86/kernel/tracepoint.c
+@@ -9,10 +9,12 @@
+ #include <linux/atomic.h>
+ 
+ atomic_t trace_idt_ctr = ATOMIC_INIT(0);
++__aligned(PAGE_SIZE)
+ struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
+ 				(unsigned long) trace_idt_table };
+ 
+ /* No need to be aligned, but done to keep all IDTs defined the same way. */
++__aligned(PAGE_SIZE)
+ gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
+ 
+ static int trace_irq_vector_refcount;
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -1,160 +1,305 @@
+-
+-
++#include <linux/bug.h>
+ #include <linux/kernel.h>
+ #include <linux/errno.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
+ #include <linux/bug.h>
+ #include <linux/init.h>
++#include <linux/interrupt.h>
+ #include <linux/spinlock.h>
+ #include <linux/mm.h>
+-
+ #include <linux/uaccess.h>
++
++#include <asm/kaiser.h>
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
+ #ifdef CONFIG_KAISER
+ 
+ __visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++/*
++ * At runtime, the only things we map are some things for CPU
++ * hotplug, and stacks for new processes.  No two CPUs will ever
++ * be populating the same addresses, so we only need to ensure
++ * that we protect between two CPUs trying to allocate and
++ * populate the same page table page.
++ *
++ * Only take this lock when doing a set_p[4um]d(), but it is not
++ * needed for doing a set_pte().  We assume that only the *owner*
++ * of a given allocation will be doing this for _their_
++ * allocation.
++ *
++ * This ensures that once a system has been running for a while
++ * and there have been stacks all over and these page tables
++ * are fully populated, there will be no further acquisitions of
++ * this lock.
++ */
++static DEFINE_SPINLOCK(shadow_table_allocation_lock);
+ 
+-/**
+- * Get the real ppn from an address in the kernel mapping.
+- * @param address The virtual address
+- * @return the physical address
++/*
++ * Returns -1 on error.
+  */
+-static inline unsigned long get_pa_from_mapping (unsigned long address)
++static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
+ {
+ 	pgd_t *pgd;
+ 	pud_t *pud;
+ 	pmd_t *pmd;
+ 	pte_t *pte;
+ 
+-	pgd = pgd_offset_k(address);
+-	BUG_ON(pgd_none(*pgd) || pgd_large(*pgd));
++	pgd = pgd_offset_k(vaddr);
++	/*
++	 * We made all the kernel PGDs present in kaiser_init().
++	 * We expect them to stay that way.
++	 */
++	BUG_ON(pgd_none(*pgd));
++	/*
++	 * PGDs are either 512GB or 128TB on all x86_64
++	 * configurations.  We don't handle these.
++	 */
++	BUG_ON(pgd_large(*pgd));
++
++	pud = pud_offset(pgd, vaddr);
++	if (pud_none(*pud)) {
++		WARN_ON_ONCE(1);
++		return -1;
++	}
+ 
+-	pud = pud_offset(pgd, address);
+-	BUG_ON(pud_none(*pud));
++	if (pud_large(*pud))
++		return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK);
+ 
+-	if (pud_large(*pud)) {
+-		return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK);
++	pmd = pmd_offset(pud, vaddr);
++	if (pmd_none(*pmd)) {
++		WARN_ON_ONCE(1);
++		return -1;
+ 	}
+ 
+-	pmd = pmd_offset(pud, address);
+-	BUG_ON(pmd_none(*pmd));
++	if (pmd_large(*pmd))
++		return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK);
+ 
+-	if (pmd_large(*pmd)) {
+-		return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK);
++	pte = pte_offset_kernel(pmd, vaddr);
++	if (pte_none(*pte)) {
++		WARN_ON_ONCE(1);
++		return -1;
+ 	}
+ 
+-	pte = pte_offset_kernel(pmd, address);
+-	BUG_ON(pte_none(*pte));
+-
+-	return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK);
++	return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
+ }
+ 
+-void _kaiser_copy (unsigned long start_addr, unsigned long size,
+-					unsigned long flags)
++/*
++ * This is a relatively normal page table walk, except that it
++ * also tries to allocate page tables pages along the way.
++ *
++ * Returns a pointer to a PTE on success, or NULL on failure.
++ */
++static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
+ {
+-	pgd_t *pgd;
+-	pud_t *pud;
+ 	pmd_t *pmd;
+-	pte_t *pte;
+-	unsigned long address;
+-	unsigned long end_addr = start_addr + size;
+-	unsigned long target_address;
++	pud_t *pud;
++	pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
++	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ 
+-	for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1));
+-			address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) {
+-		target_address = get_pa_from_mapping(address);
++	might_sleep();
++	if (is_atomic) {
++		gfp &= ~GFP_KERNEL;
++		gfp |= __GFP_HIGH | __GFP_ATOMIC;
++	}
+ 
+-		pgd = native_get_shadow_pgd(pgd_offset_k(address));
++	if (pgd_none(*pgd)) {
++		WARN_ONCE(1, "All shadow pgds should have been populated");
++		return NULL;
++	}
++	BUILD_BUG_ON(pgd_large(*pgd) != 0);
+ 
+-		BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n");
+-		BUG_ON(pgd_large(*pgd));
++	pud = pud_offset(pgd, address);
++	/* The shadow page tables do not use large mappings: */
++	if (pud_large(*pud)) {
++		WARN_ON(1);
++		return NULL;
++	}
++	if (pud_none(*pud)) {
++		unsigned long new_pmd_page = __get_free_page(gfp);
++		if (!new_pmd_page)
++			return NULL;
++		spin_lock(&shadow_table_allocation_lock);
++		if (pud_none(*pud))
++			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
++		else
++			free_page(new_pmd_page);
++		spin_unlock(&shadow_table_allocation_lock);
++	}
+ 
+-		pud = pud_offset(pgd, address);
+-		if (pud_none(*pud)) {
+-			set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address))));
+-		}
+-		BUG_ON(pud_large(*pud));
++	pmd = pmd_offset(pud, address);
++	/* The shadow page tables do not use large mappings: */
++	if (pmd_large(*pmd)) {
++		WARN_ON(1);
++		return NULL;
++	}
++	if (pmd_none(*pmd)) {
++		unsigned long new_pte_page = __get_free_page(gfp);
++		if (!new_pte_page)
++			return NULL;
++		spin_lock(&shadow_table_allocation_lock);
++		if (pmd_none(*pmd))
++			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
++		else
++			free_page(new_pte_page);
++		spin_unlock(&shadow_table_allocation_lock);
++	}
+ 
+-		pmd = pmd_offset(pud, address);
+-		if (pmd_none(*pmd)) {
+-			set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address))));
+-		}
+-		BUG_ON(pmd_large(*pmd));
++	return pte_offset_kernel(pmd, address);
++}
+ 
+-		pte = pte_offset_kernel(pmd, address);
++int kaiser_add_user_map(const void *__start_addr, unsigned long size,
++			unsigned long flags)
++{
++	int ret = 0;
++	pte_t *pte;
++	unsigned long start_addr = (unsigned long )__start_addr;
++	unsigned long address = start_addr & PAGE_MASK;
++	unsigned long end_addr = PAGE_ALIGN(start_addr + size);
++	unsigned long target_address;
++
++	for (;address < end_addr; address += PAGE_SIZE) {
++		target_address = get_pa_from_mapping(address);
++		if (target_address == -1) {
++			ret = -EIO;
++			break;
++		}
++		pte = kaiser_pagetable_walk(address, false);
+ 		if (pte_none(*pte)) {
+ 			set_pte(pte, __pte(flags | target_address));
+ 		} else {
+-			BUG_ON(__pa(pte_page(*pte)) != target_address);
++			pte_t tmp;
++			set_pte(&tmp, __pte(flags | target_address));
++			WARN_ON_ONCE(!pte_same(*pte, tmp));
+ 		}
+ 	}
++	return ret;
+ }
+ 
+-// at first, add a pud for every pgd entry in the shadowmem-kernel-part of the kernel mapping
+-static inline void __init _kaiser_init(void)
++static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags)
++{
++	unsigned long size = end - start;
++
++	return kaiser_add_user_map(start, size, flags);
++}
++
++/*
++ * Ensure that the top level of the (shadow) page tables are
++ * entirely populated.  This ensures that all processes that get
++ * forked have the same entries.  This way, we do not have to
++ * ever go set up new entries in older processes.
++ *
++ * Note: we never free these, so there are no updates to them
++ * after this.
++ */
++static void __init kaiser_init_all_pgds(void)
+ {
+ 	pgd_t *pgd;
+ 	int i = 0;
+ 
+ 	pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ 	for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+-		set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0))));
++		pgd_t new_pgd;
++		pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
++		if (!pud) {
++			WARN_ON(1);
++			break;
++		}
++		new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
++		/*
++		 * Make sure not to stomp on some other pgd entry.
++		 */
++		if (!pgd_none(pgd[i])) {
++			WARN_ON(1);
++			continue;
++		}
++		set_pgd(pgd + i, new_pgd);
+ 	}
+ }
+ 
++#define kaiser_add_user_map_early(start, size, flags) do {	\
++	int __ret = kaiser_add_user_map(start, size, flags);	\
++	WARN_ON(__ret);						\
++} while (0)
++
++#define kaiser_add_user_map_ptrs_early(start, end, flags) do {		\
++	int __ret = kaiser_add_user_map_ptrs(start, end, flags);	\
++	WARN_ON(__ret);							\
++} while (0)
++
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+-spinlock_t shadow_table_lock;
++/*
++ * If anything in here fails, we will likely die on one of the
++ * first kernel->user transitions and init will die.  But, we
++ * will have most of the kernel up by then and should be able to
++ * get a clean warning out of it.  If we BUG_ON() here, we run
++ * the risk of being before we have good console output.
++ */
+ void __init kaiser_init(void)
+ {
+ 	int cpu;
+-	spin_lock_init(&shadow_table_lock);
+-
+-	spin_lock(&shadow_table_lock);
+ 
+-	_kaiser_init();
++	kaiser_init_all_pgds();
+ 
+ 	for_each_possible_cpu(cpu) {
+-		// map the per cpu user variables
+-		_kaiser_copy(
+-				(unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)),
+-				(unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start,
+-				__PAGE_KERNEL);
+-	}
+-
+-	// map the entry/exit text section, which is responsible to switch between user- and kernel mode
+-	_kaiser_copy(
+-			(unsigned long) __entry_text_start,
+-			(unsigned long) __entry_text_end - (unsigned long) __entry_text_start,
+-			__PAGE_KERNEL_RX);
+-
+-	// the fixed map address of the idt_table
+-	_kaiser_copy(
+-			(unsigned long) idt_descr.address,
+-			sizeof(gate_desc) * NR_VECTORS,
+-			__PAGE_KERNEL_RO);
++		void *percpu_vaddr = __per_cpu_user_mapped_start +
++				     per_cpu_offset(cpu);
++		unsigned long percpu_sz = __per_cpu_user_mapped_end -
++					  __per_cpu_user_mapped_start;
++		kaiser_add_user_map_early(percpu_vaddr, percpu_sz,
++					  __PAGE_KERNEL);
++	}
+ 
+-	spin_unlock(&shadow_table_lock);
++	/*
++	 * Map the entry/exit text section, which is needed at
++	 * switches from user to and from kernel.
++	 */
++	kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end,
++				       __PAGE_KERNEL_RX);
++
++#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
++	kaiser_add_user_map_ptrs_early(__irqentry_text_start,
++				       __irqentry_text_end,
++				       __PAGE_KERNEL_RX);
++#endif
++	kaiser_add_user_map_early((void *)idt_descr.address,
++				  sizeof(gate_desc) * NR_VECTORS,
++				  __PAGE_KERNEL_RO);
++#ifdef CONFIG_TRACING
++	kaiser_add_user_map_early(&trace_idt_descr,
++				  sizeof(trace_idt_descr),
++				  __PAGE_KERNEL);
++	kaiser_add_user_map_early(&trace_idt_table,
++				  sizeof(gate_desc) * NR_VECTORS,
++				  __PAGE_KERNEL);
++#endif
++	kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr),
++				  __PAGE_KERNEL);
++	kaiser_add_user_map_early(&debug_idt_table,
++				  sizeof(gate_desc) * NR_VECTORS,
++				  __PAGE_KERNEL);
+ }
+ 
++extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end);
+ // add a mapping to the shadow-mapping, and synchronize the mappings
+-void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+ {
+-	spin_lock(&shadow_table_lock);
+-	_kaiser_copy(addr, size, flags);
+-	spin_unlock(&shadow_table_lock);
++	return kaiser_add_user_map((const void *)addr, size, flags);
+ }
+ 
+-extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end);
+ void kaiser_remove_mapping(unsigned long start, unsigned long size)
+ {
+-	pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start));
+-	spin_lock(&shadow_table_lock);
+-	do {
+-		unmap_pud_range(pgd, start, start + size);
+-	} while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size)));
+-	spin_unlock(&shadow_table_lock);
++	unsigned long end = start + size;
++	unsigned long addr;
++
++	for (addr = start; addr < end; addr += PGDIR_SIZE) {
++		pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
++		/*
++		 * unmap_p4d_range() handles > P4D_SIZE unmaps,
++		 * so no need to trim 'end'.
++		 */
++		unmap_pud_range_nofree(pgd, addr, end);
++	}
+ }
+ #endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(cpa_lock);
+ #define CPA_FLUSHTLB 1
+ #define CPA_ARRAY 2
+ #define CPA_PAGES_ARRAY 4
++#define CPA_FREE_PAGETABLES 8
+ 
+ #ifdef CONFIG_PROC_FS
+ static unsigned long direct_pages_count[PG_LEVEL_NUM];
+@@ -729,10 +730,13 @@ static int split_large_page(struct cpa_d
+ 	return 0;
+ }
+ 
+-static bool try_to_free_pte_page(pte_t *pte)
++static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte)
+ {
+ 	int i;
+ 
++	if (!(cpa->flags & CPA_FREE_PAGETABLES))
++		return false;
++
+ 	for (i = 0; i < PTRS_PER_PTE; i++)
+ 		if (!pte_none(pte[i]))
+ 			return false;
+@@ -741,10 +745,13 @@ static bool try_to_free_pte_page(pte_t *
+ 	return true;
+ }
+ 
+-static bool try_to_free_pmd_page(pmd_t *pmd)
++static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd)
+ {
+ 	int i;
+ 
++	if (!(cpa->flags & CPA_FREE_PAGETABLES))
++		return false;
++
+ 	for (i = 0; i < PTRS_PER_PMD; i++)
+ 		if (!pmd_none(pmd[i]))
+ 			return false;
+@@ -753,7 +760,9 @@ static bool try_to_free_pmd_page(pmd_t *
+ 	return true;
+ }
+ 
+-static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
++static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd,
++			    unsigned long start,
++			    unsigned long end)
+ {
+ 	pte_t *pte = pte_offset_kernel(pmd, start);
+ 
+@@ -764,22 +773,23 @@ static bool unmap_pte_range(pmd_t *pmd,
+ 		pte++;
+ 	}
+ 
+-	if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
++	if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) {
+ 		pmd_clear(pmd);
+ 		return true;
+ 	}
+ 	return false;
+ }
+ 
+-static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
++static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd,
+ 			      unsigned long start, unsigned long end)
+ {
+-	if (unmap_pte_range(pmd, start, end))
+-		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
++	if (unmap_pte_range(cpa, pmd, start, end))
++		if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
+ 			pud_clear(pud);
+ }
+ 
+-static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
++static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud,
++			    unsigned long start, unsigned long end)
+ {
+ 	pmd_t *pmd = pmd_offset(pud, start);
+ 
+@@ -790,7 +800,7 @@ static void unmap_pmd_range(pud_t *pud,
+ 		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+ 		unsigned long pre_end = min_t(unsigned long, end, next_page);
+ 
+-		__unmap_pmd_range(pud, pmd, start, pre_end);
++		__unmap_pmd_range(cpa, pud, pmd, start, pre_end);
+ 
+ 		start = pre_end;
+ 		pmd++;
+@@ -803,7 +813,8 @@ static void unmap_pmd_range(pud_t *pud,
+ 		if (pmd_large(*pmd))
+ 			pmd_clear(pmd);
+ 		else
+-			__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
++			__unmap_pmd_range(cpa, pud, pmd,
++					  start, start + PMD_SIZE);
+ 
+ 		start += PMD_SIZE;
+ 		pmd++;
+@@ -813,17 +824,19 @@ static void unmap_pmd_range(pud_t *pud,
+ 	 * 4K leftovers?
+ 	 */
+ 	if (start < end)
+-		return __unmap_pmd_range(pud, pmd, start, end);
++		return __unmap_pmd_range(cpa, pud, pmd, start, end);
+ 
+ 	/*
+ 	 * Try again to free the PMD page if haven't succeeded above.
+ 	 */
+ 	if (!pud_none(*pud))
+-		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
++		if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
+ 			pud_clear(pud);
+ }
+ 
+-void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd,
++			      unsigned long start,
++			      unsigned long end)
+ {
+ 	pud_t *pud = pud_offset(pgd, start);
+ 
+@@ -834,7 +847,7 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ 		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+ 		unsigned long pre_end	= min_t(unsigned long, end, next_page);
+ 
+-		unmap_pmd_range(pud, start, pre_end);
++		unmap_pmd_range(cpa, pud, start, pre_end);
+ 
+ 		start = pre_end;
+ 		pud++;
+@@ -848,7 +861,7 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ 		if (pud_large(*pud))
+ 			pud_clear(pud);
+ 		else
+-			unmap_pmd_range(pud, start, start + PUD_SIZE);
++			unmap_pmd_range(cpa, pud, start, start + PUD_SIZE);
+ 
+ 		start += PUD_SIZE;
+ 		pud++;
+@@ -858,7 +871,7 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ 	 * 2M leftovers?
+ 	 */
+ 	if (start < end)
+-		unmap_pmd_range(pud, start, end);
++		unmap_pmd_range(cpa, pud, start, end);
+ 
+ 	/*
+ 	 * No need to try to free the PUD page because we'll free it in
+@@ -866,6 +879,24 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ 	 */
+ }
+ 
++static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++{
++	struct cpa_data cpa = {
++		.flags = CPA_FREE_PAGETABLES,
++	};
++
++	__unmap_pud_range(&cpa, pgd, start, end);
++}
++
++void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end)
++{
++	struct cpa_data cpa = {
++		.flags = 0,
++	};
++
++	__unmap_pud_range(&cpa, pgd, start, end);
++}
++
+ static int alloc_pte_page(pmd_t *pmd)
+ {
+ 	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -344,40 +344,26 @@ static inline void _pgd_free(pgd_t *pgd)
+ 		kmem_cache_free(pgd_cache, pgd);
+ }
+ #else
+-static inline pgd_t *_pgd_alloc(void)
+-{
+-#ifdef CONFIG_KAISER
+-	// Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory
+-	// block. Therefore, we have to allocate at least 3 pages. However, the
+-	// __get_free_pages returns us 4 pages. Hence, we store the base pointer at
+-	// the beginning of the page of our 8kb-aligned memory block in order to
+-	// correctly free it afterwars.
+ 
+-	unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE));
+-
+-	if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages)
+-	{
+-		*((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages;
+-		return (pgd_t *) pages;
+-	}
+-	else
+-	{
+-		*((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages;
+-		return (pgd_t *) (pages + PAGE_SIZE);
+-	}
++#ifdef CONFIG_KAISER
++/*
++ * Instead of one pmd, we aquire two pmds.  Being order-1, it is
++ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
++ * in a pointer to swap between the two 4k halves.
++ */
++#define PGD_ALLOCATION_ORDER 1
+ #else
+-	return (pgd_t *)__get_free_page(PGALLOC_GFP);
++#define PGD_ALLOCATION_ORDER 0
+ #endif
++
++static inline pgd_t *_pgd_alloc(void)
++{
++	return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+ }
+ 
+ static inline void _pgd_free(pgd_t *pgd)
+ {
+-#ifdef CONFIG_KAISER
+-  unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE));
+-	free_pages(pages, get_order(4*PAGE_SIZE));
+-#else
+-	free_page((unsigned long)pgd);
+-#endif
++	free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
+ }
+ #endif /* CONFIG_X86_PAE */
+ 
+--- /dev/null
++++ b/include/linux/kaiser.h
+@@ -0,0 +1,26 @@
++#ifndef _INCLUDE_KAISER_H
++#define _INCLUDE_KAISER_H
++
++#ifdef CONFIG_KAISER
++#include <asm/kaiser.h>
++#else
++
++/*
++ * These stubs are used whenever CONFIG_KAISER is off, which
++ * includes architectures that support KAISER, but have it
++ * disabled.
++ */
++
++static inline void kaiser_init(void)
++{
++}
++static inline void kaiser_remove_mapping(unsigned long start, unsigned long size)
++{
++}
++static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++{
++	return 0;
++}
++
++#endif /* !CONFIG_KAISER */
++#endif /* _INCLUDE_KAISER_H */
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -58,6 +58,7 @@
+ #include <linux/tsacct_kern.h>
+ #include <linux/cn_proc.h>
+ #include <linux/freezer.h>
++#include <linux/kaiser.h>
+ #include <linux/delayacct.h>
+ #include <linux/taskstats_kern.h>
+ #include <linux/random.h>
+@@ -472,7 +473,6 @@ void set_task_stack_end_magic(struct tas
+ 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
+ }
+ 
+-extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ {
+ 	struct task_struct *tsk;
+@@ -500,9 +500,10 @@ static struct task_struct *dup_task_stru
+ 	 * functions again.
+ 	 */
+ 	tsk->stack = stack;
+-#ifdef CONFIG_KAISER
+-	kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
+-#endif
++
++	err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
++	if (err)
++		goto free_stack;
+ #ifdef CONFIG_VMAP_STACK
+ 	tsk->stack_vm_area = stack_vm_area;
+ #endif
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -32,12 +32,17 @@ config SECURITY
+ 	  If you are unsure how to answer this question, answer N.
+ config KAISER
+ 	bool "Remove the kernel mapping in user mode"
++	default y
+ 	depends on X86_64
+ 	depends on !PARAVIRT
+ 	help
+ 	  This enforces a strict kernel and user space isolation in order to close
+ 	  hardware side channels on kernel address information.
+ 
++config KAISER_REAL_SWITCH
++	bool "KAISER: actually switch page tables"
++	default y
++
+ config SECURITYFS
+ 	bool "Enable the securityfs filesystem"
+ 	help
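The _pgd_alloc() rework above is the heart of the shadow-table layout: one order-1 (8kB, 8kB-aligned) allocation holds the kernel pgd in its low 4kB and the shadow pgd in its high 4kB, so switching between them is just a matter of flipping bit 12 of the pointer, as native_get_shadow_pgd()/native_get_normal_pgd() do elsewhere in this series. A minimal user-space sketch of that arithmetic follows; MODEL_PAGE_SIZE, the helper names and the aligned_alloc() stand-in are illustrative, not kernel code.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MODEL_PAGE_SIZE 4096UL

/* flip bit 12 of an 8kB-aligned pgd pointer to reach the shadow half */
static uint64_t *get_shadow_pgd(uint64_t *pgdp)
{
	return (uint64_t *)((uintptr_t)pgdp | MODEL_PAGE_SIZE);
}

static uint64_t *get_normal_pgd(uint64_t *pgdp)
{
	return (uint64_t *)((uintptr_t)pgdp & ~MODEL_PAGE_SIZE);
}

int main(void)
{
	/* order-1, 8kB-aligned block standing in for __get_free_pages(gfp, 1) */
	uint64_t *pgd = aligned_alloc(2 * MODEL_PAGE_SIZE, 2 * MODEL_PAGE_SIZE);

	if (!pgd)
		return 1;
	printf("kernel half %p, shadow half %p\n",
	       (void *)get_normal_pgd(pgd), (void *)get_shadow_pgd(pgd));
	free(pgd);
	return 0;
}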
diff --git a/queue/kaiser-name-that-0x1000-kaiser_shadow_pgd_offset.patch b/queue/kaiser-name-that-0x1000-kaiser_shadow_pgd_offset.patch
new file mode 100644
index 0000000..447c040
--- /dev/null
+++ b/queue/kaiser-name-that-0x1000-kaiser_shadow_pgd_offset.patch
@@ -0,0 +1,66 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 9 Sep 2017 17:31:18 -0700
+Subject: kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET
+
+From: Hugh Dickins <hughd@google.com>
+
+
+There's a 0x1000 in various places, which looks better with a name.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S     |    4 ++--
+ arch/x86/include/asm/kaiser.h |    7 +++++--
+ 2 files changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1318,7 +1318,7 @@ ENTRY(nmi)
+ 	movq	%cr3, %rax
+ 	pushq	%rax
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+-	andq	$(~0x1000), %rax
++	andq	$(~KAISER_SHADOW_PGD_OFFSET), %rax
+ #endif
+ 	movq	%rax, %cr3
+ #endif
+@@ -1561,7 +1561,7 @@ end_repeat_nmi:
+ 	movq	%cr3, %rax
+ 	pushq	%rax
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+-	andq	$(~0x1000), %rax
++	andq	$(~KAISER_SHADOW_PGD_OFFSET), %rax
+ #endif
+ 	movq	%rax, %cr3
+ #endif
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -13,13 +13,16 @@
+  * A minimalistic kernel mapping holds the parts needed to be mapped in user
+  * mode, such as the entry/exit functions of the user space, or the stacks.
+  */
++
++#define KAISER_SHADOW_PGD_OFFSET 0x1000
++
+ #ifdef __ASSEMBLY__
+ #ifdef CONFIG_KAISER
+ 
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+-andq $(~0x1000), \reg
++andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
+ #endif
+ movq \reg, %cr3
+ .endm
+@@ -27,7 +30,7 @@ movq \reg, %cr3
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+-orq $(0x1000), \reg
++orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+ #endif
+ movq \reg, %cr3
+ .endm
diff --git a/queue/kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch b/queue/kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch
new file mode 100644
index 0000000..f0ec889
--- /dev/null
+++ b/queue/kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch
@@ -0,0 +1,166 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 26 Sep 2017 18:43:07 -0700
+Subject: kaiser: paranoid_entry pass cr3 need to paranoid_exit
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Neel Natu points out that paranoid_entry() was wrong to assume that
+an entry that did not need swapgs would not need SWITCH_KERNEL_CR3:
+paranoid_entry (used for debug breakpoint, int3, double fault or MCE;
+though I think it's only the MCE case that is cause for concern here)
+can break in at an awkward time, between cr3 switch and swapgs, but
+its handling always needs kernel gs and kernel cr3.
+
+Easy to fix in itself, but paranoid_entry() also needs to convey to
+paranoid_exit() (and my reading of macro idtentry says paranoid_entry
+and paranoid_exit are always paired) how to restore the prior state.
+The swapgs state is already conveyed by %ebx (0 or 1), so extend that
+also to convey when SWITCH_USER_CR3 will be needed (2 or 3).
+
+(Yes, I'd much prefer that 0 meant no swapgs, whereas it's the other
+way round: and a convention shared with error_entry() and error_exit(),
+which I don't want to touch.  Perhaps I should have inverted the bit
+for switch cr3 too, but did not.)
+
+paranoid_exit() would be straightforward, except for TRACE_IRQS: it
+did TRACE_IRQS_IRETQ when doing swapgs, but TRACE_IRQS_IRETQ_DEBUG
+when not: which is it supposed to use when SWITCH_USER_CR3 is split
+apart from that?  As best as I can determine, commit 5963e317b1e9
+("ftrace/x86: Do not change stacks in DEBUG when calling lockdep")
+missed the swapgs case, and should have used TRACE_IRQS_IRETQ_DEBUG
+there too (the discrepancy has nothing to do with the liberal use
+of _NO_STACK and _UNSAFE_STACK hereabouts: TRACE_IRQS_OFF_DEBUG has
+just been used in all cases); discrepancy lovingly preserved across
+several paranoid_exit() cleanups, but I'm now removing it.
+
+Neel further indicates that to use SWITCH_USER_CR3_NO_STACK there in
+paranoid_exit() is now not only unnecessary but unsafe: might corrupt
+syscall entry's unsafe_stack_register_backup of %rax.  Just use
+SWITCH_USER_CR3: and delete SWITCH_USER_CR3_NO_STACK altogether,
+before we make the mistake of using it again.
+
+hughd adds: this commit fixes an issue in the Kaiser-without-PCIDs
+part of the series, and ought to be moved earlier, if you decided
+to make a release of Kaiser-without-PCIDs.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S        |   46 ++++++++++++++++++++++++++++++---------
+ arch/x86/entry/entry_64_compat.S |    2 -
+ arch/x86/include/asm/kaiser.h    |    8 ------
+ 3 files changed, 37 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1053,7 +1053,11 @@ idtentry machine_check					has_error_cod
+ /*
+  * Save all registers in pt_regs, and switch gs if needed.
+  * Use slow, but surefire "are we in kernel?" check.
+- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
++ *
++ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit
++ *         ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit
++ *         ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit
++ *         ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit
+  */
+ ENTRY(paranoid_entry)
+ 	cld
+@@ -1065,9 +1069,26 @@ ENTRY(paranoid_entry)
+ 	testl	%edx, %edx
+ 	js	1f				/* negative -> in kernel */
+ 	SWAPGS
+-	SWITCH_KERNEL_CR3
+ 	xorl	%ebx, %ebx
+-1:	ret
++1:
++#ifdef CONFIG_KAISER
++	/*
++	 * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
++	 * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
++	 * Do a conditional SWITCH_KERNEL_CR3: this could safely be done
++	 * unconditionally, but we need to find out whether the reverse
++	 * should be done on return (conveyed to paranoid_exit in %ebx).
++	 */
++	movq	%cr3, %rax
++	testl	$KAISER_SHADOW_PGD_OFFSET, %eax
++	jz	2f
++	orl	$2, %ebx
++	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++	orq	x86_cr3_pcid_noflush, %rax
++	movq	%rax, %cr3
++2:
++#endif
++	ret
+ END(paranoid_entry)
+ 
+ /*
+@@ -1080,20 +1101,25 @@ END(paranoid_entry)
+  * be complicated.  Fortunately, we there's no good reason
+  * to try to handle preemption here.
+  *
+- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
++ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3
++ *           ebx=1: needs neither swapgs nor SWITCH_USER_CR3
++ *           ebx=2: needs both swapgs and SWITCH_USER_CR3
++ *           ebx=3: needs SWITCH_USER_CR3 but not swapgs
+  */
+ ENTRY(paranoid_exit)
+ 	DISABLE_INTERRUPTS(CLBR_NONE)
+ 	TRACE_IRQS_OFF_DEBUG
+-	testl	%ebx, %ebx			/* swapgs needed? */
++	TRACE_IRQS_IRETQ_DEBUG
++#ifdef CONFIG_KAISER
++	testl	$2, %ebx			/* SWITCH_USER_CR3 needed? */
++	jz	paranoid_exit_no_switch
++	SWITCH_USER_CR3
++paranoid_exit_no_switch:
++#endif
++	testl	$1, %ebx			/* swapgs needed? */
+ 	jnz	paranoid_exit_no_swapgs
+-	TRACE_IRQS_IRETQ
+-	SWITCH_USER_CR3_NO_STACK
+ 	SWAPGS_UNSAFE_STACK
+-	jmp	paranoid_exit_restore
+ paranoid_exit_no_swapgs:
+-	TRACE_IRQS_IRETQ_DEBUG
+-paranoid_exit_restore:
+ 	RESTORE_EXTRA_REGS
+ 	RESTORE_C_REGS
+ 	REMOVE_PT_GPREGS_FROM_STACK 8
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -343,7 +343,7 @@ ENTRY(entry_INT80_compat)
+ 
+ 	/* Go back to user mode. */
+ 	TRACE_IRQS_ON
+-	SWITCH_USER_CR3_NO_STACK
++	SWITCH_USER_CR3
+ 	SWAPGS
+ 	jmp	restore_regs_and_iret
+ END(entry_INT80_compat)
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -63,20 +63,12 @@ _SWITCH_TO_KERNEL_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ .endm
+ 
+-.macro SWITCH_USER_CR3_NO_STACK
+-movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+-_SWITCH_TO_USER_CR3 %rax %al
+-movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+-.endm
+-
+ #else /* CONFIG_KAISER */
+ 
+ .macro SWITCH_KERNEL_CR3 reg
+ .endm
+ .macro SWITCH_USER_CR3 reg regb
+ .endm
+-.macro SWITCH_USER_CR3_NO_STACK
+-.endm
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+ .endm
+ 
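The %ebx convention described above packs two independent pieces of exit state into two bits: bit 0 set means swapgs is not needed (the pre-existing, admittedly inverted convention), and bit 1 set means SWITCH_USER_CR3 is needed. A small user-space model of what paranoid_exit does for each of the four values; paranoid_exit_model() is a made-up name, not the real assembly.

#include <stdio.h>

/* bit 0 set: swapgs not needed on exit; bit 1 set: SWITCH_USER_CR3 needed */
static void paranoid_exit_model(unsigned int ebx)
{
	printf("ebx=%u:%s%s\n", ebx,
	       (ebx & 2) ? " SWITCH_USER_CR3" : "",
	       (ebx & 1) ? "" : " swapgs");
}

int main(void)
{
	unsigned int ebx;

	for (ebx = 0; ebx <= 3; ebx++)
		paranoid_exit_model(ebx);
	return 0;
}

Running it prints the same pairing as the comment block added to entry_64.S: 0 does swapgs only, 1 does neither, 2 does both, 3 does the CR3 switch only.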
diff --git a/queue/kaiser-pcid-0-for-kernel-and-128-for-user.patch b/queue/kaiser-pcid-0-for-kernel-and-128-for-user.patch
new file mode 100644
index 0000000..7e8f7e2
--- /dev/null
+++ b/queue/kaiser-pcid-0-for-kernel-and-128-for-user.patch
@@ -0,0 +1,129 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Fri, 8 Sep 2017 19:26:30 -0700
+Subject: kaiser: PCID 0 for kernel and 128 for user
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Why was 4 chosen for kernel PCID and 6 for user PCID?
+No good reason in a backport where PCIDs are only used for Kaiser.
+
+If we continue with those, then we shall need to add Andy Lutomirski's
+4.13 commit 6c690ee1039b ("x86/mm: Split read_cr3() into read_cr3_pa()
+and __read_cr3()"), which deals with the problem of read_cr3() callers
+finding stray bits in the cr3 that they expected to be page-aligned;
+and for hibernation, his 4.14 commit f34902c5c6c0 ("x86/hibernate/64:
+Mask off CR3's PCID bits in the saved CR3").
+
+But if 0 is used for kernel PCID, then there's no need to add in those
+commits - whenever the kernel looks, it sees 0 in the lower bits; and
+0 for kernel seems an obvious choice.
+
+And I naughtily propose 128 for user PCID.  Because there's a place
+in _SWITCH_TO_USER_CR3 where it takes note of the need for TLB FLUSH,
+but needs to reset that to NOFLUSH for the next occasion.  Currently
+it does so with a "movb $(0x80)" into the high byte of the per-cpu
+quadword, but that will cause a machine without PCID support to crash.
+Now, if %al just happened to have 0x80 in it at that point, on a
+machine with PCID support, but 0 on a machine without PCID support...
+
+(That will go badly wrong once the pgd can be at a physical address
+above 2^56, but even with 5-level paging, physical goes up to 2^52.)
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kaiser.h        |   19 ++++++++++++-------
+ arch/x86/include/asm/pgtable_types.h |    7 ++++---
+ arch/x86/mm/tlb.c                    |    3 +++
+ 3 files changed, 19 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -29,14 +29,19 @@ orq  X86_CR3_PCID_KERN_VAR, \reg
+ movq \reg, %cr3
+ .endm
+ 
+-.macro _SWITCH_TO_USER_CR3 reg
++.macro _SWITCH_TO_USER_CR3 reg regb
++/*
++ * regb must be the low byte portion of reg: because we have arranged
++ * for the low byte of the user PCID to serve as the high byte of NOFLUSH
++ * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are
++ * not enabled): so that the one register can update both memory and cr3.
++ */
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+ orq  PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
+ js   9f
+-// FLUSH this time, reset to NOFLUSH for next time
+-// But if nopcid?  Consider using 0x80 for user pcid?
+-movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
++/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
++movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
+ 9:
+ movq \reg, %cr3
+ .endm
+@@ -49,7 +54,7 @@ popq %rax
+ 
+ .macro SWITCH_USER_CR3
+ pushq %rax
+-_SWITCH_TO_USER_CR3 %rax
++_SWITCH_TO_USER_CR3 %rax %al
+ popq %rax
+ .endm
+ 
+@@ -61,7 +66,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ 
+ .macro SWITCH_USER_CR3_NO_STACK
+ movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+-_SWITCH_TO_USER_CR3 %rax
++_SWITCH_TO_USER_CR3 %rax %al
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ .endm
+ 
+@@ -69,7 +74,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ 
+ .macro SWITCH_KERNEL_CR3 reg
+ .endm
+-.macro SWITCH_USER_CR3 reg
++.macro SWITCH_USER_CR3 reg regb
+ .endm
+ .macro SWITCH_USER_CR3_NO_STACK
+ .endm
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -146,16 +146,17 @@
+ 
+ /* Mask for all the PCID-related bits in CR3: */
+ #define X86_CR3_PCID_MASK       (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
++#define X86_CR3_PCID_ASID_KERN  (_AC(0x0,UL))
++
+ #if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
+-#define X86_CR3_PCID_ASID_KERN  (_AC(0x4,UL))
+-#define X86_CR3_PCID_ASID_USER  (_AC(0x6,UL))
++/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
++#define X86_CR3_PCID_ASID_USER	(_AC(0x80,UL))
+ 
+ #define X86_CR3_PCID_KERN_FLUSH		(X86_CR3_PCID_ASID_KERN)
+ #define X86_CR3_PCID_USER_FLUSH		(X86_CR3_PCID_ASID_USER)
+ #define X86_CR3_PCID_KERN_NOFLUSH	(X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
+ #define X86_CR3_PCID_USER_NOFLUSH	(X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
+ #else
+-#define X86_CR3_PCID_ASID_KERN  (_AC(0x0,UL))
+ #define X86_CR3_PCID_ASID_USER  (_AC(0x0,UL))
+ /*
+  * PCIDs are unsupported on 32-bit and none of these bits can be
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -50,6 +50,9 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ 		 * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
+ 		 * do it here, but can only be used if X86_FEATURE_INVPCID is
+ 		 * available - and many machines support pcid without invpcid.
++		 *
++		 * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
++		 * but keep that line in there in case something changes.
+ 		 */
+ 		new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
+ 		kaiser_flush_tlb_on_return_to_user();
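The reason 0x80 is convenient for the user PCID: the per-cpu CR3 template is a 64-bit value whose top bit (bit 63) is X86_CR3_PCID_NOFLUSH, and on little-endian x86 that bit lives in byte 7. Storing the low byte of the just-assembled CR3 - which holds the user PCID - into byte 7 of the template therefore re-arms NOFLUSH on PCID hardware (0x80) and writes a harmless 0x00 on hardware without PCID. A user-space sketch of only that byte arithmetic; rearm_noflush() and the 0x1000 starting value are illustrative, not kernel code.

#include <stdint.h>
#include <stdio.h>

#define X86_CR3_PCID_NOFLUSH (1ULL << 63)

/* "movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)" in miniature */
static uint64_t rearm_noflush(uint64_t pcid_user_var, uint8_t user_pcid)
{
	/* byte 7 holds bits 63..56 on little-endian x86 */
	((uint8_t *)&pcid_user_var)[7] = user_pcid;
	return pcid_user_var;
}

int main(void)
{
	/* after a TLB flush has been requested, NOFLUSH starts out clear */
	uint64_t with_pcid    = rearm_noflush(0x1000, 0x80);	/* PCID hardware */
	uint64_t without_pcid = rearm_noflush(0x1000, 0x00);	/* no PCID       */

	printf("PCID 0x80: NOFLUSH %s\n",
	       (with_pcid & X86_CR3_PCID_NOFLUSH) ? "set" : "clear");
	printf("PCID 0x00: NOFLUSH %s\n",
	       (without_pcid & X86_CR3_PCID_NOFLUSH) ? "set" : "clear");
	return 0;
}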
diff --git a/queue/kaiser-set-_page_nx-only-if-supported.patch b/queue/kaiser-set-_page_nx-only-if-supported.patch
new file mode 100644
index 0000000..0e59d80
--- /dev/null
+++ b/queue/kaiser-set-_page_nx-only-if-supported.patch
@@ -0,0 +1,118 @@
+From: Guenter Roeck <groeck@chromium.org>
+Date: Thu, 4 Jan 2018 13:41:55 -0800
+Subject: kaiser: Set _PAGE_NX only if supported
+
+From: Guenter Roeck <groeck@chromium.org>
+
+This resolves a crash if loaded under qemu + haxm under windows.
+See https://www.spinics.net/lists/kernel/msg2689835.html for details.
+Here is a boot log (the log is from chromeos-4.4, but Tao Wu says that
+the same log is also seen with vanilla v4.4.110-rc1).
+
+[    0.712750] Freeing unused kernel memory: 552K
+[    0.721821] init: Corrupted page table at address 57b029b332e0
+[    0.722761] PGD 80000000bb238067 PUD bc36a067 PMD bc369067 PTE 45d2067
+[    0.722761] Bad pagetable: 000b [#1] PREEMPT SMP 
+[    0.722761] Modules linked in:
+[    0.722761] CPU: 1 PID: 1 Comm: init Not tainted 4.4.96 #31
+[    0.722761] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014
+[    0.722761] task: ffff8800bc290000 ti: ffff8800bc28c000 task.ti: ffff8800bc28c000
+[    0.722761] RIP: 0010:[<ffffffff83f4129e>]  [<ffffffff83f4129e>] __clear_user+0x42/0x67
+[    0.722761] RSP: 0000:ffff8800bc28fcf8  EFLAGS: 00010202
+[    0.722761] RAX: 0000000000000000 RBX: 00000000000001a4 RCX: 00000000000001a4
+[    0.722761] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000057b029b332e0
+[    0.722761] RBP: ffff8800bc28fd08 R08: ffff8800bc290000 R09: ffff8800bb2f4000
+[    0.722761] R10: ffff8800bc290000 R11: ffff8800bb2f4000 R12: 000057b029b332e0
+[    0.722761] R13: 0000000000000000 R14: 000057b029b33340 R15: ffff8800bb1e2a00
+[    0.722761] FS:  0000000000000000(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000
+[    0.722761] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+[    0.722761] CR2: 000057b029b332e0 CR3: 00000000bb2f8000 CR4: 00000000000006e0
+[    0.722761] Stack:
+[    0.722761]  000057b029b332e0 ffff8800bb95fa80 ffff8800bc28fd18 ffffffff83f4120c
+[    0.722761]  ffff8800bc28fe18 ffffffff83e9e7a1 ffff8800bc28fd68 0000000000000000
+[    0.722761]  ffff8800bc290000 ffff8800bc290000 ffff8800bc290000 ffff8800bc290000
+[    0.722761] Call Trace:
+[    0.722761]  [<ffffffff83f4120c>] clear_user+0x2e/0x30
+[    0.722761]  [<ffffffff83e9e7a1>] load_elf_binary+0xa7f/0x18f7
+[    0.722761]  [<ffffffff83de2088>] search_binary_handler+0x86/0x19c
+[    0.722761]  [<ffffffff83de389e>] do_execveat_common.isra.26+0x909/0xf98
+[    0.722761]  [<ffffffff844febe0>] ? rest_init+0x87/0x87
+[    0.722761]  [<ffffffff83de40be>] do_execve+0x23/0x25
+[    0.722761]  [<ffffffff83c002e3>] run_init_process+0x2b/0x2d
+[    0.722761]  [<ffffffff844fec4d>] kernel_init+0x6d/0xda
+[    0.722761]  [<ffffffff84505b2f>] ret_from_fork+0x3f/0x70
+[    0.722761]  [<ffffffff844febe0>] ? rest_init+0x87/0x87
+[    0.722761] Code: 86 84 be 12 00 00 00 e8 87 0d e8 ff 66 66 90 48 89 d8 48 c1
+eb 03 4c 89 e7 83 e0 07 48 89 d9 be 08 00 00 00 31 d2 48 85 c9 74 0a <48> 89 17
+48 01 f7 ff c9 75 f6 48 89 c1 85 c9 74 09 88 17 48 ff 
+[    0.722761] RIP  [<ffffffff83f4129e>] __clear_user+0x42/0x67
+[    0.722761]  RSP <ffff8800bc28fcf8>
+[    0.722761] ---[ end trace def703879b4ff090 ]---
+[    0.722761] BUG: sleeping function called from invalid context at /mnt/host/source/src/third_party/kernel/v4.4/kernel/locking/rwsem.c:21
+[    0.722761] in_atomic(): 0, irqs_disabled(): 1, pid: 1, name: init
+[    0.722761] CPU: 1 PID: 1 Comm: init Tainted: G      D         4.4.96 #31
+[    0.722761] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014
+[    0.722761]  0000000000000086 dcb5d76098c89836 ffff8800bc28fa30 ffffffff83f34004
+[    0.722761]  ffffffff84839dc2 0000000000000015 ffff8800bc28fa40 ffffffff83d57dc9
+[    0.722761]  ffff8800bc28fa68 ffffffff83d57e6a ffffffff84a53640 0000000000000000
+[    0.722761] Call Trace:
+[    0.722761]  [<ffffffff83f34004>] dump_stack+0x4d/0x63
+[    0.722761]  [<ffffffff83d57dc9>] ___might_sleep+0x13a/0x13c
+[    0.722761]  [<ffffffff83d57e6a>] __might_sleep+0x9f/0xa6
+[    0.722761]  [<ffffffff84502788>] down_read+0x20/0x31
+[    0.722761]  [<ffffffff83cc5d9b>] __blocking_notifier_call_chain+0x35/0x63
+[    0.722761]  [<ffffffff83cc5ddd>] blocking_notifier_call_chain+0x14/0x16
+[    0.800374] usb 1-1: new full-speed USB device number 2 using uhci_hcd
+[    0.722761]  [<ffffffff83cefe97>] profile_task_exit+0x1a/0x1c
+[    0.802309]  [<ffffffff83cac84e>] do_exit+0x39/0xe7f
+[    0.802309]  [<ffffffff83ce5938>] ? vprintk_default+0x1d/0x1f
+[    0.802309]  [<ffffffff83d7bb95>] ? printk+0x57/0x73
+[    0.802309]  [<ffffffff83c46e25>] oops_end+0x80/0x85
+[    0.802309]  [<ffffffff83c7b747>] pgtable_bad+0x8a/0x95
+[    0.802309]  [<ffffffff83ca7f4a>] __do_page_fault+0x8c/0x352
+[    0.802309]  [<ffffffff83eefba5>] ? file_has_perm+0xc4/0xe5
+[    0.802309]  [<ffffffff83ca821c>] do_page_fault+0xc/0xe
+[    0.802309]  [<ffffffff84507682>] page_fault+0x22/0x30
+[    0.802309]  [<ffffffff83f4129e>] ? __clear_user+0x42/0x67
+[    0.802309]  [<ffffffff83f4127f>] ? __clear_user+0x23/0x67
+[    0.802309]  [<ffffffff83f4120c>] clear_user+0x2e/0x30
+[    0.802309]  [<ffffffff83e9e7a1>] load_elf_binary+0xa7f/0x18f7
+[    0.802309]  [<ffffffff83de2088>] search_binary_handler+0x86/0x19c
+[    0.802309]  [<ffffffff83de389e>] do_execveat_common.isra.26+0x909/0xf98
+[    0.802309]  [<ffffffff844febe0>] ? rest_init+0x87/0x87
+[    0.802309]  [<ffffffff83de40be>] do_execve+0x23/0x25
+[    0.802309]  [<ffffffff83c002e3>] run_init_process+0x2b/0x2d
+[    0.802309]  [<ffffffff844fec4d>] kernel_init+0x6d/0xda
+[    0.802309]  [<ffffffff84505b2f>] ret_from_fork+0x3f/0x70
+[    0.802309]  [<ffffffff844febe0>] ? rest_init+0x87/0x87
+[    0.830559] Kernel panic - not syncing: Attempted to kill init!  exitcode=0x00000009
+[    0.830559] 
+[    0.831305] Kernel Offset: 0x2c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
+[    0.831305] ---[ end Kernel panic - not syncing: Attempted to kill init!  exitcode=0x00000009
+
+The crash part of this problem may be solved with the following patch
+(thanks to Hugh for the hint). There is still another problem, though -
+with this patch applied, the qemu session aborts with "VCPU Shutdown
+request", whatever that means.
+
+Cc: lepton <ytht.net@gmail.com>
+Signed-off-by: Guenter Roeck <groeck@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/kaiser.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -413,7 +413,8 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp,
+ 			 * get out to userspace running on the kernel CR3,
+ 			 * userspace will crash instead of running.
+ 			 */
+-			pgd.pgd |= _PAGE_NX;
++			if (__supported_pte_mask & _PAGE_NX)
++				pgd.pgd |= _PAGE_NX;
+ 		}
+ 	} else if (!pgd.pgd) {
+ 		/*
diff --git a/queue/kaiser-stack-map-page_size-at-thread_size-page_size.patch b/queue/kaiser-stack-map-page_size-at-thread_size-page_size.patch
new file mode 100644
index 0000000..6ea3930
--- /dev/null
+++ b/queue/kaiser-stack-map-page_size-at-thread_size-page_size.patch
@@ -0,0 +1,139 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 18:57:03 -0700
+Subject: kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Kaiser only needs to map one page of the stack; and
+kernel/fork.c did not build on powerpc (no __PAGE_KERNEL).
+It's all cleaner if linux/kaiser.h provides kaiser_map_thread_stack()
+and kaiser_unmap_thread_stack() wrappers around asm/kaiser.h's
+kaiser_add_mapping() and kaiser_remove_mapping().  And use
+linux/kaiser.h in init/main.c to avoid the #ifdefs there.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/kaiser.h |   40 +++++++++++++++++++++++++++++++++-------
+ init/main.c            |    6 +-----
+ kernel/fork.c          |    7 ++-----
+ 3 files changed, 36 insertions(+), 17 deletions(-)
+
+--- a/include/linux/kaiser.h
++++ b/include/linux/kaiser.h
+@@ -1,26 +1,52 @@
+-#ifndef _INCLUDE_KAISER_H
+-#define _INCLUDE_KAISER_H
++#ifndef _LINUX_KAISER_H
++#define _LINUX_KAISER_H
+ 
+ #ifdef CONFIG_KAISER
+ #include <asm/kaiser.h>
++
++static inline int kaiser_map_thread_stack(void *stack)
++{
++	/*
++	 * Map that page of kernel stack on which we enter from user context.
++	 */
++	return kaiser_add_mapping((unsigned long)stack +
++			THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL);
++}
++
++static inline void kaiser_unmap_thread_stack(void *stack)
++{
++	/*
++	 * Note: may be called even when kaiser_map_thread_stack() failed.
++	 */
++	kaiser_remove_mapping((unsigned long)stack +
++			THREAD_SIZE - PAGE_SIZE, PAGE_SIZE);
++}
+ #else
+ 
+ /*
+  * These stubs are used whenever CONFIG_KAISER is off, which
+- * includes architectures that support KAISER, but have it
+- * disabled.
++ * includes architectures that support KAISER, but have it disabled.
+  */
+ 
+ static inline void kaiser_init(void)
+ {
+ }
+-static inline void kaiser_remove_mapping(unsigned long start, unsigned long size)
++static inline int kaiser_add_mapping(unsigned long addr,
++				     unsigned long size, unsigned long flags)
++{
++	return 0;
++}
++static inline void kaiser_remove_mapping(unsigned long start,
++					 unsigned long size)
+ {
+ }
+-static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++static inline int kaiser_map_thread_stack(void *stack)
+ {
+ 	return 0;
+ }
++static inline void kaiser_unmap_thread_stack(void *stack)
++{
++}
+ 
+ #endif /* !CONFIG_KAISER */
+-#endif /* _INCLUDE_KAISER_H */
++#endif /* _LINUX_KAISER_H */
+--- a/init/main.c
++++ b/init/main.c
+@@ -80,15 +80,13 @@
+ #include <linux/integrity.h>
+ #include <linux/proc_ns.h>
+ #include <linux/io.h>
++#include <linux/kaiser.h>
+ 
+ #include <asm/io.h>
+ #include <asm/bugs.h>
+ #include <asm/setup.h>
+ #include <asm/sections.h>
+ #include <asm/cacheflush.h>
+-#ifdef CONFIG_KAISER
+-#include <asm/kaiser.h>
+-#endif
+ 
+ static int kernel_init(void *);
+ 
+@@ -476,9 +474,7 @@ static void __init mm_init(void)
+ 	pgtable_init();
+ 	vmalloc_init();
+ 	ioremap_huge_init();
+-#ifdef CONFIG_KAISER
+ 	kaiser_init();
+-#endif
+ }
+ 
+ asmlinkage __visible void __init start_kernel(void)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -212,12 +212,9 @@ static unsigned long *alloc_thread_stack
+ #endif
+ }
+ 
+-extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size);
+ static inline void free_thread_stack(struct task_struct *tsk)
+ {
+-#ifdef CONFIG_KAISER
+-	kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE);
+-#endif
++	kaiser_unmap_thread_stack(tsk->stack);
+ #ifdef CONFIG_VMAP_STACK
+ 	if (task_stack_vm_area(tsk)) {
+ 		unsigned long flags;
+@@ -501,7 +498,7 @@ static struct task_struct *dup_task_stru
+ 	 */
+ 	tsk->stack = stack;
+ 
+-	err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
++	err= kaiser_map_thread_stack(tsk->stack);
+ 	if (err)
+ 		goto free_stack;
+ #ifdef CONFIG_VMAP_STACK
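The new wrappers above make the mapping policy explicit: only the top page of each kernel stack - the page in use when entering from user context, since the stack grows down from stack + THREAD_SIZE - is added to the shadow mapping. A sketch of that address arithmetic; the 16kB MODEL_THREAD_SIZE and the stack base address are hypothetical values, not taken from the patch.

#include <stdint.h>
#include <stdio.h>

#define MODEL_PAGE_SIZE   4096ULL
#define MODEL_THREAD_SIZE (4 * MODEL_PAGE_SIZE)	/* hypothetical 16kB stack */

int main(void)
{
	uint64_t stack  = 0xffff880012340000ULL;	/* made-up stack base */
	uint64_t mapped = stack + MODEL_THREAD_SIZE - MODEL_PAGE_SIZE;

	printf("stack            %#llx - %#llx\n",
	       (unsigned long long)stack,
	       (unsigned long long)(stack + MODEL_THREAD_SIZE));
	printf("user-mapped page %#llx - %#llx\n",
	       (unsigned long long)mapped,
	       (unsigned long long)(mapped + MODEL_PAGE_SIZE));
	return 0;
}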
diff --git a/queue/kaiser-tidied-up-asm-kaiser.h-somewhat.patch b/queue/kaiser-tidied-up-asm-kaiser.h-somewhat.patch
new file mode 100644
index 0000000..2007d66
--- /dev/null
+++ b/queue/kaiser-tidied-up-asm-kaiser.h-somewhat.patch
@@ -0,0 +1,105 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 19:18:07 -0700
+Subject: kaiser: tidied up asm/kaiser.h somewhat
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Mainly deleting a surfeit of blank lines, and reflowing header comment.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kaiser.h |   32 +++++++++++++-------------------
+ 1 file changed, 13 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -1,15 +1,17 @@
+ #ifndef _ASM_X86_KAISER_H
+ #define _ASM_X86_KAISER_H
+-
+-/* This file includes the definitions for the KAISER feature.
+- * KAISER is a counter measure against x86_64 side channel attacks on the kernel virtual memory.
+- * It has a shodow-pgd for every process. the shadow-pgd has a minimalistic kernel-set mapped,
+- * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled,
+- * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled.
+- * By this, the virtual memory chaches are freed, and the user may not attack the whole kernel memory.
++/*
++ * This file includes the definitions for the KAISER feature.
++ * KAISER is a counter measure against x86_64 side channel attacks on
++ * the kernel virtual memory.  It has a shadow pgd for every process: the
++ * shadow pgd has a minimalistic kernel-set mapped, but includes the whole
++ * user memory. Within a kernel context switch, or when an interrupt is handled,
++ * the pgd is switched to the normal one. When the system switches to user mode,
++ * the shadow pgd is enabled. By this, the virtual memory caches are freed,
++ * and the user may not attack the whole kernel memory.
+  *
+- * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, as the entry/exit functions
+- * of the user space, or the stacks.
++ * A minimalistic kernel mapping holds the parts needed to be mapped in user
++ * mode, such as the entry/exit functions of the user space, or the stacks.
+  */
+ #ifdef __ASSEMBLY__
+ #ifdef CONFIG_KAISER
+@@ -48,13 +50,10 @@ _SWITCH_TO_KERNEL_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ .endm
+ 
+-
+ .macro SWITCH_USER_CR3_NO_STACK
+-
+ movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+ _SWITCH_TO_USER_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+-
+ .endm
+ 
+ #else /* CONFIG_KAISER */
+@@ -72,7 +71,6 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ 
+ #else /* __ASSEMBLY__ */
+ 
+-
+ #ifdef CONFIG_KAISER
+ /*
+  * Upon kernel/user mode switch, it may happen that the address
+@@ -80,7 +78,6 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+  * stored.  To change the address space, another register is
+  * needed.  A register therefore has to be stored/restored.
+ */
+-
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ 
+ /**
+@@ -95,7 +92,6 @@ DECLARE_PER_CPU_USER_MAPPED(unsigned lon
+  */
+ extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+ 
+-
+ /**
+  *  kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
+  *  @addr: the start address of the range
+@@ -104,12 +100,12 @@ extern int kaiser_add_mapping(unsigned l
+ extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+ 
+ /**
+- *  kaiser_initialize_mapping - Initalize the shadow mapping
++ *  kaiser_init - Initialize the shadow mapping
+  *
+  *  Most parts of the shadow mapping can be mapped upon boot
+  *  time.  Only per-process things like the thread stacks
+  *  or a new LDT have to be mapped at runtime.  These boot-
+- *  time mappings are permanent and nevertunmapped.
++ *  time mappings are permanent and never unmapped.
+  */
+ extern void kaiser_init(void);
+ 
+@@ -117,6 +113,4 @@ extern void kaiser_init(void);
+ 
+ #endif /* __ASSEMBLY */
+ 
+-
+-
+ #endif /* _ASM_X86_KAISER_H */
diff --git a/queue/kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch b/queue/kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch
new file mode 100644
index 0000000..4bb0110
--- /dev/null
+++ b/queue/kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch
@@ -0,0 +1,50 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 19:23:08 -0700
+Subject: kaiser: tidied up kaiser_add/remove_mapping slightly
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Yes, unmap_pud_range_nofree()'s declaration ought to be in a
+header file really, but I'm not sure we want to use it anyway:
+so for now just declare it inside kaiser_remove_mapping().
+And there doesn't seem to be such a thing as unmap_p4d_range(),
+even in a 5-level paging tree.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c |    9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -285,8 +285,7 @@ void __init kaiser_init(void)
+ 				  __PAGE_KERNEL);
+ }
+ 
+-extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end);
+-// add a mapping to the shadow-mapping, and synchronize the mappings
++/* Add a mapping to the shadow mapping, and synchronize the mappings */
+ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+ {
+ 	return kaiser_add_user_map((const void *)addr, size, flags);
+@@ -294,15 +293,13 @@ int kaiser_add_mapping(unsigned long add
+ 
+ void kaiser_remove_mapping(unsigned long start, unsigned long size)
+ {
++	extern void unmap_pud_range_nofree(pgd_t *pgd,
++				unsigned long start, unsigned long end);
+ 	unsigned long end = start + size;
+ 	unsigned long addr;
+ 
+ 	for (addr = start; addr < end; addr += PGDIR_SIZE) {
+ 		pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
+-		/*
+-		 * unmap_p4d_range() handles > P4D_SIZE unmaps,
+-		 * so no need to trim 'end'.
+-		 */
+ 		unmap_pud_range_nofree(pgd, addr, end);
+ 	}
+ }
diff --git a/queue/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch b/queue/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch
new file mode 100644
index 0000000..44bbb7a
--- /dev/null
+++ b/queue/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch
@@ -0,0 +1,130 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 3 Oct 2017 20:49:04 -0700
+Subject: kaiser: use ALTERNATIVE instead of x86_cr3_pcid_noflush
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Now that we're playing the ALTERNATIVE game, use that more efficient
+method: instead of user-mapping an extra page, and reading an extra
+cacheline each time for x86_cr3_pcid_noflush.
+
+Neel has found that __stringify(bts $X86_CR3_PCID_NOFLUSH_BIT, %rax)
+is a working substitute for the "bts $63, %rax" in these ALTERNATIVEs;
+but the one line with $63 in looks clearer, so let's stick with that.
+
+Worried about what happens with an ALTERNATIVE between the jump and
+jump label in another ALTERNATIVE?  I was, but have checked the
+combinations in SWITCH_KERNEL_CR3_NO_STACK at entry_SYSCALL_64,
+and it does a good job.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S     |    7 ++++---
+ arch/x86/include/asm/kaiser.h |    6 +++---
+ arch/x86/mm/kaiser.c          |   11 +----------
+ 3 files changed, 8 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1084,7 +1084,8 @@ ENTRY(paranoid_entry)
+ 	jz	2f
+ 	orl	$2, %ebx
+ 	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+-	orq	x86_cr3_pcid_noflush, %rax
++	/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
++	ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ 	movq	%rax, %cr3
+ 2:
+ #endif
+@@ -1344,7 +1345,7 @@ ENTRY(nmi)
+ 	/* %rax is saved above, so OK to clobber here */
+ 	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ 	/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+-	orq	x86_cr3_pcid_noflush, %rax
++	ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ 	pushq	%rax
+ 	/* mask off "user" bit of pgd address and 12 PCID bits: */
+ 	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+@@ -1588,7 +1589,7 @@ end_repeat_nmi:
+ 	/* %rax is saved above, so OK to clobber here */
+ 	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ 	/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+-	orq	x86_cr3_pcid_noflush, %rax
++	ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ 	pushq	%rax
+ 	/* mask off "user" bit of pgd address and 12 PCID bits: */
+ 	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -25,7 +25,8 @@
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-orq  x86_cr3_pcid_noflush, \reg
++/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
++ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID
+ movq \reg, %cr3
+ .endm
+ 
+@@ -39,7 +40,7 @@ movq \reg, %cr3
+ movq %cr3, \reg
+ orq  PER_CPU_VAR(x86_cr3_pcid_user), \reg
+ js   9f
+-/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
++/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */
+ movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
+ 9:
+ movq \reg, %cr3
+@@ -90,7 +91,6 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ 
+-extern unsigned long x86_cr3_pcid_noflush;
+ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+ 
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -31,7 +31,6 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long
+  * This is also handy because systems that do not support PCIDs
+  * just end up or'ing a 0 into their CR3, which does no harm.
+  */
+-unsigned long x86_cr3_pcid_noflush __read_mostly;
+ DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+ 
+ /*
+@@ -356,10 +355,6 @@ void __init kaiser_init(void)
+ 	kaiser_add_user_map_early(&debug_idt_table,
+ 				  sizeof(gate_desc) * NR_VECTORS,
+ 				  __PAGE_KERNEL);
+-
+-	kaiser_add_user_map_early(&x86_cr3_pcid_noflush,
+-				  sizeof(x86_cr3_pcid_noflush),
+-				  __PAGE_KERNEL);
+ }
+ 
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+@@ -433,18 +428,14 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp,
+ 
+ void kaiser_setup_pcid(void)
+ {
+-	unsigned long kern_cr3 = 0;
+ 	unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
+ 
+-	if (this_cpu_has(X86_FEATURE_PCID)) {
+-		kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
++	if (this_cpu_has(X86_FEATURE_PCID))
+ 		user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
+-	}
+ 	/*
+ 	 * These variables are used by the entry/exit
+ 	 * code to change PCID and pgd and TLB flushing.
+ 	 */
+-	x86_cr3_pcid_noflush = kern_cr3;
+ 	this_cpu_write(x86_cr3_pcid_user, user_cr3);
+ }
+ 
diff --git a/queue/kaiser-vmstat-show-nr_kaisertable-as-nr_overhead.patch b/queue/kaiser-vmstat-show-nr_kaisertable-as-nr_overhead.patch
new file mode 100644
index 0000000..f7c6026
--- /dev/null
+++ b/queue/kaiser-vmstat-show-nr_kaisertable-as-nr_overhead.patch
@@ -0,0 +1,116 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 9 Sep 2017 21:27:32 -0700
+Subject: kaiser: vmstat show NR_KAISERTABLE as nr_overhead
+
+From: Hugh Dickins <hughd@google.com>
+
+
+The kaiser update made an interesting choice, never to free any shadow
+page tables.  Contention on global spinlock was worrying, particularly
+with it held across page table scans when freeing.  Something had to be
+done: I was going to add refcounting; but simply never to free them is
+an appealing choice, minimizing contention without complicating the code
+(the more a page table is found already, the less the spinlock is used).
+
+But leaking pages in this way is also a worry: can we get away with it?
+At the very least, we need a count to show how bad it actually gets:
+in principle, one might end up wasting about 1/256 of memory that way
+(1/512 for when direct-mapped pages have to be user-mapped, plus 1/512
+for when they are user-mapped from the vmalloc area on another occasion
+(but we don't have vmalloc'ed stacks, so only large ldts are vmalloc'ed).
+
+Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the
+shared pgd entries, and 1 for each intermediate page table added
+thereafter for user-mapping - but leave out the 1 per mm, for its
+shadow pgd, because that distracts from the monotonic increase.
+Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled).
+
+In practice, it doesn't look so bad so far: more like 1/12000 after
+nine hours of gtests below; and movable pageblock segregation should
+tend to cluster the kaiser tables into a subset of the address space
+(if not, they will be bad for compaction too).  But production may
+tell a different story: keep an eye on this number, and bring back
+lighter freeing if it gets out of control (maybe a shrinker).
+
+["nr_overhead" should of course say "nr_kaisertable", if it needs
+to stay; but for the moment we are being coy, preferring that when
+Joe Blow notices a new line in his /proc/vmstat, he does not get
+too curious about what this "kaiser" stuff might be.]
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c   |   16 +++++++++++-----
+ include/linux/mmzone.h |    3 ++-
+ mm/vmstat.c            |    1 +
+ 3 files changed, 14 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -121,9 +121,11 @@ static pte_t *kaiser_pagetable_walk(unsi
+ 		if (!new_pmd_page)
+ 			return NULL;
+ 		spin_lock(&shadow_table_allocation_lock);
+-		if (pud_none(*pud))
++		if (pud_none(*pud)) {
+ 			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+-		else
++			__inc_zone_page_state(virt_to_page((void *)
++						new_pmd_page), NR_KAISERTABLE);
++		} else
+ 			free_page(new_pmd_page);
+ 		spin_unlock(&shadow_table_allocation_lock);
+ 	}
+@@ -139,9 +141,11 @@ static pte_t *kaiser_pagetable_walk(unsi
+ 		if (!new_pte_page)
+ 			return NULL;
+ 		spin_lock(&shadow_table_allocation_lock);
+-		if (pmd_none(*pmd))
++		if (pmd_none(*pmd)) {
+ 			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+-		else
++			__inc_zone_page_state(virt_to_page((void *)
++						new_pte_page), NR_KAISERTABLE);
++		} else
+ 			free_page(new_pte_page);
+ 		spin_unlock(&shadow_table_allocation_lock);
+ 	}
+@@ -205,11 +209,13 @@ static void __init kaiser_init_all_pgds(
+ 	pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ 	for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+ 		pgd_t new_pgd;
+-		pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
++		pud_t *pud = pud_alloc_one(&init_mm,
++					   PAGE_OFFSET + i * PGDIR_SIZE);
+ 		if (!pud) {
+ 			WARN_ON(1);
+ 			break;
+ 		}
++		inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
+ 		new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
+ 		/*
+ 		 * Make sure not to stomp on some other pgd entry.
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -124,8 +124,9 @@ enum zone_stat_item {
+ 	NR_SLAB_UNRECLAIMABLE,
+ 	NR_PAGETABLE,		/* used for pagetables */
+ 	NR_KERNEL_STACK_KB,	/* measured in KiB */
+-	/* Second 128 byte cacheline */
++	NR_KAISERTABLE,
+ 	NR_BOUNCE,
++	/* Second 128 byte cacheline */
+ #if IS_ENABLED(CONFIG_ZSMALLOC)
+ 	NR_ZSPAGES,		/* allocated in zsmalloc */
+ #endif
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -932,6 +932,7 @@ const char * const vmstat_text[] = {
+ 	"nr_slab_unreclaimable",
+ 	"nr_page_table_pages",
+ 	"nr_kernel_stack",
++	"nr_overhead",
+ 	"nr_bounce",
+ #if IS_ENABLED(CONFIG_ZSMALLOC)
+ 	"nr_zspages",
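To put the worst-case estimate above into numbers: 1/512 + 1/512 is 1/256 of memory, so on a hypothetical 16GiB machine the never-freed shadow tables are bounded by about 64MiB, while the observed ~1/12000 ratio would be closer to 1.4MiB. A throwaway sketch of that arithmetic; the 16GiB figure is an assumption, not from the patch.

#include <stdio.h>

int main(void)
{
	double ram_mib   = 16.0 * 1024;		/* hypothetical 16 GiB machine      */
	double worst_mib = ram_mib / 256;	/* 1/512 + 1/512 = 1/256 worst case */
	double seen_mib  = ram_mib / 12000;	/* ratio reported after nine hours  */

	printf("worst case %.1f MiB, observed %.1f MiB\n", worst_mib, seen_mib);
	return 0;
}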
diff --git a/queue/kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch b/queue/kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch
new file mode 100644
index 0000000..2098b48
--- /dev/null
+++ b/queue/kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch
@@ -0,0 +1,141 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 27 Aug 2017 16:24:27 -0700
+Subject: kaiser: x86_cr3_pcid_noflush and x86_cr3_pcid_user
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Mostly this commit is just unshouting X86_CR3_PCID_KERN_VAR and
+X86_CR3_PCID_USER_VAR: we usually name variables in lower-case.
+
+But why does x86_cr3_pcid_noflush need to be __aligned(PAGE_SIZE)?
+Ah, it's a leftover from when kaiser_add_user_map() once complained
+about mapping the same page twice.  Make it __read_mostly instead.
+(I'm a little uneasy about all the unrelated data which shares its
+page getting user-mapped too, but that was so before, and not a big
+deal: though we call it user-mapped, it's not mapped with _PAGE_USER.)
+
+And there is a little change around the two calls to do_nmi().
+Previously they set the NOFLUSH bit (if PCID supported) when
+forcing to kernel context before do_nmi(); now they also have the
+NOFLUSH bit set (if PCID supported) when restoring context after:
+nothing done in do_nmi() should require a TLB to be flushed here.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S     |    8 ++++----
+ arch/x86/include/asm/kaiser.h |   11 +++++------
+ arch/x86/mm/kaiser.c          |   13 +++++++------
+ 3 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1316,11 +1316,11 @@ ENTRY(nmi)
+ 	/* Unconditionally use kernel CR3 for do_nmi() */
+ 	/* %rax is saved above, so OK to clobber here */
+ 	movq	%cr3, %rax
++	/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
++	orq	x86_cr3_pcid_noflush, %rax
+ 	pushq	%rax
+ 	/* mask off "user" bit of pgd address and 12 PCID bits: */
+ 	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+-	/* Add back kernel PCID and "no flush" bit */
+-	orq	X86_CR3_PCID_KERN_VAR, %rax
+ 	movq	%rax, %cr3
+ #endif
+ 	call	do_nmi
+@@ -1560,11 +1560,11 @@ end_repeat_nmi:
+ 	/* Unconditionally use kernel CR3 for do_nmi() */
+ 	/* %rax is saved above, so OK to clobber here */
+ 	movq	%cr3, %rax
++	/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
++	orq	x86_cr3_pcid_noflush, %rax
+ 	pushq	%rax
+ 	/* mask off "user" bit of pgd address and 12 PCID bits: */
+ 	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+-	/* Add back kernel PCID and "no flush" bit */
+-	orq	X86_CR3_PCID_KERN_VAR, %rax
+ 	movq	%rax, %cr3
+ #endif
+ 
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -25,7 +25,7 @@
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-orq  X86_CR3_PCID_KERN_VAR, \reg
++orq  x86_cr3_pcid_noflush, \reg
+ movq \reg, %cr3
+ .endm
+ 
+@@ -37,11 +37,10 @@ movq \reg, %cr3
+  * not enabled): so that the one register can update both memory and cr3.
+  */
+ movq %cr3, \reg
+-andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-orq  PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
++orq  PER_CPU_VAR(x86_cr3_pcid_user), \reg
+ js   9f
+ /* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
+-movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
++movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
+ 9:
+ movq \reg, %cr3
+ .endm
+@@ -94,8 +93,8 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ 
+-extern unsigned long X86_CR3_PCID_KERN_VAR;
+-DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
++extern unsigned long x86_cr3_pcid_noflush;
++DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+ 
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+ 
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -28,8 +28,8 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long
+  * This is also handy because systems that do not support PCIDs
+  * just end up or'ing a 0 into their CR3, which does no harm.
+  */
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR;
+-DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
++unsigned long x86_cr3_pcid_noflush __read_mostly;
++DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+ 
+ /*
+  * At runtime, the only things we map are some things for CPU
+@@ -303,7 +303,8 @@ void __init kaiser_init(void)
+ 				  sizeof(gate_desc) * NR_VECTORS,
+ 				  __PAGE_KERNEL);
+ 
+-	kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
++	kaiser_add_user_map_early(&x86_cr3_pcid_noflush,
++				  sizeof(x86_cr3_pcid_noflush),
+ 				  __PAGE_KERNEL);
+ }
+ 
+@@ -381,8 +382,8 @@ void kaiser_setup_pcid(void)
+ 	 * These variables are used by the entry/exit
+ 	 * code to change PCID and pgd and TLB flushing.
+ 	 */
+-	X86_CR3_PCID_KERN_VAR = kern_cr3;
+-	this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3);
++	x86_cr3_pcid_noflush = kern_cr3;
++	this_cpu_write(x86_cr3_pcid_user, user_cr3);
+ }
+ 
+ /*
+@@ -392,7 +393,7 @@ void kaiser_setup_pcid(void)
+  */
+ void kaiser_flush_tlb_on_return_to_user(void)
+ {
+-	this_cpu_write(X86_CR3_PCID_USER_VAR,
++	this_cpu_write(x86_cr3_pcid_user,
+ 			X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
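As a rough illustration of the CR3 arithmetic the entry assembly above performs, driven by the values kaiser_setup_pcid() stores, here is a plain-C sketch. The constant names are stand-ins for the kernel's, and the real switch happens in assembly, not C:

#define NOFLUSH_BIT        (1UL << 63)  /* PCID "no flush" bit in CR3 */
#define PCID_ASID_MASK     0xFFFUL      /* 12 PCID bits */
#define SHADOW_PGD_OFFSET  0x1000UL     /* selects the shadow (user) pgd */

/* stand-in for x86_cr3_pcid_noflush: NOFLUSH_BIT if PCID is supported, else 0 */
static unsigned long cr3_pcid_noflush = NOFLUSH_BIT;

static unsigned long switch_to_kernel_cr3(unsigned long cr3)
{
        /* drop the user-pgd selector and any user PCID bits ... */
        cr3 &= ~(PCID_ASID_MASK | SHADOW_PGD_OFFSET);
        /* ... then avoid a TLB flush on the write if PCID is in use */
        return cr3 | cr3_pcid_noflush;
}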
diff --git a/queue/kbuild-add-fno-stack-check-to-kernel-build-options.patch b/queue/kbuild-add-fno-stack-check-to-kernel-build-options.patch
new file mode 100644
index 0000000..144d30d
--- /dev/null
+++ b/queue/kbuild-add-fno-stack-check-to-kernel-build-options.patch
@@ -0,0 +1,49 @@
+From 3ce120b16cc548472f80cf8644f90eda958cf1b6 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Fri, 29 Dec 2017 17:34:43 -0800
+Subject: kbuild: add '-fno-stack-check' to kernel build options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 3ce120b16cc548472f80cf8644f90eda958cf1b6 upstream.
+
+It appears that hardened gentoo enables "-fstack-check" by default for
+gcc.
+
+That doesn't work _at_all_ for the kernel, because the kernel stack
+doesn't act like a user stack at all: it's much smaller, and it doesn't
+auto-expand on use.  So the extra "probe one page below the stack" code
+generated by -fstack-check just breaks the kernel in horrible ways,
+causing infinite double faults etc.
+
+[ I have to say, that the particular code gcc generates looks very
+  stupid even for user space where it works, but that's a separate
+  issue.  ]
+
+Reported-and-tested-by: Alexander Tsoy <alexander@tsoy.me>
+Reported-and-tested-by: Toralf Förster <toralf.foerster@gmx.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Makefile |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/Makefile
++++ b/Makefile
+@@ -788,6 +788,9 @@ KBUILD_CFLAGS += $(call cc-disable-warni
+ # disable invalid "can't wrap" optimizations for signed / pointers
+ KBUILD_CFLAGS	+= $(call cc-option,-fno-strict-overflow)
+ 
++# Make sure -fstack-check isn't enabled (like gentoo apparently did)
++KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)
++
+ # conserve stack if available
+ KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
+ 
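For context, a small user-space sketch of the kind of stack frame that gcc's -fstack-check instruments: the generated probe touches memory below the stack pointer before the frame is used, which a demand-grown user stack tolerates but the fixed-size kernel stack does not. This is an illustration only, not kernel code:

void touch_big_frame(void)
{
        /* a multi-page frame of the sort -fstack-check instruments with probes */
        volatile char buf[2 * 4096];

        buf[0] = 0;     /* the probe below the stack happens before this store */
}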
diff --git a/queue/kprobes-x86-blacklist-indirect-thunk-functions-for-kprobes.patch b/queue/kprobes-x86-blacklist-indirect-thunk-functions-for-kprobes.patch
new file mode 100644
index 0000000..a552f71
--- /dev/null
+++ b/queue/kprobes-x86-blacklist-indirect-thunk-functions-for-kprobes.patch
@@ -0,0 +1,40 @@
+From c1804a236894ecc942da7dc6c5abe209e56cba93 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 19 Jan 2018 01:14:51 +0900
+Subject: kprobes/x86: Blacklist indirect thunk functions for kprobes
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit c1804a236894ecc942da7dc6c5abe209e56cba93 upstream.
+
+Mark __x86_indirect_thunk_* functions as blacklist for kprobes
+because those functions can be called from anywhere in the kernel
+including blacklist functions of kprobes.
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Link: https://lkml.kernel.org/r/151629209111.10241.5444852823378068683.stgit@devbox
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/lib/retpoline.S |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg)
+  * than one per register with the correct names. So we do it
+  * the simple and nasty way...
+  */
+-#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
++#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
++#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+ #define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+ 
+ GENERATE_THUNK(_ASM_AX)
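The macro change above uses a common preprocessor idiom: an inner macro stacks several per-symbol annotations, and the outer macro builds the symbol name by token pasting. A standalone sketch of the idiom, where ANNOTATE/EXPORT are stand-ins rather than the kernel's _ASM_NOKPROBE/EXPORT_SYMBOL:

#define ANNOTATE(sym)  const char *annotate_##sym = #sym
#define EXPORT(sym)    const char *export_##sym = #sym

#define __EXPORT_THUNK(sym)  ANNOTATE(sym); EXPORT(sym)
#define EXPORT_THUNK(reg)    __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)

/* expands to both annotations for __x86_indirect_thunk_rax */
EXPORT_THUNK(rax);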
diff --git a/queue/kprobes-x86-disable-optimizing-on-the-function-jumps-to-indirect-thunk.patch b/queue/kprobes-x86-disable-optimizing-on-the-function-jumps-to-indirect-thunk.patch
new file mode 100644
index 0000000..27f0a7f
--- /dev/null
+++ b/queue/kprobes-x86-disable-optimizing-on-the-function-jumps-to-indirect-thunk.patch
@@ -0,0 +1,80 @@
+From c86a32c09f8ced67971a2310e3b0dda4d1749007 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 19 Jan 2018 01:15:20 +0900
+Subject: kprobes/x86: Disable optimizing on the function jumps to indirect thunk
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit c86a32c09f8ced67971a2310e3b0dda4d1749007 upstream.
+
+Since indirect jump instructions will be replaced by jumps
+to __x86_indirect_thunk_*, those jmp instructions must be
+treated as indirect jumps. Since optprobe prohibits
+optimizing probes in functions which use an indirect jump,
+it also needs to find out functions which jump to
+__x86_indirect_thunk_* and disable optimization for them.
+
+Add a check that the jump target address is between the
+__indirect_thunk_start/end when optimizing kprobe.
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Link: https://lkml.kernel.org/r/151629212062.10241.6991266100233002273.stgit@devbox
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/kprobes/opt.c |   23 ++++++++++++++++++++++-
+ 1 file changed, 22 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/kprobes/opt.c
++++ b/arch/x86/kernel/kprobes/opt.c
+@@ -37,6 +37,7 @@
+ #include <asm/alternative.h>
+ #include <asm/insn.h>
+ #include <asm/debugreg.h>
++#include <asm/nospec-branch.h>
+ 
+ #include "common.h"
+ 
+@@ -192,7 +193,7 @@ static int copy_optimized_instructions(u
+ }
+ 
+ /* Check whether insn is indirect jump */
+-static int insn_is_indirect_jump(struct insn *insn)
++static int __insn_is_indirect_jump(struct insn *insn)
+ {
+ 	return ((insn->opcode.bytes[0] == 0xff &&
+ 		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
+@@ -226,6 +227,26 @@ static int insn_jump_into_range(struct i
+ 	return (start <= target && target <= start + len);
+ }
+ 
++static int insn_is_indirect_jump(struct insn *insn)
++{
++	int ret = __insn_is_indirect_jump(insn);
++
++#ifdef CONFIG_RETPOLINE
++	/*
++	 * Jump to x86_indirect_thunk_* is treated as an indirect jump.
++	 * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
++	 * older gcc may use indirect jump. So we add this check instead of
++	 * replace indirect-jump check.
++	 */
++	if (!ret)
++		ret = insn_jump_into_range(insn,
++				(unsigned long)__indirect_thunk_start,
++				(unsigned long)__indirect_thunk_end -
++				(unsigned long)__indirect_thunk_start);
++#endif
++	return ret;
++}
++
+ /* Decode whole function to ensure any instructions don't jump into target */
+ static int can_optimize(unsigned long paddr)
+ {
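A standalone restatement of the range test this hunk builds on (insn_jump_into_range() above): the decoded jump target is compared against the thunk area bounded by the linker symbols __indirect_thunk_start/__indirect_thunk_end, passed here as plain parameters for illustration:

#include <stdbool.h>

static bool jump_targets_thunk_area(unsigned long target,
                                    unsigned long thunk_start,
                                    unsigned long thunk_len)
{
        /* mirrors insn_jump_into_range(): start <= target <= start + len */
        return thunk_start <= target && target <= thunk_start + thunk_len;
}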
diff --git a/queue/kpti-rename-to-page_table_isolation.patch b/queue/kpti-rename-to-page_table_isolation.patch
new file mode 100644
index 0000000..f77732d
--- /dev/null
+++ b/queue/kpti-rename-to-page_table_isolation.patch
@@ -0,0 +1,329 @@
+From keescook@chromium.org  Wed Jan  3 20:47:22 2018
+From: Kees Cook <keescook@chromium.org>
+Date: Wed, 3 Jan 2018 10:17:35 -0800
+Subject: KPTI: Rename to PAGE_TABLE_ISOLATION
+To: Greg KH <gregkh@linuxfoundation.org>
+Message-ID: <20180103181735.GA33341@beast>
+Content-Disposition: inline
+
+From: Kees Cook <keescook@chromium.org>
+
+This renames CONFIG_KAISER to CONFIG_PAGE_TABLE_ISOLATION.
+
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/misc.h          |    2 +-
+ arch/x86/entry/entry_64.S                |   12 ++++++------
+ arch/x86/events/intel/ds.c               |    4 ++--
+ arch/x86/include/asm/cpufeatures.h       |    2 +-
+ arch/x86/include/asm/kaiser.h            |   12 ++++++------
+ arch/x86/include/asm/pgtable.h           |    4 ++--
+ arch/x86/include/asm/pgtable_64.h        |    4 ++--
+ arch/x86/include/asm/pgtable_types.h     |    2 +-
+ arch/x86/include/asm/tlbflush.h          |    2 +-
+ arch/x86/kernel/head_64.S                |    2 +-
+ arch/x86/mm/Makefile                     |    2 +-
+ arch/x86/mm/kaslr.c                      |    2 +-
+ include/linux/kaiser.h                   |    6 +++---
+ include/linux/percpu-defs.h              |    2 +-
+ security/Kconfig                         |    2 +-
+ tools/arch/x86/include/asm/cpufeatures.h |    2 +-
+ 16 files changed, 31 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/boot/compressed/misc.h
++++ b/arch/x86/boot/compressed/misc.h
+@@ -9,7 +9,7 @@
+  */
+ #undef CONFIG_PARAVIRT
+ #undef CONFIG_PARAVIRT_SPINLOCKS
+-#undef CONFIG_KAISER
++#undef CONFIG_PAGE_TABLE_ISOLATION
+ #undef CONFIG_KASAN
+ 
+ #include <linux/linkage.h>
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1071,7 +1071,7 @@ ENTRY(paranoid_entry)
+ 	SWAPGS
+ 	xorl	%ebx, %ebx
+ 1:
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 	/*
+ 	 * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
+ 	 * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
+@@ -1111,7 +1111,7 @@ ENTRY(paranoid_exit)
+ 	DISABLE_INTERRUPTS(CLBR_NONE)
+ 	TRACE_IRQS_OFF_DEBUG
+ 	TRACE_IRQS_IRETQ_DEBUG
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 	/* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+ 	testl	$2, %ebx			/* SWITCH_USER_CR3 needed? */
+ 	jz	paranoid_exit_no_switch
+@@ -1340,7 +1340,7 @@ ENTRY(nmi)
+ 
+ 	movq	%rsp, %rdi
+ 	movq	$-1, %rsi
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 	/* Unconditionally use kernel CR3 for do_nmi() */
+ 	/* %rax is saved above, so OK to clobber here */
+ 	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+@@ -1354,7 +1354,7 @@ ENTRY(nmi)
+ #endif
+ 	call	do_nmi
+ 
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 	/*
+ 	 * Unconditionally restore CR3.  I know we return to
+ 	 * kernel code that needs user CR3, but do we ever return
+@@ -1584,7 +1584,7 @@ end_repeat_nmi:
+ 1:
+ 	movq	%rsp, %rdi
+ 	movq	$-1, %rsi
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 	/* Unconditionally use kernel CR3 for do_nmi() */
+ 	/* %rax is saved above, so OK to clobber here */
+ 	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+@@ -1600,7 +1600,7 @@ end_repeat_nmi:
+ 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ 	call	do_nmi
+ 
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 	/*
+ 	 * Unconditionally restore CR3.  We might be returning to
+ 	 * kernel code that needs user CR3, like just just before
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -274,7 +274,7 @@ static DEFINE_PER_CPU(void *, insn_buffe
+ 
+ static void *dsalloc(size_t size, gfp_t flags, int node)
+ {
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 	unsigned int order = get_order(size);
+ 	struct page *page;
+ 	unsigned long addr;
+@@ -295,7 +295,7 @@ static void *dsalloc(size_t size, gfp_t
+ 
+ static void dsfree(const void *buffer, size_t size)
+ {
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 	if (!buffer)
+ 		return;
+ 	kaiser_remove_mapping((unsigned long)buffer, size);
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -199,7 +199,7 @@
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ 
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+-#define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
++#define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+ 
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -20,7 +20,7 @@
+ #define KAISER_SHADOW_PGD_OFFSET 0x1000
+ 
+ #ifdef __ASSEMBLY__
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+@@ -69,7 +69,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ 8:
+ .endm
+ 
+-#else /* CONFIG_KAISER */
++#else /* CONFIG_PAGE_TABLE_ISOLATION */
+ 
+ .macro SWITCH_KERNEL_CR3
+ .endm
+@@ -78,11 +78,11 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+ .endm
+ 
+-#endif /* CONFIG_KAISER */
++#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+ 
+ #else /* __ASSEMBLY__ */
+ 
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+  * Upon kernel/user mode switch, it may happen that the address
+  * space has to be switched before the registers have been
+@@ -100,10 +100,10 @@ extern void __init kaiser_check_boottime
+ #else
+ #define kaiser_enabled	0
+ static inline void __init kaiser_check_boottime_disable(void) {}
+-#endif /* CONFIG_KAISER */
++#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+ 
+ /*
+- * Kaiser function prototypes are needed even when CONFIG_KAISER is not set,
++ * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set,
+  * so as to build with tests on kaiser_enabled instead of #ifdefs.
+  */
+ 
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -18,7 +18,7 @@
+ #ifndef __ASSEMBLY__
+ #include <asm/x86_init.h>
+ 
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ extern int kaiser_enabled;
+ #else
+ #define kaiser_enabled 0
+@@ -920,7 +920,7 @@ static inline void pmdp_set_wrprotect(st
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+ 	memcpy(dst, src, count * sizeof(pgd_t));
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 	if (kaiser_enabled) {
+ 		/* Clone the shadow pgd part as well */
+ 		memcpy(native_get_shadow_pgd(dst),
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -106,7 +106,7 @@ static inline void native_pud_clear(pud_
+ 	native_set_pud(pud, native_make_pud(0));
+ }
+ 
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
+ 
+ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+@@ -127,7 +127,7 @@ static inline pgd_t *native_get_shadow_p
+ 	BUILD_BUG_ON(1);
+ 	return NULL;
+ }
+-#endif /* CONFIG_KAISER */
++#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+ 
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -144,7 +144,7 @@
+ #define X86_CR3_PCID_MASK       (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
+ #define X86_CR3_PCID_ASID_KERN  (_AC(0x0,UL))
+ 
+-#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
++#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64)
+ /* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
+ #define X86_CR3_PCID_ASID_USER	(_AC(0x80,UL))
+ 
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -136,7 +136,7 @@ static inline void cr4_set_bits_and_upda
+  * Declare a couple of kaiser interfaces here for convenience,
+  * to avoid the need for asm/kaiser.h in unexpected places.
+  */
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ extern int kaiser_enabled;
+ extern void kaiser_setup_pcid(void);
+ extern void kaiser_flush_tlb_on_return_to_user(void);
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -405,7 +405,7 @@ GLOBAL(early_recursion_flag)
+ 	.balign	PAGE_SIZE; \
+ GLOBAL(name)
+ 
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+  * Each PGD needs to be 8k long and 8k aligned.  We do not
+  * ever go out to userspace with these, so we do not
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU)		+= numa_emulatio
+ obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
+ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+ obj-$(CONFIG_RANDOMIZE_MEMORY)	+= kaslr.o
+-obj-$(CONFIG_KAISER)		+= kaiser.o
++obj-$(CONFIG_PAGE_TABLE_ISOLATION)		+= kaiser.o
+--- a/arch/x86/mm/kaslr.c
++++ b/arch/x86/mm/kaslr.c
+@@ -189,6 +189,6 @@ void __meminit init_trampoline(void)
+ 		*pud_tramp = *pud;
+ 	}
+ 
+-	/* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */
++	/* Avoid set_pgd(), in case it's complicated by CONFIG_PAGE_TABLE_ISOLATION */
+ 	trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
+ }
+--- a/include/linux/kaiser.h
++++ b/include/linux/kaiser.h
+@@ -1,7 +1,7 @@
+ #ifndef _LINUX_KAISER_H
+ #define _LINUX_KAISER_H
+ 
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ #include <asm/kaiser.h>
+ 
+ static inline int kaiser_map_thread_stack(void *stack)
+@@ -24,7 +24,7 @@ static inline void kaiser_unmap_thread_s
+ #else
+ 
+ /*
+- * These stubs are used whenever CONFIG_KAISER is off, which
++ * These stubs are used whenever CONFIG_PAGE_TABLE_ISOLATION is off, which
+  * includes architectures that support KAISER, but have it disabled.
+  */
+ 
+@@ -48,5 +48,5 @@ static inline void kaiser_unmap_thread_s
+ {
+ }
+ 
+-#endif /* !CONFIG_KAISER */
++#endif /* !CONFIG_PAGE_TABLE_ISOLATION */
+ #endif /* _LINUX_KAISER_H */
+--- a/include/linux/percpu-defs.h
++++ b/include/linux/percpu-defs.h
+@@ -35,7 +35,7 @@
+ 
+ #endif
+ 
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ #define USER_MAPPED_SECTION "..user_mapped"
+ #else
+ #define USER_MAPPED_SECTION ""
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -31,7 +31,7 @@ config SECURITY
+ 
+ 	  If you are unsure how to answer this question, answer N.
+ 
+-config KAISER
++config PAGE_TABLE_ISOLATION
+ 	bool "Remove the kernel mapping in user mode"
+ 	default y
+ 	depends on X86_64 && SMP
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -198,7 +198,7 @@
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ 
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+-#define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
++#define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+ 
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/queue/kpti-report-when-enabled.patch b/queue/kpti-report-when-enabled.patch
new file mode 100644
index 0000000..dbd4233
--- /dev/null
+++ b/queue/kpti-report-when-enabled.patch
@@ -0,0 +1,48 @@
+From keescook@chromium.org  Wed Jan  3 20:48:07 2018
+From: Kees Cook <keescook@chromium.org>
+Date: Wed, 3 Jan 2018 10:18:01 -0800
+Subject: KPTI: Report when enabled
+To: Greg KH <gregkh@linuxfoundation.org>
+Message-ID: <20180103181801.GA33383@beast>
+Content-Disposition: inline
+
+From: Kees Cook <keescook@chromium.org>
+
+Make sure dmesg reports when KPTI is enabled.
+
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -10,6 +10,9 @@
+ #include <linux/mm.h>
+ #include <linux/uaccess.h>
+ 
++#undef pr_fmt
++#define pr_fmt(fmt)     "Kernel/User page tables isolation: " fmt
++
+ #include <asm/kaiser.h>
+ #include <asm/tlbflush.h>	/* to verify its kaiser declarations */
+ #include <asm/pgtable.h>
+@@ -292,7 +295,7 @@ enable:
+ 	return;
+ 
+ disable:
+-	pr_info("Kernel/User page tables isolation: disabled\n");
++	pr_info("disabled\n");
+ 
+ silent_disable:
+ 	kaiser_enabled = 0;
+@@ -352,6 +355,8 @@ void __init kaiser_init(void)
+ 	kaiser_add_user_map_early(&debug_idt_table,
+ 				  sizeof(gate_desc) * NR_VECTORS,
+ 				  __PAGE_KERNEL);
++
++	pr_info("enabled\n");
+ }
+ 
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
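The pr_fmt redefinition at the top of the hunk is what lets the later messages shrink to pr_info("enabled\n") / pr_info("disabled\n") while still carrying the subsystem prefix. A user-space sketch of the same pattern, with printf standing in for the kernel's printk machinery:

#include <stdio.h>

#undef pr_fmt
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
        pr_info("enabled\n");   /* prints the prefixed message */
        return 0;
}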
diff --git a/queue/kvm-vmx-make-indirect-call-speculation-safe.patch b/queue/kvm-vmx-make-indirect-call-speculation-safe.patch
new file mode 100644
index 0000000..a981c11
--- /dev/null
+++ b/queue/kvm-vmx-make-indirect-call-speculation-safe.patch
@@ -0,0 +1,57 @@
+From foo@baz Wed Feb  7 19:38:23 CST 2018
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 25 Jan 2018 10:58:14 +0100
+Subject: KVM: VMX: Make indirect call speculation safe
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+(cherry picked from commit c940a3fb1e2e9b7d03228ab28f375fb5a47ff699)
+
+Replace indirect call with CALL_NOSPEC.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: rga@amazon.de
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Link: https://lkml.kernel.org/r/20180125095843.645776917@infradead.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -8676,14 +8676,14 @@ static void vmx_handle_external_intr(str
+ #endif
+ 			"pushf\n\t"
+ 			__ASM_SIZE(push) " $%c[cs]\n\t"
+-			"call *%[entry]\n\t"
++			CALL_NOSPEC
+ 			:
+ #ifdef CONFIG_X86_64
+ 			[sp]"=&r"(tmp),
+ #endif
+ 			"+r"(__sp)
+ 			:
+-			[entry]"r"(entry),
++			THUNK_TARGET(entry),
+ 			[ss]"i"(__KERNEL_DS),
+ 			[cs]"i"(__KERNEL_CS)
+ 			);
diff --git a/queue/kvm-x86-make-indirect-calls-in-emulator-speculation-safe.patch b/queue/kvm-x86-make-indirect-calls-in-emulator-speculation-safe.patch
new file mode 100644
index 0000000..86fb749
--- /dev/null
+++ b/queue/kvm-x86-make-indirect-calls-in-emulator-speculation-safe.patch
@@ -0,0 +1,78 @@
+From foo@baz Wed Feb  7 19:38:23 CST 2018
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 25 Jan 2018 10:58:13 +0100
+Subject: KVM: x86: Make indirect calls in emulator speculation safe
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+(cherry picked from commit 1a29b5b7f347a1a9230c1e0af5b37e3e571588ab)
+
+Replace the indirect calls with CALL_NOSPEC.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: rga@amazon.de
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Link: https://lkml.kernel.org/r/20180125095843.595615683@infradead.org
+[dwmw2: Use ASM_CALL_CONSTRAINT like upstream, now we have it]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -25,6 +25,7 @@
+ #include <asm/kvm_emulate.h>
+ #include <linux/stringify.h>
+ #include <asm/debugreg.h>
++#include <asm/nospec-branch.h>
+ 
+ #include "x86.h"
+ #include "tss.h"
+@@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsign
+ 	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
+ 
+ 	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
+-	asm("push %[flags]; popf; call *%[fastop]"
+-	    : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
++	asm("push %[flags]; popf; " CALL_NOSPEC
++	    : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
+ 	return rc;
+ }
+ 
+@@ -5306,15 +5307,14 @@ static void fetch_possible_mmx_operand(s
+ 
+ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
+ {
+-	register void *__sp asm(_ASM_SP);
+ 	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
+ 
+ 	if (!(ctxt->d & ByteOp))
+ 		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
+ 
+-	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
++	asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
+ 	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
+-	      [fastop]"+S"(fop), "+r"(__sp)
++	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
+ 	    : "c"(ctxt->src2.val));
+ 
+ 	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
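Not the kernel's inline asm, but a plain-C sketch of what the two KVM changes amount to: the hand-written "call *%[entry]" / "call *%[fastop]" becomes a call through a retpoline thunk via CALL_NOSPEC and a [thunk_target] operand. For ordinary C code the same protection comes from building with a retpoline-capable compiler (for example gcc's -mindirect-branch=thunk), which turns an indirect call like the one below into a call through __x86_indirect_thunk_*:

typedef int (*fastop_fn)(void);

static int run_fastop(fastop_fn fop)
{
        /* indirect call; retpolined by the compiler flag or the asm macro */
        return fop();
}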
diff --git a/queue/map-the-vsyscall-page-with-_page_user.patch b/queue/map-the-vsyscall-page-with-_page_user.patch
new file mode 100644
index 0000000..1b85895
--- /dev/null
+++ b/queue/map-the-vsyscall-page-with-_page_user.patch
@@ -0,0 +1,143 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Thu, 4 Jan 2018 17:42:45 +0100
+Subject: Map the vsyscall page with _PAGE_USER
+
+From: Borislav Petkov <bp@suse.de>
+
+This needs to happen early in kaiser_pagetable_walk(), before the
+hierarchy is established so that _PAGE_USER permission can be really
+set.
+
+A proper fix would be to teach kaiser_pagetable_walk() to update those
+permissions but the vsyscall page is the only exception here so ...
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c |    5 +++++
+ arch/x86/include/asm/vsyscall.h       |    2 ++
+ arch/x86/mm/kaiser.c                  |   34 ++++++++++++++++++++++++++++++----
+ 3 files changed, 37 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -66,6 +66,11 @@ static int __init vsyscall_setup(char *s
+ }
+ early_param("vsyscall", vsyscall_setup);
+ 
++bool vsyscall_enabled(void)
++{
++	return vsyscall_mode != NONE;
++}
++
+ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
+ 			      const char *message)
+ {
+--- a/arch/x86/include/asm/vsyscall.h
++++ b/arch/x86/include/asm/vsyscall.h
+@@ -12,12 +12,14 @@ extern void map_vsyscall(void);
+  * Returns true if handled.
+  */
+ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
++extern bool vsyscall_enabled(void);
+ #else
+ static inline void map_vsyscall(void) {}
+ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+ {
+ 	return false;
+ }
++static inline bool vsyscall_enabled(void) { return false; }
+ #endif
+ 
+ #endif /* _ASM_X86_VSYSCALL_H */
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -19,6 +19,7 @@
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
+ #include <asm/cmdline.h>
++#include <asm/vsyscall.h>
+ 
+ int kaiser_enabled __read_mostly = 1;
+ EXPORT_SYMBOL(kaiser_enabled);	/* for inlined TLB flush functions */
+@@ -110,12 +111,13 @@ static inline unsigned long get_pa_from_
+  *
+  * Returns a pointer to a PTE on success, or NULL on failure.
+  */
+-static pte_t *kaiser_pagetable_walk(unsigned long address)
++static pte_t *kaiser_pagetable_walk(unsigned long address, bool user)
+ {
+ 	pmd_t *pmd;
+ 	pud_t *pud;
+ 	pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
++	unsigned long prot = _KERNPG_TABLE;
+ 
+ 	if (pgd_none(*pgd)) {
+ 		WARN_ONCE(1, "All shadow pgds should have been populated");
+@@ -123,6 +125,17 @@ static pte_t *kaiser_pagetable_walk(unsi
+ 	}
+ 	BUILD_BUG_ON(pgd_large(*pgd) != 0);
+ 
++	if (user) {
++		/*
++		 * The vsyscall page is the only page that will have
++		 *  _PAGE_USER set. Catch everything else.
++		 */
++		BUG_ON(address != VSYSCALL_ADDR);
++
++		set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
++		prot = _PAGE_TABLE;
++	}
++
+ 	pud = pud_offset(pgd, address);
+ 	/* The shadow page tables do not use large mappings: */
+ 	if (pud_large(*pud)) {
+@@ -135,7 +148,7 @@ static pte_t *kaiser_pagetable_walk(unsi
+ 			return NULL;
+ 		spin_lock(&shadow_table_allocation_lock);
+ 		if (pud_none(*pud)) {
+-			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
++			set_pud(pud, __pud(prot | __pa(new_pmd_page)));
+ 			__inc_zone_page_state(virt_to_page((void *)
+ 						new_pmd_page), NR_KAISERTABLE);
+ 		} else
+@@ -155,7 +168,7 @@ static pte_t *kaiser_pagetable_walk(unsi
+ 			return NULL;
+ 		spin_lock(&shadow_table_allocation_lock);
+ 		if (pmd_none(*pmd)) {
+-			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
++			set_pmd(pmd, __pmd(prot | __pa(new_pte_page)));
+ 			__inc_zone_page_state(virt_to_page((void *)
+ 						new_pte_page), NR_KAISERTABLE);
+ 		} else
+@@ -191,7 +204,7 @@ static int kaiser_add_user_map(const voi
+ 			ret = -EIO;
+ 			break;
+ 		}
+-		pte = kaiser_pagetable_walk(address);
++		pte = kaiser_pagetable_walk(address, flags & _PAGE_USER);
+ 		if (!pte) {
+ 			ret = -ENOMEM;
+ 			break;
+@@ -318,6 +331,19 @@ void __init kaiser_init(void)
+ 
+ 	kaiser_init_all_pgds();
+ 
++	/*
++	 * Note that this sets _PAGE_USER and it needs to happen when the
++	 * pagetable hierarchy gets created, i.e., early. Otherwise
++	 * kaiser_pagetable_walk() will encounter initialized PTEs in the
++	 * hierarchy and not set the proper permissions, leading to the
++	 * pagefaults with page-protection violations when trying to read the
++	 * vsyscall page. For example.
++	 */
++	if (vsyscall_enabled())
++		kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
++					  PAGE_SIZE,
++					   __PAGE_KERNEL_VSYSCALL);
++
+ 	for_each_possible_cpu(cpu) {
+ 		void *percpu_vaddr = __per_cpu_user_mapped_start +
+ 				     per_cpu_offset(cpu);
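A minimal sketch, not the kernel's page-table code, of the protection-selection idea added to kaiser_pagetable_walk(): intermediate entries normally get kernel-only permissions, but the one user-visible page, the vsyscall page, needs _PAGE_USER-bearing permissions from the top of the hierarchy down. The numeric values below are placeholders for _KERNPG_TABLE/_PAGE_TABLE:

#include <stdbool.h>

#define SKETCH_KERNPG_TABLE 0x063UL     /* placeholder for _KERNPG_TABLE */
#define SKETCH_PAGE_TABLE   0x067UL     /* placeholder: same bits plus USER */

static unsigned long table_prot_for(bool user_visible)
{
        /* a user-visible leaf needs the USER bit at every table level */
        return user_visible ? SKETCH_PAGE_TABLE : SKETCH_KERNPG_TABLE;
}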
diff --git a/queue/mm-vmstat-make-nr_tlb_remote_flush_received-available-even-on-up.patch b/queue/mm-vmstat-make-nr_tlb_remote_flush_received-available-even-on-up.patch
new file mode 100644
index 0000000..df764bb
--- /dev/null
+++ b/queue/mm-vmstat-make-nr_tlb_remote_flush_received-available-even-on-up.patch
@@ -0,0 +1,39 @@
+From 5dd0b16cdaff9b94da06074d5888b03235c0bf17 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 5 Jun 2017 07:40:25 -0700
+Subject: mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 5dd0b16cdaff9b94da06074d5888b03235c0bf17 upstream.
+
+This fixes CONFIG_SMP=n, CONFIG_DEBUG_TLBFLUSH=y without introducing
+further #ifdef soup.  Caught by a Kbuild bot randconfig build.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: ce4a4e565f52 ("x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code")
+Link: http://lkml.kernel.org/r/76da9a3cc4415996f2ad2c905b93414add322021.1496673616.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/vm_event_item.h |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/include/linux/vm_event_item.h
++++ b/include/linux/vm_event_item.h
+@@ -89,10 +89,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS
+ #endif
+ #endif
+ #ifdef CONFIG_DEBUG_TLBFLUSH
+-#ifdef CONFIG_SMP
+ 		NR_TLB_REMOTE_FLUSH,	/* cpu tried to flush others' tlbs */
+ 		NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
+-#endif /* CONFIG_SMP */
+ 		NR_TLB_LOCAL_FLUSH_ALL,
+ 		NR_TLB_LOCAL_FLUSH_ONE,
+ #endif /* CONFIG_DEBUG_TLBFLUSH */
diff --git a/queue/module-add-retpoline-tag-to-vermagic.patch b/queue/module-add-retpoline-tag-to-vermagic.patch
new file mode 100644
index 0000000..7870bc4
--- /dev/null
+++ b/queue/module-add-retpoline-tag-to-vermagic.patch
@@ -0,0 +1,53 @@
+From 6cfb521ac0d5b97470883ff9b7facae264b7ab12 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 16 Jan 2018 12:52:28 -0800
+Subject: module: Add retpoline tag to VERMAGIC
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12 upstream.
+
+Add a marker for retpoline to the module VERMAGIC. This catches the case
+when a non RETPOLINE compiled module gets loaded into a retpoline kernel,
+making it insecure.
+
+It doesn't handle the case when retpoline has been runtime disabled.  Even
+in this case the match of the retpoline compile status will be enforced.  This
+implies that even with retpoline runtime disabled, all modules loaded need
+to be recompiled.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: rusty@rustcorp.com.au
+Cc: arjan.van.de.ven@intel.com
+Cc: jeyu@kernel.org
+Cc: torvalds@linux-foundation.org
+Link: https://lkml.kernel.org/r/20180116205228.4890-1-andi@firstfloor.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/vermagic.h |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/include/linux/vermagic.h
++++ b/include/linux/vermagic.h
+@@ -24,10 +24,16 @@
+ #ifndef MODULE_ARCH_VERMAGIC
+ #define MODULE_ARCH_VERMAGIC ""
+ #endif
++#ifdef RETPOLINE
++#define MODULE_VERMAGIC_RETPOLINE "retpoline "
++#else
++#define MODULE_VERMAGIC_RETPOLINE ""
++#endif
+ 
+ #define VERMAGIC_STRING 						\
+ 	UTS_RELEASE " "							\
+ 	MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT 			\
+ 	MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS	\
+-	MODULE_ARCH_VERMAGIC
++	MODULE_ARCH_VERMAGIC						\
++	MODULE_VERMAGIC_RETPOLINE
+ 
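A standalone sketch of how the vermagic string is assembled: each option contributes either a tag or an empty string, and adjacent string literals concatenate at compile time, so retpoline and non-retpoline builds end up with different, comparable magic strings. The release string below is made up for the example:

#include <stdio.h>

#define UTS_RELEASE "4.9.77-example"

#ifdef RETPOLINE
#define MODULE_VERMAGIC_RETPOLINE "retpoline "
#else
#define MODULE_VERMAGIC_RETPOLINE ""
#endif

#define VERMAGIC_STRING UTS_RELEASE " " "SMP " MODULE_VERMAGIC_RETPOLINE

int main(void)
{
        puts(VERMAGIC_STRING);  /* compile with -DRETPOLINE to see the tag */
        return 0;
}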
diff --git a/queue/module-retpoline-warn-about-missing-retpoline-in-module.patch b/queue/module-retpoline-warn-about-missing-retpoline-in-module.patch
new file mode 100644
index 0000000..23a00b6
--- /dev/null
+++ b/queue/module-retpoline-warn-about-missing-retpoline-in-module.patch
@@ -0,0 +1,149 @@
+From foo@baz Wed Feb  7 19:38:23 CST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Thu, 25 Jan 2018 15:50:28 -0800
+Subject: module/retpoline: Warn about missing retpoline in module
+
+From: Andi Kleen <ak@linux.intel.com>
+
+(cherry picked from commit caf7501a1b4ec964190f31f9c3f163de252273b8)
+
+There's a risk that a kernel which has full retpoline mitigations becomes
+vulnerable when a module gets loaded that hasn't been compiled with the
+right compiler or the right option.
+
+To enable detection of that mismatch at module load time, add a module info
+string "retpoline" at build time when the module was compiled with
+retpoline support. This only covers compiled C source; assembler source
+and prebuilt object files are not checked.
+
+If a retpoline enabled kernel detects a non retpoline protected module at
+load time, print a warning and report it in the sysfs vulnerability file.
+
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: gregkh@linuxfoundation.org
+Cc: torvalds@linux-foundation.org
+Cc: jeyu@kernel.org
+Cc: arjan@linux.intel.com
+Link: https://lkml.kernel.org/r/20180125235028.31211-1-andi@firstfloor.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   17 ++++++++++++++++-
+ include/linux/module.h     |    9 +++++++++
+ kernel/module.c            |   11 +++++++++++
+ scripts/mod/modpost.c      |    9 +++++++++
+ 4 files changed, 45 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -10,6 +10,7 @@
+ #include <linux/init.h>
+ #include <linux/utsname.h>
+ #include <linux/cpu.h>
++#include <linux/module.h>
+ 
+ #include <asm/nospec-branch.h>
+ #include <asm/cmdline.h>
+@@ -92,6 +93,19 @@ static const char *spectre_v2_strings[]
+ #define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
+ 
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
++static bool spectre_v2_bad_module;
++
++#ifdef RETPOLINE
++bool retpoline_module_ok(bool has_retpoline)
++{
++	if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
++		return true;
++
++	pr_err("System may be vunerable to spectre v2\n");
++	spectre_v2_bad_module = true;
++	return false;
++}
++#endif
+ 
+ static void __init spec2_print_if_insecure(const char *reason)
+ {
+@@ -277,6 +291,7 @@ ssize_t cpu_show_spectre_v2(struct devic
+ 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ 		return sprintf(buf, "Not affected\n");
+ 
+-	return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
++	return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled],
++		       spectre_v2_bad_module ? " - vulnerable module loaded" : "");
+ }
+ #endif
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -791,6 +791,15 @@ static inline void module_bug_finalize(c
+ static inline void module_bug_cleanup(struct module *mod) {}
+ #endif	/* CONFIG_GENERIC_BUG */
+ 
++#ifdef RETPOLINE
++extern bool retpoline_module_ok(bool has_retpoline);
++#else
++static inline bool retpoline_module_ok(bool has_retpoline)
++{
++	return true;
++}
++#endif
++
+ #ifdef CONFIG_MODULE_SIG
+ static inline bool module_sig_ok(struct module *module)
+ {
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -2817,6 +2817,15 @@ static int check_modinfo_livepatch(struc
+ }
+ #endif /* CONFIG_LIVEPATCH */
+ 
++static void check_modinfo_retpoline(struct module *mod, struct load_info *info)
++{
++	if (retpoline_module_ok(get_modinfo(info, "retpoline")))
++		return;
++
++	pr_warn("%s: loading module not compiled with retpoline compiler.\n",
++		mod->name);
++}
++
+ /* Sets info->hdr and info->len. */
+ static int copy_module_from_user(const void __user *umod, unsigned long len,
+ 				  struct load_info *info)
+@@ -2969,6 +2978,8 @@ static int check_modinfo(struct module *
+ 		add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
+ 	}
+ 
++	check_modinfo_retpoline(mod, info);
++
+ 	if (get_modinfo(info, "staging")) {
+ 		add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
+ 		pr_warn("%s: module is from the staging directory, the quality "
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -2130,6 +2130,14 @@ static void add_intree_flag(struct buffe
+ 		buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n");
+ }
+ 
++/* Cannot check for assembler */
++static void add_retpoline(struct buffer *b)
++{
++	buf_printf(b, "\n#ifdef RETPOLINE\n");
++	buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
++	buf_printf(b, "#endif\n");
++}
++
+ static void add_staging_flag(struct buffer *b, const char *name)
+ {
+ 	static const char *staging_dir = "drivers/staging";
+@@ -2474,6 +2482,7 @@ int main(int argc, char **argv)
+ 
+ 		add_header(&buf, mod);
+ 		add_intree_flag(&buf, !external_module);
++		add_retpoline(&buf);
+ 		add_staging_flag(&buf, mod->name);
+ 		err |= add_versions(&buf, mod);
+ 		add_depends(&buf, mod, modules);
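A condensed user-space sketch of the load-time decision the patch wires up, with the kernel specifics stripped away: the module carries a "retpoline" modinfo flag, and a mitigated kernel that loads a module without it warns and remembers the fact for the sysfs report. Names and types here are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

static bool spectre_v2_mitigated = true;        /* stand-in for spectre_v2_enabled */
static bool spectre_v2_bad_module;

static bool retpoline_module_ok(bool has_retpoline)
{
        if (!spectre_v2_mitigated || has_retpoline)
                return true;

        spectre_v2_bad_module = true;
        return false;
}

static void check_module(const char *name, bool has_retpoline)
{
        if (!retpoline_module_ok(has_retpoline))
                printf("%s: loading module not compiled with retpoline compiler.\n",
                       name);
}

int main(void)
{
        check_module("example_mod", false);     /* triggers the warning */
        return 0;
}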
diff --git a/queue/nl80211-sanitize-array-index-in-parse_txq_params.patch b/queue/nl80211-sanitize-array-index-in-parse_txq_params.patch
new file mode 100644
index 0000000..5de0872
--- /dev/null
+++ b/queue/nl80211-sanitize-array-index-in-parse_txq_params.patch
@@ -0,0 +1,72 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:03:15 -0800
+Subject: nl80211: Sanitize array index in parse_txq_params
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit 259d8c1e984318497c84eef547bbb6b1d9f4eb05)
+
+Wireless drivers rely on parse_txq_params to validate that txq_params->ac
+is less than NL80211_NUM_ACS by the time the low-level driver's ->conf_tx()
+handler is called. Use a new helper, array_index_nospec(), to sanitize
+txq_params->ac with respect to speculation. I.e. ensure that any
+speculation into ->conf_tx() handlers is done with a value of
+txq_params->ac that is within the bounds of [0, NL80211_NUM_ACS).
+
+Reported-by: Christian Lamparter <chunkeey@gmail.com>
+Reported-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Johannes Berg <johannes@sipsolutions.net>
+Cc: linux-arch@vger.kernel.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: linux-wireless@vger.kernel.org
+Cc: torvalds@linux-foundation.org
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727419584.33451.7700736761686184303.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/wireless/nl80211.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/wireless/nl80211.c
++++ b/net/wireless/nl80211.c
+@@ -16,6 +16,7 @@
+ #include <linux/nl80211.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/netlink.h>
++#include <linux/nospec.h>
+ #include <linux/etherdevice.h>
+ #include <net/net_namespace.h>
+ #include <net/genetlink.h>
+@@ -2014,20 +2015,22 @@ static const struct nla_policy txq_param
+ static int parse_txq_params(struct nlattr *tb[],
+ 			    struct ieee80211_txq_params *txq_params)
+ {
++	u8 ac;
++
+ 	if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] ||
+ 	    !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] ||
+ 	    !tb[NL80211_TXQ_ATTR_AIFS])
+ 		return -EINVAL;
+ 
+-	txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
++	ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
+ 	txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]);
+ 	txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]);
+ 	txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]);
+ 	txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]);
+ 
+-	if (txq_params->ac >= NL80211_NUM_ACS)
++	if (ac >= NL80211_NUM_ACS)
+ 		return -EINVAL;
+-
++	txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS);
+ 	return 0;
+ }
+ 
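The same sanitization pattern, reduced to its shape in a kernel-style sketch (not the cfg80211 code; the struct and constant names are stand-ins): read the untrusted index into a local, bounds-check it, then clamp it with array_index_nospec() before it is stored, so speculation past the check cannot carry an out-of-range value:

#include <linux/errno.h>
#include <linux/nospec.h>

#define NUM_ACS 4                       /* stand-in for NL80211_NUM_ACS */

struct txq_settings {                   /* illustrative, not the cfg80211 struct */
        unsigned char ac;
};

static int set_ac(struct txq_settings *p, unsigned char untrusted_ac)
{
        unsigned char ac = untrusted_ac;

        if (ac >= NUM_ACS)
                return -EINVAL;

        p->ac = array_index_nospec(ac, NUM_ACS);
        return 0;
}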
diff --git a/queue/objtool-allow-alternatives-to-be-ignored.patch b/queue/objtool-allow-alternatives-to-be-ignored.patch
new file mode 100644
index 0000000..db02f25
--- /dev/null
+++ b/queue/objtool-allow-alternatives-to-be-ignored.patch
@@ -0,0 +1,163 @@
+From 258c76059cece01bebae098e81bacb1af2edad17 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 11 Jan 2018 21:46:24 +0000
+Subject: objtool: Allow alternatives to be ignored
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 258c76059cece01bebae098e81bacb1af2edad17 upstream.
+
+Getting objtool to understand retpolines is going to be a bit of a
+challenge.  For now, take advantage of the fact that retpolines are
+patched in with alternatives.  Just read the original (sane)
+non-alternative instruction, and ignore the patched-in retpoline.
+
+This allows objtool to understand the control flow *around* the
+retpoline, even if it can't yet follow what's inside.  This means the
+ORC unwinder will fail to unwind from inside a retpoline, but will work
+fine otherwise.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-3-git-send-email-dwmw@amazon.co.uk
+[dwmw2: Applies to tools/objtool/builtin-check.c not check.[ch]]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/builtin-check.c |   64 +++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 57 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -51,7 +51,7 @@ struct instruction {
+ 	unsigned int len, state;
+ 	unsigned char type;
+ 	unsigned long immediate;
+-	bool alt_group, visited;
++	bool alt_group, visited, ignore_alts;
+ 	struct symbol *call_dest;
+ 	struct instruction *jump_dest;
+ 	struct list_head alts;
+@@ -353,6 +353,40 @@ static void add_ignores(struct objtool_f
+ }
+ 
+ /*
++ * FIXME: For now, just ignore any alternatives which add retpolines.  This is
++ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
++ * But it at least allows objtool to understand the control flow *around* the
++ * retpoline.
++ */
++static int add_nospec_ignores(struct objtool_file *file)
++{
++	struct section *sec;
++	struct rela *rela;
++	struct instruction *insn;
++
++	sec = find_section_by_name(file->elf, ".rela.discard.nospec");
++	if (!sec)
++		return 0;
++
++	list_for_each_entry(rela, &sec->rela_list, list) {
++		if (rela->sym->type != STT_SECTION) {
++			WARN("unexpected relocation symbol type in %s", sec->name);
++			return -1;
++		}
++
++		insn = find_insn(file, rela->sym->sec, rela->addend);
++		if (!insn) {
++			WARN("bad .discard.nospec entry");
++			return -1;
++		}
++
++		insn->ignore_alts = true;
++	}
++
++	return 0;
++}
++
++/*
+  * Find the destination instructions for all jumps.
+  */
+ static int add_jump_destinations(struct objtool_file *file)
+@@ -435,11 +469,18 @@ static int add_call_destinations(struct
+ 			dest_off = insn->offset + insn->len + insn->immediate;
+ 			insn->call_dest = find_symbol_by_offset(insn->sec,
+ 								dest_off);
++			/*
++			 * FIXME: Thanks to retpolines, it's now considered
++			 * normal for a function to call within itself.  So
++			 * disable this warning for now.
++			 */
++#if 0
+ 			if (!insn->call_dest) {
+ 				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
+ 					  insn->sec, insn->offset, dest_off);
+ 				return -1;
+ 			}
++#endif
+ 		} else if (rela->sym->type == STT_SECTION) {
+ 			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
+ 								rela->addend+4);
+@@ -601,12 +642,6 @@ static int add_special_section_alts(stru
+ 		return ret;
+ 
+ 	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
+-		alt = malloc(sizeof(*alt));
+-		if (!alt) {
+-			WARN("malloc failed");
+-			ret = -1;
+-			goto out;
+-		}
+ 
+ 		orig_insn = find_insn(file, special_alt->orig_sec,
+ 				      special_alt->orig_off);
+@@ -617,6 +652,10 @@ static int add_special_section_alts(stru
+ 			goto out;
+ 		}
+ 
++		/* Ignore retpoline alternatives. */
++		if (orig_insn->ignore_alts)
++			continue;
++
+ 		new_insn = NULL;
+ 		if (!special_alt->group || special_alt->new_len) {
+ 			new_insn = find_insn(file, special_alt->new_sec,
+@@ -642,6 +681,13 @@ static int add_special_section_alts(stru
+ 				goto out;
+ 		}
+ 
++		alt = malloc(sizeof(*alt));
++		if (!alt) {
++			WARN("malloc failed");
++			ret = -1;
++			goto out;
++		}
++
+ 		alt->insn = new_insn;
+ 		list_add_tail(&alt->list, &orig_insn->alts);
+ 
+@@ -861,6 +907,10 @@ static int decode_sections(struct objtoo
+ 
+ 	add_ignores(file);
+ 
++	ret = add_nospec_ignores(file);
++	if (ret)
++		return ret;
++
+ 	ret = add_jump_destinations(file);
+ 	if (ret)
+ 		return ret;
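The patch above is the consumer side of the annotation scheme. For reference, a hedged sketch of the producer side: a marker emitted into a ".discard.*" section gives the build tool a relocation pointing at the annotated code, while the linker script later throws the section away. The macro below is an illustration of the idea, not the kernel's actual nospec annotation:

#define ANNOTATE_DISCARD_NOSPEC()                               \
        asm volatile("999:\n\t"                                 \
                     ".pushsection .discard.nospec\n\t"         \
                     ".long 999b - .\n\t"                       \
                     ".popsection\n\t")

static inline void example_annotated_op(void)
{
        ANNOTATE_DISCARD_NOSPEC();      /* a tool like objtool can find this spot */
}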
diff --git a/queue/objtool-detect-jumps-to-retpoline-thunks.patch b/queue/objtool-detect-jumps-to-retpoline-thunks.patch
new file mode 100644
index 0000000..0079121
--- /dev/null
+++ b/queue/objtool-detect-jumps-to-retpoline-thunks.patch
@@ -0,0 +1,61 @@
+From 39b735332cb8b33a27c28592d969e4016c86c3ea Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 11 Jan 2018 21:46:23 +0000
+Subject: objtool: Detect jumps to retpoline thunks
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 39b735332cb8b33a27c28592d969e4016c86c3ea upstream.
+
+A direct jump to a retpoline thunk is really an indirect jump in
+disguise.  Change the objtool instruction type accordingly.
+
+Objtool needs to know where indirect branches are so it can detect
+switch statement jump tables.
+
+This fixes a bunch of warnings with CONFIG_RETPOLINE like:
+
+  arch/x86/events/intel/uncore_nhmex.o: warning: objtool: nhmex_rbox_msr_enable_event()+0x44: sibling call from callable instruction with modified stack frame
+  kernel/signal.o: warning: objtool: copy_siginfo_to_user()+0x91: sibling call from callable instruction with modified stack frame
+  ...
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-2-git-send-email-dwmw@amazon.co.uk
+[dwmw2: Applies to tools/objtool/builtin-check.c not check.c]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/builtin-check.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -382,6 +382,13 @@ static int add_jump_destinations(struct
+ 		} else if (rela->sym->sec->idx) {
+ 			dest_sec = rela->sym->sec;
+ 			dest_off = rela->sym->sym.st_value + rela->addend + 4;
++		} else if (strstr(rela->sym->name, "_indirect_thunk_")) {
++			/*
++			 * Retpoline jumps are really dynamic jumps in
++			 * disguise, so convert them accordingly.
++			 */
++			insn->type = INSN_JUMP_DYNAMIC;
++			continue;
+ 		} else {
+ 			/* sibling call */
+ 			insn->jump_dest = 0;
diff --git a/queue/objtool-fix-retpoline-support-for-pre-orc-objtool.patch b/queue/objtool-fix-retpoline-support-for-pre-orc-objtool.patch
new file mode 100644
index 0000000..a773f63
--- /dev/null
+++ b/queue/objtool-fix-retpoline-support-for-pre-orc-objtool.patch
@@ -0,0 +1,45 @@
+From jpoimboe@redhat.com  Mon Jan 15 18:44:58 2018
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 15 Jan 2018 11:00:54 -0600
+Subject: objtool: Fix retpoline support for pre-ORC objtool
+To: David Woodhouse <dwmw2@infradead.org>
+Cc: gregkh@linuxfoundation.org, ak@linux.intel.com, dave.hansen@intel.com, gregkh@linux-foundation.org, jikos@kernel.org, keescook@google.com, luto@amacapital.net, peterz@infradead.org, pjt@google.com, riel@redhat.com, tglx@linutronix.de, tim.c.chen@linux.intel.com, torvalds@linux-foundation.org, stable@vger.kernel.org, stable-commits@vger.kernel.org
+Message-ID: <20180115170054.6baepkgihtla4nub@treble>
+Content-Disposition: inline
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+Objtool 1.0 (pre-ORC) produces the following warning when it encounters
+a retpoline:
+
+  arch/x86/crypto/camellia-aesni-avx2-asm_64.o: warning: objtool: .altinstr_replacement+0xf: return instruction outside of a callable function
+
+That warning is meant to catch GCC bugs and missing ENTRY/ENDPROC
+annotations, neither of which are applicable to alternatives.  Silence
+the warning for alternative instructions, just like objtool 2.0 already
+does.
+
+Reported-by: David Woodhouse <dwmw2@infradead.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/builtin-check.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -1230,6 +1230,14 @@ static int validate_uncallable_instructi
+ 
+ 	for_each_insn(file, insn) {
+ 		if (!insn->visited && insn->type == INSN_RETURN) {
++
++			/*
++			 * Don't warn about call instructions in unvisited
++			 * retpoline alternatives.
++			 */
++			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
++				continue;
++
+ 			WARN_FUNC("return instruction outside of a callable function",
+ 				  insn->sec, insn->offset);
+ 			warnings++;
diff --git a/queue/objtool-modules-discard-objtool-annotation-sections-for-modules.patch b/queue/objtool-modules-discard-objtool-annotation-sections-for-modules.patch
new file mode 100644
index 0000000..ceb2b5f
--- /dev/null
+++ b/queue/objtool-modules-discard-objtool-annotation-sections-for-modules.patch
@@ -0,0 +1,84 @@
+From e390f9a9689a42f477a6073e2e7df530a4c1b740 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Wed, 1 Mar 2017 12:04:44 -0600
+Subject: objtool, modules: Discard objtool annotation sections for modules
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit e390f9a9689a42f477a6073e2e7df530a4c1b740 upstream.
+
+The '__unreachable' and '__func_stack_frame_non_standard' sections are
+only used at compile time.  They're discarded for vmlinux but they
+should also be discarded for modules.
+
+Since this is a recurring pattern, prefix the section names with
+".discard.".  It's a nice convention and vmlinux.lds.h already discards
+such sections.
+
+Also remove the 'a' (allocatable) flag from the __unreachable section
+since it doesn't make sense for a discarded section.
+
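+For illustration only, the entries in that section come from annotations of
+the following form (the function name is hypothetical):
+
+  /* Ask objtool to skip stack validation of this function; the marker
+   * pointer is emitted into .discard.func_stack_frame_non_standard and
+   * now gets discarded at link time for both vmlinux and modules. */
+  STACK_FRAME_NON_STANDARD(my_asm_heavy_func);
+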
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jessica Yu <jeyu@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")
+Link: http://lkml.kernel.org/r/20170301180444.lhd53c5tibc4ns77@treble
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+[dwmw2: Remove the unreachable part in backporting since it's not here yet]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/frame.h         |    2 +-
+ scripts/mod/modpost.c         |    1 +
+ scripts/module-common.lds     |    5 ++++-
+ tools/objtool/builtin-check.c |    2 +-
+ 4 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/include/linux/frame.h
++++ b/include/linux/frame.h
+@@ -11,7 +11,7 @@
+  * For more information, see tools/objtool/Documentation/stack-validation.txt.
+  */
+ #define STACK_FRAME_NON_STANDARD(func) \
+-	static void __used __section(__func_stack_frame_non_standard) \
++	static void __used __section(.discard.func_stack_frame_non_standard) \
+ 		*__func_stack_frame_non_standard_##func = func
+ 
+ #else /* !CONFIG_STACK_VALIDATION */
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -838,6 +838,7 @@ static const char *const section_white_l
+ 	".cmem*",			/* EZchip */
+ 	".fmt_slot*",			/* EZchip */
+ 	".gnu.lto*",
++	".discard.*",
+ 	NULL
+ };
+ 
+--- a/scripts/module-common.lds
++++ b/scripts/module-common.lds
+@@ -4,7 +4,10 @@
+  * combine them automatically.
+  */
+ SECTIONS {
+-	/DISCARD/ : { *(.discard) }
++	/DISCARD/ : {
++		*(.discard)
++		*(.discard.*)
++	}
+ 
+ 	__ksymtab		0 : { *(SORT(___ksymtab+*)) }
+ 	__ksymtab_gpl		0 : { *(SORT(___ksymtab_gpl+*)) }
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -1229,7 +1229,7 @@ int cmd_check(int argc, const char **arg
+ 
+ 	INIT_LIST_HEAD(&file.insn_list);
+ 	hash_init(file.insn_hash);
+-	file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard");
++	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
+ 	file.rodata = find_section_by_name(file.elf, ".rodata");
+ 	file.ignore_unreachables = false;
+ 	file.c_file = find_section_by_name(file.elf, ".comment");
diff --git a/queue/retpoline-introduce-start-end-markers-of-indirect-thunk.patch b/queue/retpoline-introduce-start-end-markers-of-indirect-thunk.patch
new file mode 100644
index 0000000..3fdc39b
--- /dev/null
+++ b/queue/retpoline-introduce-start-end-markers-of-indirect-thunk.patch
@@ -0,0 +1,71 @@
+From 736e80a4213e9bbce40a7c050337047128b472ac Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 19 Jan 2018 01:14:21 +0900
+Subject: retpoline: Introduce start/end markers of indirect thunk
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 736e80a4213e9bbce40a7c050337047128b472ac upstream.
+
+Introduce start/end markers of __x86_indirect_thunk_* functions.
+To make this easy, consolidate the .text.__x86.indirect_thunk.* sections
+into a single .text.__x86.indirect_thunk section, place it at the end of
+the kernel text section, and add __indirect_thunk_start/end markers so
+that other subsystems (e.g. kprobes) can identify it.
+
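+For illustration only (not part of this change), a subsystem could use the
+new markers roughly as follows to tell whether an address lies inside the
+consolidated thunk text; the helper name here is made up:
+
+  #include <asm/nospec-branch.h>
+
+  static bool addr_in_indirect_thunk(unsigned long addr)
+  {
+          return addr >= (unsigned long)__indirect_thunk_start &&
+                 addr <  (unsigned long)__indirect_thunk_end;
+  }
+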
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Link: https://lkml.kernel.org/r/151629206178.10241.6828804696410044771.stgit@devbox
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/nospec-branch.h |    3 +++
+ arch/x86/kernel/vmlinux.lds.S        |    7 +++++++
+ arch/x86/lib/retpoline.S             |    2 +-
+ 3 files changed, 11 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -194,6 +194,9 @@ enum spectre_v2_mitigation {
+ 	SPECTRE_V2_IBRS,
+ };
+ 
++extern char __indirect_thunk_start[];
++extern char __indirect_thunk_end[];
++
+ /*
+  * On VMEXIT we must ensure that no RSB predictions learned in the guest
+  * can be followed in the host, by overwriting the RSB completely. Both
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -105,6 +105,13 @@ SECTIONS
+ 		SOFTIRQENTRY_TEXT
+ 		*(.fixup)
+ 		*(.gnu.warning)
++
++#ifdef CONFIG_RETPOLINE
++		__indirect_thunk_start = .;
++		*(.text.__x86.indirect_thunk)
++		__indirect_thunk_end = .;
++#endif
++
+ 		/* End of text section */
+ 		_etext = .;
+ 	} :text = 0x9090
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -9,7 +9,7 @@
+ #include <asm/nospec-branch.h>
+ 
+ .macro THUNK reg
+-	.section .text.__x86.indirect_thunk.\reg
++	.section .text.__x86.indirect_thunk
+ 
+ ENTRY(__x86_indirect_thunk_\reg)
+ 	CFI_STARTPROC
diff --git a/queue/selftests-x86-add-test_vsyscall.patch b/queue/selftests-x86-add-test_vsyscall.patch
new file mode 100644
index 0000000..73478ec
--- /dev/null
+++ b/queue/selftests-x86-add-test_vsyscall.patch
@@ -0,0 +1,569 @@
+From 6fcf09dcfd33e93cfe1808fcb9474087dd40cc05 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 11 Jan 2018 17:16:51 -0800
+Subject: [PATCH] selftests/x86: Add test_vsyscall
+
+commit 352909b49ba0d74929b96af6dfbefc854ab6ebb5 upstream.
+
+This tests that the vsyscall entries do what they're expected to do.
+It also confirms that attempts to read the vsyscall page behave as
+expected.
+
+If changes are made to the vsyscall code or its memory map handling,
+running this test in all three of vsyscall=none, vsyscall=emulate,
+and vsyscall=native is helpful.
+
+(Because it's easy, this also compares the vsyscall results to their
+ vDSO equivalents.)
+
+Note to KAISER backporters: please test this under all three
+vsyscall modes.  Also, in the emulate and native modes, make sure
+that test_vsyscall_64 agrees with the command line or config
+option as to which mode you're in.  It's quite easy to mess up
+the kernel such that native mode accidentally emulates
+or vice versa.
+
+Greg, etc: please backport this to all your Meltdown-patched
+kernels.  It'll help make sure the patches didn't regress
+vsyscalls.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: http://lkml.kernel.org/r/2b9c5a174c1d60fd7774461d518aa75598b1d8fd.1515719552.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
+index 4f747ee07f10..38dbdf4bfd89 100644
+--- a/tools/testing/selftests/x86/Makefile
++++ b/tools/testing/selftests/x86/Makefile
+@@ -5,7 +5,7 @@ include ../lib.mk
+ .PHONY: all all_32 all_64 warn_32bit_failure clean
+ 
+ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
+-			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test
++			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test test_vsyscall
+ TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
+ 			test_FCMOV test_FCOMI test_FISTTP \
+ 			vdso_restorer
+diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
+new file mode 100644
+index 000000000000..6e0bd52ad53d
+--- /dev/null
++++ b/tools/testing/selftests/x86/test_vsyscall.c
+@@ -0,0 +1,500 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++#define _GNU_SOURCE
++
++#include <stdio.h>
++#include <sys/time.h>
++#include <time.h>
++#include <stdlib.h>
++#include <sys/syscall.h>
++#include <unistd.h>
++#include <dlfcn.h>
++#include <string.h>
++#include <inttypes.h>
++#include <signal.h>
++#include <sys/ucontext.h>
++#include <errno.h>
++#include <err.h>
++#include <sched.h>
++#include <stdbool.h>
++#include <setjmp.h>
++
++#ifdef __x86_64__
++# define VSYS(x) (x)
++#else
++# define VSYS(x) 0
++#endif
++
++#ifndef SYS_getcpu
++# ifdef __x86_64__
++#  define SYS_getcpu 309
++# else
++#  define SYS_getcpu 318
++# endif
++#endif
++
++static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
++		       int flags)
++{
++	struct sigaction sa;
++	memset(&sa, 0, sizeof(sa));
++	sa.sa_sigaction = handler;
++	sa.sa_flags = SA_SIGINFO | flags;
++	sigemptyset(&sa.sa_mask);
++	if (sigaction(sig, &sa, 0))
++		err(1, "sigaction");
++}
++
++/* vsyscalls and vDSO */
++bool should_read_vsyscall = false;
++
++typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
++gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
++gtod_t vdso_gtod;
++
++typedef int (*vgettime_t)(clockid_t, struct timespec *);
++vgettime_t vdso_gettime;
++
++typedef long (*time_func_t)(time_t *t);
++time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
++time_func_t vdso_time;
++
++typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
++getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
++getcpu_t vdso_getcpu;
++
++static void init_vdso(void)
++{
++	void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
++	if (!vdso)
++		vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
++	if (!vdso) {
++		printf("[WARN]\tfailed to find vDSO\n");
++		return;
++	}
++
++	vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
++	if (!vdso_gtod)
++		printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
++
++	vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
++	if (!vdso_gettime)
++		printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
++
++	vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
++	if (!vdso_time)
++		printf("[WARN]\tfailed to find time in vDSO\n");
++
++	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
++	if (!vdso_getcpu) {
++		/* getcpu() was never wired up in the 32-bit vDSO. */
++		printf("[%s]\tfailed to find getcpu in vDSO\n",
++		       sizeof(long) == 8 ? "WARN" : "NOTE");
++	}
++}
++
++static int init_vsys(void)
++{
++#ifdef __x86_64__
++	int nerrs = 0;
++	FILE *maps;
++	char line[128];
++	bool found = false;
++
++	maps = fopen("/proc/self/maps", "r");
++	if (!maps) {
++		printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
++		should_read_vsyscall = true;
++		return 0;
++	}
++
++	while (fgets(line, sizeof(line), maps)) {
++		char r, x;
++		void *start, *end;
++		char name[128];
++		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
++			   &start, &end, &r, &x, name) != 5)
++			continue;
++
++		if (strcmp(name, "[vsyscall]"))
++			continue;
++
++		printf("\tvsyscall map: %s", line);
++
++		if (start != (void *)0xffffffffff600000 ||
++		    end != (void *)0xffffffffff601000) {
++			printf("[FAIL]\taddress range is nonsense\n");
++			nerrs++;
++		}
++
++		printf("\tvsyscall permissions are %c-%c\n", r, x);
++		should_read_vsyscall = (r == 'r');
++		if (x != 'x') {
++			vgtod = NULL;
++			vtime = NULL;
++			vgetcpu = NULL;
++		}
++
++		found = true;
++		break;
++	}
++
++	fclose(maps);
++
++	if (!found) {
++		printf("\tno vsyscall map in /proc/self/maps\n");
++		should_read_vsyscall = false;
++		vgtod = NULL;
++		vtime = NULL;
++		vgetcpu = NULL;
++	}
++
++	return nerrs;
++#else
++	return 0;
++#endif
++}
++
++/* syscalls */
++static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
++{
++	return syscall(SYS_gettimeofday, tv, tz);
++}
++
++static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
++{
++	return syscall(SYS_clock_gettime, id, ts);
++}
++
++static inline long sys_time(time_t *t)
++{
++	return syscall(SYS_time, t);
++}
++
++static inline long sys_getcpu(unsigned * cpu, unsigned * node,
++			      void* cache)
++{
++	return syscall(SYS_getcpu, cpu, node, cache);
++}
++
++static jmp_buf jmpbuf;
++
++static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
++{
++	siglongjmp(jmpbuf, 1);
++}
++
++static double tv_diff(const struct timeval *a, const struct timeval *b)
++{
++	return (double)(a->tv_sec - b->tv_sec) +
++		(double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
++}
++
++static int check_gtod(const struct timeval *tv_sys1,
++		      const struct timeval *tv_sys2,
++		      const struct timezone *tz_sys,
++		      const char *which,
++		      const struct timeval *tv_other,
++		      const struct timezone *tz_other)
++{
++	int nerrs = 0;
++	double d1, d2;
++
++	if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
++		printf("[FAIL] %s tz mismatch\n", which);
++		nerrs++;
++	}
++
++	d1 = tv_diff(tv_other, tv_sys1);
++	d2 = tv_diff(tv_sys2, tv_other);
++	printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
++
++	if (d1 < 0 || d2 < 0) {
++		printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
++		nerrs++;
++	} else {
++		printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
++	}
++
++	return nerrs;
++}
++
++static int test_gtod(void)
++{
++	struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
++	struct timezone tz_sys, tz_vdso, tz_vsys;
++	long ret_vdso = -1;
++	long ret_vsys = -1;
++	int nerrs = 0;
++
++	printf("[RUN]\ttest gettimeofday()\n");
++
++	if (sys_gtod(&tv_sys1, &tz_sys) != 0)
++		err(1, "syscall gettimeofday");
++	if (vdso_gtod)
++		ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
++	if (vgtod)
++		ret_vsys = vgtod(&tv_vsys, &tz_vsys);
++	if (sys_gtod(&tv_sys2, &tz_sys) != 0)
++		err(1, "syscall gettimeofday");
++
++	if (vdso_gtod) {
++		if (ret_vdso == 0) {
++			nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
++		} else {
++			printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
++			nerrs++;
++		}
++	}
++
++	if (vgtod) {
++		if (ret_vsys == 0) {
++			nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
++		} else {
++			printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
++			nerrs++;
++		}
++	}
++
++	return nerrs;
++}
++
++static int test_time(void) {
++	int nerrs = 0;
++
++	printf("[RUN]\ttest time()\n");
++	long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
++	long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
++	t_sys1 = sys_time(&t2_sys1);
++	if (vdso_time)
++		t_vdso = vdso_time(&t2_vdso);
++	if (vtime)
++		t_vsys = vtime(&t2_vsys);
++	t_sys2 = sys_time(&t2_sys2);
++	if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
++		printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
++		nerrs++;
++		return nerrs;
++	}
++
++	if (vdso_time) {
++		if (t_vdso < 0 || t_vdso != t2_vdso) {
++			printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
++			nerrs++;
++		} else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
++			printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
++			nerrs++;
++		} else {
++			printf("[OK]\tvDSO time() is okay\n");
++		}
++	}
++
++	if (vtime) {
++		if (t_vsys < 0 || t_vsys != t2_vsys) {
++			printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
++			nerrs++;
++		} else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
++			printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
++			nerrs++;
++		} else {
++			printf("[OK]\tvsyscall time() is okay\n");
++		}
++	}
++
++	return nerrs;
++}
++
++static int test_getcpu(int cpu)
++{
++	int nerrs = 0;
++	long ret_sys, ret_vdso = -1, ret_vsys = -1;
++
++	printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
++
++	cpu_set_t cpuset;
++	CPU_ZERO(&cpuset);
++	CPU_SET(cpu, &cpuset);
++	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
++		printf("[SKIP]\tfailed to force CPU %d\n", cpu);
++		return nerrs;
++	}
++
++	unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
++	unsigned node = 0;
++	bool have_node = false;
++	ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
++	if (vdso_getcpu)
++		ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
++	if (vgetcpu)
++		ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
++
++	if (ret_sys == 0) {
++		if (cpu_sys != cpu) {
++			printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
++			nerrs++;
++		}
++
++		have_node = true;
++		node = node_sys;
++	}
++
++	if (vdso_getcpu) {
++		if (ret_vdso) {
++			printf("[FAIL]\tvDSO getcpu() failed\n");
++			nerrs++;
++		} else {
++			if (!have_node) {
++				have_node = true;
++				node = node_vdso;
++			}
++
++			if (cpu_vdso != cpu) {
++				printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
++				nerrs++;
++			} else {
++				printf("[OK]\tvDSO reported correct CPU\n");
++			}
++
++			if (node_vdso != node) {
++				printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
++				nerrs++;
++			} else {
++				printf("[OK]\tvDSO reported correct node\n");
++			}
++		}
++	}
++
++	if (vgetcpu) {
++		if (ret_vsys) {
++			printf("[FAIL]\tvsyscall getcpu() failed\n");
++			nerrs++;
++		} else {
++			if (!have_node) {
++				have_node = true;
++				node = node_vsys;
++			}
++
++			if (cpu_vsys != cpu) {
++				printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
++				nerrs++;
++			} else {
++				printf("[OK]\tvsyscall reported correct CPU\n");
++			}
++
++			if (node_vsys != node) {
++				printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
++				nerrs++;
++			} else {
++				printf("[OK]\tvsyscall reported correct node\n");
++			}
++		}
++	}
++
++	return nerrs;
++}
++
++static int test_vsys_r(void)
++{
++#ifdef __x86_64__
++	printf("[RUN]\tChecking read access to the vsyscall page\n");
++	bool can_read;
++	if (sigsetjmp(jmpbuf, 1) == 0) {
++		*(volatile int *)0xffffffffff600000;
++		can_read = true;
++	} else {
++		can_read = false;
++	}
++
++	if (can_read && !should_read_vsyscall) {
++		printf("[FAIL]\tWe have read access, but we shouldn't\n");
++		return 1;
++	} else if (!can_read && should_read_vsyscall) {
++		printf("[FAIL]\tWe don't have read access, but we should\n");
++		return 1;
++	} else {
++		printf("[OK]\tgot expected result\n");
++	}
++#endif
++
++	return 0;
++}
++
++
++#ifdef __x86_64__
++#define X86_EFLAGS_TF (1UL << 8)
++static volatile sig_atomic_t num_vsyscall_traps;
++
++static unsigned long get_eflags(void)
++{
++	unsigned long eflags;
++	asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
++	return eflags;
++}
++
++static void set_eflags(unsigned long eflags)
++{
++	asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
++}
++
++static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
++{
++	ucontext_t *ctx = (ucontext_t *)ctx_void;
++	unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
++
++	if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
++		num_vsyscall_traps++;
++}
++
++static int test_native_vsyscall(void)
++{
++	time_t tmp;
++	bool is_native;
++
++	if (!vtime)
++		return 0;
++
++	printf("[RUN]\tchecking for native vsyscall\n");
++	sethandler(SIGTRAP, sigtrap, 0);
++	set_eflags(get_eflags() | X86_EFLAGS_TF);
++	vtime(&tmp);
++	set_eflags(get_eflags() & ~X86_EFLAGS_TF);
++
++	/*
++	 * If vsyscalls are emulated, we expect a single trap in the
++	 * vsyscall page -- the call instruction will trap with RIP
++	 * pointing to the entry point before emulation takes over.
++	 * In native mode, we expect two traps, since whatever code
++	 * the vsyscall page contains will be more than just a ret
++	 * instruction.
++	 */
++	is_native = (num_vsyscall_traps > 1);
++
++	printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
++	       (is_native ? "native" : "emulated"),
++	       (int)num_vsyscall_traps);
++
++	return 0;
++}
++#endif
++
++int main(int argc, char **argv)
++{
++	int nerrs = 0;
++
++	init_vdso();
++	nerrs += init_vsys();
++
++	nerrs += test_gtod();
++	nerrs += test_time();
++	nerrs += test_getcpu(0);
++	nerrs += test_getcpu(1);
++
++	sethandler(SIGSEGV, sigsegv, 0);
++	nerrs += test_vsys_r();
++
++#ifdef __x86_64__
++	nerrs += test_native_vsyscall();
++#endif
++
++	return nerrs ? 1 : 0;
++}
+-- 
+2.15.0
+
diff --git a/queue/sysfs-cpu-add-vulnerability-folder.patch b/queue/sysfs-cpu-add-vulnerability-folder.patch
new file mode 100644
index 0000000..92522ac
--- /dev/null
+++ b/queue/sysfs-cpu-add-vulnerability-folder.patch
@@ -0,0 +1,149 @@
+From 87590ce6e373d1a5401f6539f0c59ef92dd924a9 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 7 Jan 2018 22:48:00 +0100
+Subject: sysfs/cpu: Add vulnerability folder
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 87590ce6e373d1a5401f6539f0c59ef92dd924a9 upstream.
+
+As the meltdown/spectre problem affects several CPU architectures, it makes
+sense to have a common way to express whether a system is affected by a
+particular vulnerability or not. If affected, the way to express the
+mitigation should be common as well.
+
+Create /sys/devices/system/cpu/vulnerabilities folder and files for
+meltdown, spectre_v1 and spectre_v2.
+
+Allow architectures to override the show function.
+
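+For illustration only (not part of this change), a user-space consumer can
+simply read the new files; a minimal sketch:
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          const char *files[] = { "meltdown", "spectre_v1", "spectre_v2" };
+          char path[128], line[128];
+          int i;
+
+          for (i = 0; i < 3; i++) {
+                  snprintf(path, sizeof(path),
+                           "/sys/devices/system/cpu/vulnerabilities/%s",
+                           files[i]);
+                  FILE *f = fopen(path, "r");
+                  if (f && fgets(line, sizeof(line), f))
+                          printf("%s: %s", files[i], line);
+                  if (f)
+                          fclose(f);
+          }
+          return 0;
+  }
+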
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://lkml.kernel.org/r/20180107214913.096657732@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/ABI/testing/sysfs-devices-system-cpu |   16 +++++++
+ drivers/base/Kconfig                               |    3 +
+ drivers/base/cpu.c                                 |   48 +++++++++++++++++++++
+ include/linux/cpu.h                                |    7 +++
+ 4 files changed, 74 insertions(+)
+
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -350,3 +350,19 @@ Contact:	Linux ARM Kernel Mailing list <
+ Description:	AArch64 CPU registers
+ 		'identification' directory exposes the CPU ID registers for
+ 		 identifying model and revision of the CPU.
++
++What:		/sys/devices/system/cpu/vulnerabilities
++		/sys/devices/system/cpu/vulnerabilities/meltdown
++		/sys/devices/system/cpu/vulnerabilities/spectre_v1
++		/sys/devices/system/cpu/vulnerabilities/spectre_v2
++Date:		Januar 2018
++Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
++Description:	Information about CPU vulnerabilities
++
++		The files are named after the code names of CPU
++		vulnerabilities. The output of those files reflects the
++		state of the CPUs in the system. Possible output values:
++
++		"Not affected"	  CPU is not affected by the vulnerability
++		"Vulnerable"	  CPU is affected and no mitigation in effect
++		"Mitigation: $M"  CPU is affetcted and mitigation $M is in effect
+--- a/drivers/base/Kconfig
++++ b/drivers/base/Kconfig
+@@ -235,6 +235,9 @@ config GENERIC_CPU_DEVICES
+ config GENERIC_CPU_AUTOPROBE
+ 	bool
+ 
++config GENERIC_CPU_VULNERABILITIES
++	bool
++
+ config SOC_BUS
+ 	bool
+ 
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -499,10 +499,58 @@ static void __init cpu_dev_register_gene
+ #endif
+ }
+ 
++#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
++
++ssize_t __weak cpu_show_meltdown(struct device *dev,
++				 struct device_attribute *attr, char *buf)
++{
++	return sprintf(buf, "Not affected\n");
++}
++
++ssize_t __weak cpu_show_spectre_v1(struct device *dev,
++				   struct device_attribute *attr, char *buf)
++{
++	return sprintf(buf, "Not affected\n");
++}
++
++ssize_t __weak cpu_show_spectre_v2(struct device *dev,
++				   struct device_attribute *attr, char *buf)
++{
++	return sprintf(buf, "Not affected\n");
++}
++
++static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
++static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
++static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
++
++static struct attribute *cpu_root_vulnerabilities_attrs[] = {
++	&dev_attr_meltdown.attr,
++	&dev_attr_spectre_v1.attr,
++	&dev_attr_spectre_v2.attr,
++	NULL
++};
++
++static const struct attribute_group cpu_root_vulnerabilities_group = {
++	.name  = "vulnerabilities",
++	.attrs = cpu_root_vulnerabilities_attrs,
++};
++
++static void __init cpu_register_vulnerabilities(void)
++{
++	if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
++			       &cpu_root_vulnerabilities_group))
++		pr_err("Unable to register CPU vulnerabilities\n");
++}
++
++#else
++static inline void cpu_register_vulnerabilities(void) { }
++#endif
++
+ void __init cpu_dev_init(void)
+ {
+ 	if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
+ 		panic("Failed to register CPU subsystem");
+ 
+ 	cpu_dev_register_generic();
++	cpu_register_vulnerabilities();
+ }
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -44,6 +44,13 @@ extern void cpu_remove_dev_attr(struct d
+ extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
+ extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
+ 
++extern ssize_t cpu_show_meltdown(struct device *dev,
++				 struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_spectre_v1(struct device *dev,
++				   struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_spectre_v2(struct device *dev,
++				   struct device_attribute *attr, char *buf);
++
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
+ 				 const struct attribute_group **groups,
diff --git a/queue/sysfs-cpu-fix-typos-in-vulnerability-documentation.patch b/queue/sysfs-cpu-fix-typos-in-vulnerability-documentation.patch
new file mode 100644
index 0000000..181d38d
--- /dev/null
+++ b/queue/sysfs-cpu-fix-typos-in-vulnerability-documentation.patch
@@ -0,0 +1,35 @@
+From 9ecccfaa7cb5249bd31bdceb93fcf5bedb8a24d8 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Tue, 9 Jan 2018 15:02:51 +0000
+Subject: sysfs/cpu: Fix typos in vulnerability documentation
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 9ecccfaa7cb5249bd31bdceb93fcf5bedb8a24d8 upstream.
+
+Fixes: 87590ce6e ("sysfs/cpu: Add vulnerability folder")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/ABI/testing/sysfs-devices-system-cpu |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -355,7 +355,7 @@ What:		/sys/devices/system/cpu/vulnerabi
+ 		/sys/devices/system/cpu/vulnerabilities/meltdown
+ 		/sys/devices/system/cpu/vulnerabilities/spectre_v1
+ 		/sys/devices/system/cpu/vulnerabilities/spectre_v2
+-Date:		Januar 2018
++Date:		January 2018
+ Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Description:	Information about CPU vulnerabilities
+ 
+@@ -365,4 +365,4 @@ Description:	Information about CPU vulne
+ 
+ 		"Not affected"	  CPU is not affected by the vulnerability
+ 		"Vulnerable"	  CPU is affected and no mitigation in effect
+-		"Mitigation: $M"  CPU is affetcted and mitigation $M is in effect
++		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
diff --git a/queue/vfs-fdtable-prevent-bounds-check-bypass-via-speculative-execution.patch b/queue/vfs-fdtable-prevent-bounds-check-bypass-via-speculative-execution.patch
new file mode 100644
index 0000000..6ed1c28
--- /dev/null
+++ b/queue/vfs-fdtable-prevent-bounds-check-bypass-via-speculative-execution.patch
@@ -0,0 +1,54 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:03:05 -0800
+Subject: vfs, fdtable: Prevent bounds-check bypass via speculative execution
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit 56c30ba7b348b90484969054d561f711ba196507)
+
+'fd' is a user controlled value that is used as a data dependency to
+read from the 'fdt->fd' array.  In order to avoid potential leaks of
+kernel memory values, block speculative execution of the instruction
+stream that could issue reads based on an invalid 'file *' returned from
+__fcheck_files.
+
+Co-developed-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: torvalds@linux-foundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727418500.33451.17392199002892248656.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/fdtable.h |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/linux/fdtable.h
++++ b/include/linux/fdtable.h
+@@ -9,6 +9,7 @@
+ #include <linux/compiler.h>
+ #include <linux/spinlock.h>
+ #include <linux/rcupdate.h>
++#include <linux/nospec.h>
+ #include <linux/types.h>
+ #include <linux/init.h>
+ #include <linux/fs.h>
+@@ -81,8 +82,10 @@ static inline struct file *__fcheck_file
+ {
+ 	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+ 
+-	if (fd < fdt->max_fds)
++	if (fd < fdt->max_fds) {
++		fd = array_index_nospec(fd, fdt->max_fds);
+ 		return rcu_dereference_raw(fdt->fd[fd]);
++	}
+ 	return NULL;
+ }
+ 
diff --git a/queue/vsyscall-fix-permissions-for-emulate-mode-with-kaiser-pti.patch b/queue/vsyscall-fix-permissions-for-emulate-mode-with-kaiser-pti.patch
new file mode 100644
index 0000000..bce5e67
--- /dev/null
+++ b/queue/vsyscall-fix-permissions-for-emulate-mode-with-kaiser-pti.patch
@@ -0,0 +1,72 @@
+From ben.hutchings@codethink.co.uk  Fri Jan 26 17:35:59 2018
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Fri, 26 Jan 2018 16:23:02 +0000
+Subject: vsyscall: Fix permissions for emulate mode with KAISER/PTI
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Borislav Petkov <bp@suse.de>, Hugh Dickins <hughd@google.com>, stable@vger.kernel.org
+Message-ID: <20180126162302.ei4tmiltl73npmr6@xylophone.i.decadent.org.uk>
+
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+
+The backport of KAISER to 4.4 turned vsyscall emulate mode into native
+mode.  Add a vsyscall_pgprot variable to hold the correct page
+protections, like Borislav and Hugh did for 3.2 and 3.18.
+
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c |    7 ++++---
+ arch/x86/include/asm/vsyscall.h       |    1 +
+ arch/x86/mm/kaiser.c                  |    2 +-
+ 3 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -46,6 +46,7 @@ static enum { EMULATE, NATIVE, NONE } vs
+ #else
+ 	EMULATE;
+ #endif
++unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL;
+ 
+ static int __init vsyscall_setup(char *str)
+ {
+@@ -336,11 +337,11 @@ void __init map_vsyscall(void)
+ 	extern char __vsyscall_page;
+ 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
+ 
++	if (vsyscall_mode != NATIVE)
++		vsyscall_pgprot = __PAGE_KERNEL_VVAR;
+ 	if (vsyscall_mode != NONE)
+ 		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+-			     vsyscall_mode == NATIVE
+-			     ? PAGE_KERNEL_VSYSCALL
+-			     : PAGE_KERNEL_VVAR);
++			     __pgprot(vsyscall_pgprot));
+ 
+ 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
+ 		     (unsigned long)VSYSCALL_ADDR);
+--- a/arch/x86/include/asm/vsyscall.h
++++ b/arch/x86/include/asm/vsyscall.h
+@@ -13,6 +13,7 @@ extern void map_vsyscall(void);
+  */
+ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+ extern bool vsyscall_enabled(void);
++extern unsigned long vsyscall_pgprot;
+ #else
+ static inline void map_vsyscall(void) {}
+ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -344,7 +344,7 @@ void __init kaiser_init(void)
+ 	if (vsyscall_enabled())
+ 		kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
+ 					  PAGE_SIZE,
+-					   __PAGE_KERNEL_VSYSCALL);
++					  vsyscall_pgprot);
+ 
+ 	for_each_possible_cpu(cpu) {
+ 		void *percpu_vaddr = __per_cpu_user_mapped_start +
diff --git a/queue/x86-alternatives-add-missing-n-at-end-of-alternative-inline-asm.patch b/queue/x86-alternatives-add-missing-n-at-end-of-alternative-inline-asm.patch
new file mode 100644
index 0000000..483e564
--- /dev/null
+++ b/queue/x86-alternatives-add-missing-n-at-end-of-alternative-inline-asm.patch
@@ -0,0 +1,56 @@
+From b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 4 Jan 2018 14:37:05 +0000
+Subject: x86/alternatives: Add missing '\n' at end of ALTERNATIVE inline asm
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 upstream.
+
+Where an ALTERNATIVE is used in the middle of an inline asm block, this
+would otherwise lead to the following instruction being appended directly
+to the trailing ".popsection", and a failed compile.
+
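+A hypothetical reproducer of the failure mode fixed here (the feature bit
+is only illustrative):
+
+  /* Without the trailing '\n', the "nop" below ended up appended to the
+   * ".popsection" emitted by ALTERNATIVE(), producing an invalid
+   * ".popsectionnop" line and an assembler error. */
+  asm volatile(ALTERNATIVE("", "lfence", X86_FEATURE_XMM2)
+               "nop");
+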
+Fixes: 9cebed423c84 ("x86, alternative: Use .pushsection/.popsection")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: ak@linux.intel.com
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Paul Turner <pjt@google.com>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180104143710.8961-8-dwmw@amazon.co.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/alternative.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -139,7 +139,7 @@ static inline int alternatives_text_rese
+ 	".popsection\n"							\
+ 	".pushsection .altinstr_replacement, \"ax\"\n"			\
+ 	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
+-	".popsection"
++	".popsection\n"
+ 
+ #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+ 	OLDINSTR_2(oldinstr, 1, 2)					\
+@@ -150,7 +150,7 @@ static inline int alternatives_text_rese
+ 	".pushsection .altinstr_replacement, \"ax\"\n"			\
+ 	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
+ 	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
+-	".popsection"
++	".popsection\n"
+ 
+ /*
+  * Alternative instructions for different CPU types or capabilities.
diff --git a/queue/x86-alternatives-fix-optimize_nops-checking.patch b/queue/x86-alternatives-fix-optimize_nops-checking.patch
new file mode 100644
index 0000000..3574563
--- /dev/null
+++ b/queue/x86-alternatives-fix-optimize_nops-checking.patch
@@ -0,0 +1,53 @@
+From 612e8e9350fd19cae6900cf36ea0c6892d1a0dca Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Wed, 10 Jan 2018 12:28:16 +0100
+Subject: x86/alternatives: Fix optimize_nops() checking
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 612e8e9350fd19cae6900cf36ea0c6892d1a0dca upstream.
+
+The alternatives code checks only whether the first byte is a NOP, but
+with NOPs in front of the payload and actual instructions after them this
+breaks the "optimized" test.
+
+Make sure to scan all bytes before deciding to optimize the NOPs in there.
+
+Reported-by: David Woodhouse <dwmw2@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Andrew Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180110112815.mgciyf5acwacphkq@pd.tnic
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -340,9 +340,12 @@ done:
+ static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+ {
+ 	unsigned long flags;
++	int i;
+ 
+-	if (instr[0] != 0x90)
+-		return;
++	for (i = 0; i < a->padlen; i++) {
++		if (instr[i] != 0x90)
++			return;
++	}
+ 
+ 	local_irq_save(flags);
+ 	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
diff --git a/queue/x86-asm-move-status-from-thread_struct-to-thread_info.patch b/queue/x86-asm-move-status-from-thread_struct-to-thread_info.patch
new file mode 100644
index 0000000..b22fc0a
--- /dev/null
+++ b/queue/x86-asm-move-status-from-thread_struct-to-thread_info.patch
@@ -0,0 +1,172 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 Jan 2018 10:38:50 -0800
+Subject: x86/asm: Move 'status' from thread_struct to thread_info
+
+From: Andy Lutomirski <luto@kernel.org>
+
+
+(cherry picked from commit 37a8f7c38339b22b69876d6f5a0ab851565284e3)
+
+The TS_COMPAT bit is very hot and is accessed from code paths that mostly
+also touch thread_info::flags.  Move it into struct thread_info to improve
+cache locality.
+
+The only reason it was in thread_struct is that there was a brief period
+during which arch-specific fields were not allowed in struct thread_info.
+
+Linus suggested further changing:
+
+  ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+
+to:
+
+  if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED)))
+          ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+
+on the theory that frequently dirtying the cacheline even in pure 64-bit
+code that never needs to modify status hurts performance.  That could be a
+reasonable followup patch, but I suspect it matters less on top of this
+patch.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
+Link: https://lkml.kernel.org/r/03148bcc1b217100e6e8ecf6a5468c45cf4304b6.1517164461.git.luto@kernel.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/common.c            |    4 ++--
+ arch/x86/include/asm/processor.h   |    2 --
+ arch/x86/include/asm/syscall.h     |    6 +++---
+ arch/x86/include/asm/thread_info.h |    3 ++-
+ arch/x86/kernel/process_64.c       |    4 ++--
+ arch/x86/kernel/ptrace.c           |    2 +-
+ arch/x86/kernel/signal.c           |    2 +-
+ 7 files changed, 11 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/entry/common.c
++++ b/arch/x86/entry/common.c
+@@ -201,7 +201,7 @@ __visible inline void prepare_exit_to_us
+ 	 * special case only applies after poking regs and before the
+ 	 * very next return to user mode.
+ 	 */
+-	current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
++	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+ #endif
+ 
+ 	user_enter_irqoff();
+@@ -299,7 +299,7 @@ static __always_inline void do_syscall_3
+ 	unsigned int nr = (unsigned int)regs->orig_ax;
+ 
+ #ifdef CONFIG_IA32_EMULATION
+-	current->thread.status |= TS_COMPAT;
++	ti->status |= TS_COMPAT;
+ #endif
+ 
+ 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -391,8 +391,6 @@ struct thread_struct {
+ 	unsigned short		gsindex;
+ #endif
+ 
+-	u32			status;		/* thread synchronous flags */
+-
+ #ifdef CONFIG_X86_64
+ 	unsigned long		fsbase;
+ 	unsigned long		gsbase;
+--- a/arch/x86/include/asm/syscall.h
++++ b/arch/x86/include/asm/syscall.h
+@@ -60,7 +60,7 @@ static inline long syscall_get_error(str
+ 	 * TS_COMPAT is set for 32-bit syscall entries and then
+ 	 * remains set until we return to user mode.
+ 	 */
+-	if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
++	if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ 		/*
+ 		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+ 		 * and will match correctly in comparisons.
+@@ -116,7 +116,7 @@ static inline void syscall_get_arguments
+ 					 unsigned long *args)
+ {
+ # ifdef CONFIG_IA32_EMULATION
+-	if (task->thread.status & TS_COMPAT)
++	if (task->thread_info.status & TS_COMPAT)
+ 		switch (i) {
+ 		case 0:
+ 			if (!n--) break;
+@@ -177,7 +177,7 @@ static inline void syscall_set_arguments
+ 					 const unsigned long *args)
+ {
+ # ifdef CONFIG_IA32_EMULATION
+-	if (task->thread.status & TS_COMPAT)
++	if (task->thread_info.status & TS_COMPAT)
+ 		switch (i) {
+ 		case 0:
+ 			if (!n--) break;
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -54,6 +54,7 @@ struct task_struct;
+ 
+ struct thread_info {
+ 	unsigned long		flags;		/* low level flags */
++	u32			status;		/* thread synchronous flags */
+ };
+ 
+ #define INIT_THREAD_INFO(tsk)			\
+@@ -213,7 +214,7 @@ static inline int arch_within_stack_fram
+ #define in_ia32_syscall() true
+ #else
+ #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
+-			   current->thread.status & TS_COMPAT)
++			   current_thread_info()->status & TS_COMPAT)
+ #endif
+ 
+ /*
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -538,7 +538,7 @@ void set_personality_ia32(bool x32)
+ 		current->personality &= ~READ_IMPLIES_EXEC;
+ 		/* in_compat_syscall() uses the presence of the x32
+ 		   syscall bit flag to determine compat status */
+-		current->thread.status &= ~TS_COMPAT;
++		current_thread_info()->status &= ~TS_COMPAT;
+ 	} else {
+ 		set_thread_flag(TIF_IA32);
+ 		clear_thread_flag(TIF_X32);
+@@ -546,7 +546,7 @@ void set_personality_ia32(bool x32)
+ 			current->mm->context.ia32_compat = TIF_IA32;
+ 		current->personality |= force_personality32;
+ 		/* Prepare the first "return" to user space */
+-		current->thread.status |= TS_COMPAT;
++		current_thread_info()->status |= TS_COMPAT;
+ 	}
+ }
+ EXPORT_SYMBOL_GPL(set_personality_ia32);
+--- a/arch/x86/kernel/ptrace.c
++++ b/arch/x86/kernel/ptrace.c
+@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *
+ 		 */
+ 		regs->orig_ax = value;
+ 		if (syscall_get_nr(child, regs) >= 0)
+-			child->thread.status |= TS_I386_REGS_POKED;
++			child->thread_info.status |= TS_I386_REGS_POKED;
+ 		break;
+ 
+ 	case offsetof(struct user32, regs.eflags):
+--- a/arch/x86/kernel/signal.c
++++ b/arch/x86/kernel/signal.c
+@@ -785,7 +785,7 @@ static inline unsigned long get_nr_resta
+ 	 * than the tracee.
+ 	 */
+ #ifdef CONFIG_IA32_EMULATION
+-	if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
++	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+ 		return __NR_ia32_restart_syscall;
+ #endif
+ #ifdef CONFIG_X86_X32_ABI
diff --git a/queue/x86-asm-use-register-variable-to-get-stack-pointer-value.patch b/queue/x86-asm-use-register-variable-to-get-stack-pointer-value.patch
new file mode 100644
index 0000000..d1036be
--- /dev/null
+++ b/queue/x86-asm-use-register-variable-to-get-stack-pointer-value.patch
@@ -0,0 +1,138 @@
+From 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Fri, 29 Sep 2017 17:15:36 +0300
+Subject: x86/asm: Use register variable to get stack pointer value
+
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+
+commit 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc upstream.
+
+Currently we use the current_stack_pointer() function to get the value
+of the stack pointer register. Since commit:
+
+  f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang")
+
+... we have a stack register variable declared. It can be used instead of
+the current_stack_pointer() function, which allows optimizing away some
+excessive "mov %rsp, %<dst>" instructions:
+
+ -mov    %rsp,%rdx
+ -sub    %rdx,%rax
+ -cmp    $0x3fff,%rax
+ -ja     ffffffff810722fd <ist_begin_non_atomic+0x2d>
+
+ +sub    %rsp,%rax
+ +cmp    $0x3fff,%rax
+ +ja     ffffffff810722fa <ist_begin_non_atomic+0x2a>
+
+Remove current_stack_pointer(), rename __asm_call_sp to current_stack_pointer
+and use it instead of the removed function.
+
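+A hypothetical usage sketch of the resulting ASM_CALL_CONSTRAINT (the called
+function is made up for illustration):
+
+  int ret;
+
+  /* Listing the stack pointer register variable as an output keeps the
+   * compiler from emitting this asm before the frame pointer is set up. */
+  asm volatile("call my_helper"
+               : "=a" (ret), ASM_CALL_CONSTRAINT
+               : : "memory");
+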
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20170929141537.29167-1-aryabinin@virtuozzo.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+[dwmw2: We want ASM_CALL_CONSTRAINT for retpoline]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm.h         |   11 +++++++++++
+ arch/x86/include/asm/thread_info.h |   11 -----------
+ arch/x86/kernel/irq_32.c           |    6 +++---
+ arch/x86/kernel/traps.c            |    2 +-
+ arch/x86/mm/tlb.c                  |    2 +-
+ 5 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/include/asm/asm.h
++++ b/arch/x86/include/asm/asm.h
+@@ -125,4 +125,15 @@
+ /* For C file, we already have NOKPROBE_SYMBOL macro */
+ #endif
+ 
++#ifndef __ASSEMBLY__
++/*
++ * This output constraint should be used for any inline asm which has a "call"
++ * instruction.  Otherwise the asm may be inserted before the frame pointer
++ * gets set up by the containing function.  If you forget to do this, objtool
++ * may print a "call without frame pointer save/setup" warning.
++ */
++register unsigned long current_stack_pointer asm(_ASM_SP);
++#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
++#endif
++
+ #endif /* _ASM_X86_ASM_H */
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -152,17 +152,6 @@ struct thread_info {
+  */
+ #ifndef __ASSEMBLY__
+ 
+-static inline unsigned long current_stack_pointer(void)
+-{
+-	unsigned long sp;
+-#ifdef CONFIG_X86_64
+-	asm("mov %%rsp,%0" : "=g" (sp));
+-#else
+-	asm("mov %%esp,%0" : "=g" (sp));
+-#endif
+-	return sp;
+-}
+-
+ /*
+  * Walks up the stack frames to make sure that the specified object is
+  * entirely contained by a single stack frame.
+--- a/arch/x86/kernel/irq_32.c
++++ b/arch/x86/kernel/irq_32.c
+@@ -64,7 +64,7 @@ static void call_on_stack(void *func, vo
+ 
+ static inline void *current_stack(void)
+ {
+-	return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
++	return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
+ }
+ 
+ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
+@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(i
+ 
+ 	/* Save the next esp at the bottom of the stack */
+ 	prev_esp = (u32 *)irqstk;
+-	*prev_esp = current_stack_pointer();
++	*prev_esp = current_stack_pointer;
+ 
+ 	if (unlikely(overflow))
+ 		call_on_stack(print_stack_overflow, isp);
+@@ -139,7 +139,7 @@ void do_softirq_own_stack(void)
+ 
+ 	/* Push the previous esp onto the stack */
+ 	prev_esp = (u32 *)irqstk;
+-	*prev_esp = current_stack_pointer();
++	*prev_esp = current_stack_pointer;
+ 
+ 	call_on_stack(__do_softirq, isp);
+ }
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs
+ 	 * from double_fault.
+ 	 */
+ 	BUG_ON((unsigned long)(current_top_of_stack() -
+-			       current_stack_pointer()) >= THREAD_SIZE);
++			       current_stack_pointer) >= THREAD_SIZE);
+ 
+ 	preempt_enable_no_resched();
+ }
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -110,7 +110,7 @@ void switch_mm_irqs_off(struct mm_struct
+ 			 * mapped in the new pgd, we'll double-fault.  Forcibly
+ 			 * map it.
+ 			 */
+-			unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
++			unsigned int stack_pgd_index = pgd_index(current_stack_pointer);
+ 
+ 			pgd_t *pgd = next->pgd + stack_pgd_index;
+ 
diff --git a/queue/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch b/queue/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch
new file mode 100644
index 0000000..bb7c7d3
--- /dev/null
+++ b/queue/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch
@@ -0,0 +1,178 @@
+From e505371dd83963caae1a37ead9524e8d997341be Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 17 Jul 2017 16:10:33 -0500
+Subject: x86/boot: Add early cmdline parsing for options with arguments
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit e505371dd83963caae1a37ead9524e8d997341be upstream.
+
+Add a cmdline_find_option() function to look for cmdline options that
+take arguments. The argument is returned in a supplied buffer and the
+argument length (regardless of whether it fits in the supplied buffer)
+is returned, with -1 indicating not found.
+
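+A hypothetical caller sketch (the option name is only an example):
+
+  char arg[32];
+  int len;
+
+  len = cmdline_find_option(boot_command_line, "spectre_v2",
+                            arg, sizeof(arg));
+  if (len < 0)
+          return;                         /* option not present */
+  if (len >= (int)sizeof(arg))
+          return;                         /* argument did not fit */
+  /* arg now holds the NUL-terminated argument, e.g. "retpoline" */
+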
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Larry Woodman <lwoodman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Toshimitsu Kani <toshi.kani@hpe.com>
+Cc: kasan-dev@googlegroups.com
+Cc: kvm@vger.kernel.org
+Cc: linux-arch@vger.kernel.org
+Cc: linux-doc@vger.kernel.org
+Cc: linux-efi@vger.kernel.org
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cmdline.h |    2 
+ arch/x86/lib/cmdline.c         |  105 +++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 107 insertions(+)
+
+--- a/arch/x86/include/asm/cmdline.h
++++ b/arch/x86/include/asm/cmdline.h
+@@ -2,5 +2,7 @@
+ #define _ASM_X86_CMDLINE_H
+ 
+ int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
++int cmdline_find_option(const char *cmdline_ptr, const char *option,
++			char *buffer, int bufsize);
+ 
+ #endif /* _ASM_X86_CMDLINE_H */
+--- a/arch/x86/lib/cmdline.c
++++ b/arch/x86/lib/cmdline.c
+@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *c
+ 	return 0;	/* Buffer overrun */
+ }
+ 
++/*
++ * Find a non-boolean option (i.e. option=argument). In accordance with
++ * standard Linux practice, if this option is repeated, this returns the
++ * last instance on the command line.
++ *
++ * @cmdline: the cmdline string
++ * @max_cmdline_size: the maximum size of cmdline
++ * @option: option string to look for
++ * @buffer: memory buffer to return the option argument
++ * @bufsize: size of the supplied memory buffer
++ *
++ * Returns the length of the argument (regardless of if it was
++ * truncated to fit in the buffer), or -1 on not found.
++ */
++static int
++__cmdline_find_option(const char *cmdline, int max_cmdline_size,
++		      const char *option, char *buffer, int bufsize)
++{
++	char c;
++	int pos = 0, len = -1;
++	const char *opptr = NULL;
++	char *bufptr = buffer;
++	enum {
++		st_wordstart = 0,	/* Start of word/after whitespace */
++		st_wordcmp,	/* Comparing this word */
++		st_wordskip,	/* Miscompare, skip */
++		st_bufcpy,	/* Copying this to buffer */
++	} state = st_wordstart;
++
++	if (!cmdline)
++		return -1;      /* No command line */
++
++	/*
++	 * This 'pos' check ensures we do not overrun
++	 * a non-NULL-terminated 'cmdline'
++	 */
++	while (pos++ < max_cmdline_size) {
++		c = *(char *)cmdline++;
++		if (!c)
++			break;
++
++		switch (state) {
++		case st_wordstart:
++			if (myisspace(c))
++				break;
++
++			state = st_wordcmp;
++			opptr = option;
++			/* fall through */
++
++		case st_wordcmp:
++			if ((c == '=') && !*opptr) {
++				/*
++				 * We matched all the way to the end of the
++				 * option we were looking for, prepare to
++				 * copy the argument.
++				 */
++				len = 0;
++				bufptr = buffer;
++				state = st_bufcpy;
++				break;
++			} else if (c == *opptr++) {
++				/*
++				 * We are currently matching, so continue
++				 * to the next character on the cmdline.
++				 */
++				break;
++			}
++			state = st_wordskip;
++			/* fall through */
++
++		case st_wordskip:
++			if (myisspace(c))
++				state = st_wordstart;
++			break;
++
++		case st_bufcpy:
++			if (myisspace(c)) {
++				state = st_wordstart;
++			} else {
++				/*
++				 * Increment len, but don't overrun the
++				 * supplied buffer and leave room for the
++				 * NULL terminator.
++				 */
++				if (++len < bufsize)
++					*bufptr++ = c;
++			}
++			break;
++		}
++	}
++
++	if (bufsize)
++		*bufptr = '\0';
++
++	return len;
++}
++
+ int cmdline_find_option_bool(const char *cmdline, const char *option)
+ {
+ 	return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
+ }
++
++int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
++			int bufsize)
++{
++	return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
++				     buffer, bufsize);
++}
diff --git a/queue/x86-bugs-drop-one-mitigation-from-dmesg.patch b/queue/x86-bugs-drop-one-mitigation-from-dmesg.patch
new file mode 100644
index 0000000..d9380c1
--- /dev/null
+++ b/queue/x86-bugs-drop-one-mitigation-from-dmesg.patch
@@ -0,0 +1,52 @@
+From foo@baz Thu Feb  8 03:30:27 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 26 Jan 2018 13:11:39 +0100
+Subject: x86/bugs: Drop one "mitigation" from dmesg
+
+From: Borislav Petkov <bp@suse.de>
+
+(cherry picked from commit 55fa19d3e51f33d9cd4056d25836d93abf9438db)
+
+Make
+
+[    0.031118] Spectre V2 mitigation: Mitigation: Full generic retpoline
+
+into
+
+[    0.031118] Spectre V2: Mitigation: Full generic retpoline
+
+to avoid printing "mitigation" twice in the mitigation string.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: riel@redhat.com
+Cc: ak@linux.intel.com
+Cc: peterz@infradead.org
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: jikos@kernel.org
+Cc: luto@amacapital.net
+Cc: dave.hansen@intel.com
+Cc: torvalds@linux-foundation.org
+Cc: keescook@google.com
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: tim.c.chen@linux.intel.com
+Cc: pjt@google.com
+Link: https://lkml.kernel.org/r/20180126121139.31959-5-bp@alien8.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -90,7 +90,7 @@ static const char *spectre_v2_strings[]
+ };
+ 
+ #undef pr_fmt
+-#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
++#define pr_fmt(fmt)     "Spectre V2 : " fmt
+ 
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+ static bool spectre_v2_bad_module;
diff --git a/queue/x86-cpu-amd-make-lfence-a-serializing-instruction.patch b/queue/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
new file mode 100644
index 0000000..db8ec7a
--- /dev/null
+++ b/queue/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
@@ -0,0 +1,66 @@
+From e4d0e84e490790798691aaa0f2e598637f1867ec Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 8 Jan 2018 16:09:21 -0600
+Subject: x86/cpu/AMD: Make LFENCE a serializing instruction
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit e4d0e84e490790798691aaa0f2e598637f1867ec upstream.
+
+To aid in speculation control, make LFENCE a serializing instruction
+since it has less overhead than MFENCE.  This is done by setting bit 1
+of MSR 0xc0011029 (DE_CFG).  Some families that support LFENCE do not
+have this MSR.  For these families, the LFENCE instruction is already
+serializing.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180108220921.12580.71694.stgit@tlendack-t1.amdoffice.net
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/msr-index.h |    2 ++
+ arch/x86/kernel/cpu/amd.c        |   10 ++++++++++
+ 2 files changed, 12 insertions(+)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -330,6 +330,8 @@
+ #define FAM10H_MMIO_CONF_BASE_MASK	0xfffffffULL
+ #define FAM10H_MMIO_CONF_BASE_SHIFT	20
+ #define MSR_FAM10H_NODE_ID		0xc001100c
++#define MSR_F10H_DECFG			0xc0011029
++#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT	1
+ 
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1			0xc001001a
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -782,6 +782,16 @@ static void init_amd(struct cpuinfo_x86
+ 		set_cpu_cap(c, X86_FEATURE_K8);
+ 
+ 	if (cpu_has(c, X86_FEATURE_XMM2)) {
++		/*
++		 * A serializing LFENCE has less overhead than MFENCE, so
++		 * use it for execution serialization.  On families which
++		 * don't have that MSR, LFENCE is already serializing.
++		 * msr_set_bit() uses the safe accessors, too, even if the MSR
++		 * is not present.
++		 */
++		msr_set_bit(MSR_F10H_DECFG,
++			    MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
++
+ 		/* MFENCE stops RDTSC speculation */
+ 		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ 	}
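
As a hedged aside (not part of the patch), the effect of the msr_set_bit() call can be
observed from user space through the msr driver. The sketch below assumes an AMD system
on which MSR 0xc0011029 exists, the "msr" kernel module loaded, and root privileges;
the constants mirror the defines added above.

    /* Read DE_CFG (0xc0011029) on CPU 0 and test the LFENCE-serializing bit. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    #define MSR_F10H_DECFG			0xc0011029
    #define MSR_F10H_DECFG_LFENCE_SERIALIZE	(1ULL << 1)

    int main(void)
    {
    	uint64_t val;
    	int fd = open("/dev/cpu/0/msr", O_RDONLY);

    	/* The msr driver returns the 8-byte MSR value at offset == MSR number */
    	if (fd < 0 || pread(fd, &val, sizeof(val), MSR_F10H_DECFG) != sizeof(val)) {
    		perror("rdmsr DE_CFG");
    		return 1;
    	}
    	printf("LFENCE is %sserializing\n",
    	       (val & MSR_F10H_DECFG_LFENCE_SERIALIZE) ? "" : "not ");
    	return 0;
    }
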
diff --git a/queue/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch b/queue/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
new file mode 100644
index 0000000..8c8a0ce
--- /dev/null
+++ b/queue/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
@@ -0,0 +1,81 @@
+From 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 8 Jan 2018 16:09:32 -0600
+Subject: x86/cpu/AMD: Use LFENCE_RDTSC in preference to MFENCE_RDTSC
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f upstream.
+
+With LFENCE now a serializing instruction, use LFENCE_RDTSC in preference
+to MFENCE_RDTSC.  However, since the kernel could be running under a
+hypervisor that does not support writing that MSR, read the MSR back and
+verify that the bit has been set successfully.  If the MSR can be read
+and the bit is set, then set the LFENCE_RDTSC feature, otherwise set the
+MFENCE_RDTSC feature.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180108220932.12580.52458.stgit@tlendack-t1.amdoffice.net
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/msr-index.h |    1 +
+ arch/x86/kernel/cpu/amd.c        |   18 ++++++++++++++++--
+ 2 files changed, 17 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -332,6 +332,7 @@
+ #define MSR_FAM10H_NODE_ID		0xc001100c
+ #define MSR_F10H_DECFG			0xc0011029
+ #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT	1
++#define MSR_F10H_DECFG_LFENCE_SERIALIZE		BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+ 
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1			0xc001001a
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -782,6 +782,9 @@ static void init_amd(struct cpuinfo_x86
+ 		set_cpu_cap(c, X86_FEATURE_K8);
+ 
+ 	if (cpu_has(c, X86_FEATURE_XMM2)) {
++		unsigned long long val;
++		int ret;
++
+ 		/*
+ 		 * A serializing LFENCE has less overhead than MFENCE, so
+ 		 * use it for execution serialization.  On families which
+@@ -792,8 +795,19 @@ static void init_amd(struct cpuinfo_x86
+ 		msr_set_bit(MSR_F10H_DECFG,
+ 			    MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+ 
+-		/* MFENCE stops RDTSC speculation */
+-		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
++		/*
++		 * Verify that the MSR write was successful (could be running
++		 * under a hypervisor) and only then assume that LFENCE is
++		 * serializing.
++		 */
++		ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
++		if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
++			/* A serializing LFENCE stops RDTSC speculation */
++			set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
++		} else {
++			/* MFENCE stops RDTSC speculation */
++			set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
++		}
+ 	}
+ 
+ 	/*
diff --git a/queue/x86-cpu-bugs-make-retpoline-module-warning-conditional.patch b/queue/x86-cpu-bugs-make-retpoline-module-warning-conditional.patch
new file mode 100644
index 0000000..f8a1204
--- /dev/null
+++ b/queue/x86-cpu-bugs-make-retpoline-module-warning-conditional.patch
@@ -0,0 +1,66 @@
+From foo@baz Thu Feb  8 03:30:27 CET 2018
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 27 Jan 2018 15:45:14 +0100
+Subject: x86/cpu/bugs: Make retpoline module warning conditional
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+(cherry picked from commit e383095c7fe8d218e00ec0f83e4b95ed4e627b02)
+
+If sysfs is disabled and RETPOLINE not defined:
+
+arch/x86/kernel/cpu/bugs.c:97:13: warning: ‘spectre_v2_bad_module’ defined but not used
+[-Wunused-variable]
+ static bool spectre_v2_bad_module;
+
+Hide it.
+
+Fixes: caf7501a1b4e ("module/retpoline: Warn about missing retpoline in module")
+Reported-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -93,9 +93,10 @@ static const char *spectre_v2_strings[]
+ #define pr_fmt(fmt)     "Spectre V2 : " fmt
+ 
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+-static bool spectre_v2_bad_module;
+ 
+ #ifdef RETPOLINE
++static bool spectre_v2_bad_module;
++
+ bool retpoline_module_ok(bool has_retpoline)
+ {
+ 	if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
+@@ -105,6 +106,13 @@ bool retpoline_module_ok(bool has_retpol
+ 	spectre_v2_bad_module = true;
+ 	return false;
+ }
++
++static inline const char *spectre_v2_module_string(void)
++{
++	return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
++}
++#else
++static inline const char *spectre_v2_module_string(void) { return ""; }
+ #endif
+ 
+ static void __init spec2_print_if_insecure(const char *reason)
+@@ -299,7 +307,7 @@ ssize_t cpu_show_spectre_v2(struct devic
+ 		return sprintf(buf, "Not affected\n");
+ 
+ 	return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+-		       boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "",
+-		       spectre_v2_bad_module ? " - vulnerable module loaded" : "");
++		       boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "",
++		       spectre_v2_module_string());
+ }
+ #endif
diff --git a/queue/x86-cpu-factor-out-application-of-forced-cpu-caps.patch b/queue/x86-cpu-factor-out-application-of-forced-cpu-caps.patch
new file mode 100644
index 0000000..8c79c8f
--- /dev/null
+++ b/queue/x86-cpu-factor-out-application-of-forced-cpu-caps.patch
@@ -0,0 +1,79 @@
+From 8bf1ebca215c262e48c15a4a15f175991776f57f Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Wed, 18 Jan 2017 11:15:38 -0800
+Subject: x86/cpu: Factor out application of forced CPU caps
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 8bf1ebca215c262e48c15a4a15f175991776f57f upstream.
+
+There are multiple call sites that apply forced CPU caps.  Factor
+them into a helper.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matthew Whitehead <tedheadster@gmail.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
+Link: http://lkml.kernel.org/r/623ff7555488122143e4417de09b18be2085ad06.1484705016.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/common.c |   20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -706,6 +706,16 @@ void cpu_detect(struct cpuinfo_x86 *c)
+ 	}
+ }
+ 
++static void apply_forced_caps(struct cpuinfo_x86 *c)
++{
++	int i;
++
++	for (i = 0; i < NCAPINTS; i++) {
++		c->x86_capability[i] &= ~cpu_caps_cleared[i];
++		c->x86_capability[i] |= cpu_caps_set[i];
++	}
++}
++
+ void get_cpu_cap(struct cpuinfo_x86 *c)
+ {
+ 	u32 eax, ebx, ecx, edx;
+@@ -1086,10 +1096,7 @@ static void identify_cpu(struct cpuinfo_
+ 		this_cpu->c_identify(c);
+ 
+ 	/* Clear/Set all flags overridden by options, after probe */
+-	for (i = 0; i < NCAPINTS; i++) {
+-		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+-		c->x86_capability[i] |= cpu_caps_set[i];
+-	}
++	apply_forced_caps(c);
+ 
+ #ifdef CONFIG_X86_64
+ 	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
+@@ -1151,10 +1158,7 @@ static void identify_cpu(struct cpuinfo_
+ 	 * Clear/Set all flags overridden by options, need do it
+ 	 * before following smp all cpus cap AND.
+ 	 */
+-	for (i = 0; i < NCAPINTS; i++) {
+-		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+-		c->x86_capability[i] |= cpu_caps_set[i];
+-	}
++	apply_forced_caps(c);
+ 
+ 	/*
+ 	 * On SMP, boot_cpu_data holds the common feature set between
diff --git a/queue/x86-cpu-implement-cpu-vulnerabilites-sysfs-functions.patch b/queue/x86-cpu-implement-cpu-vulnerabilites-sysfs-functions.patch
new file mode 100644
index 0000000..3d30088
--- /dev/null
+++ b/queue/x86-cpu-implement-cpu-vulnerabilites-sysfs-functions.patch
@@ -0,0 +1,82 @@
+From 61dc0f555b5c761cdafb0ba5bd41ecf22d68a4c4 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 7 Jan 2018 22:48:01 +0100
+Subject: x86/cpu: Implement CPU vulnerabilities sysfs functions
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 61dc0f555b5c761cdafb0ba5bd41ecf22d68a4c4 upstream.
+
+Implement the CPU vulnerability show functions for meltdown, spectre_v1 and
+spectre_v2.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://lkml.kernel.org/r/20180107214913.177414879@linutronix.de
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/Kconfig           |    1 +
+ arch/x86/kernel/cpu/bugs.c |   29 +++++++++++++++++++++++++++++
+ 2 files changed, 30 insertions(+)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -64,6 +64,7 @@ config X86
+ 	select GENERIC_CLOCKEVENTS_MIN_ADJUST
+ 	select GENERIC_CMOS_UPDATE
+ 	select GENERIC_CPU_AUTOPROBE
++	select GENERIC_CPU_VULNERABILITIES
+ 	select GENERIC_EARLY_IOREMAP
+ 	select GENERIC_FIND_FIRST_BIT
+ 	select GENERIC_IOMAP
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -9,6 +9,7 @@
+  */
+ #include <linux/init.h>
+ #include <linux/utsname.h>
++#include <linux/cpu.h>
+ #include <asm/bugs.h>
+ #include <asm/processor.h>
+ #include <asm/processor-flags.h>
+@@ -67,3 +68,31 @@ void __init check_bugs(void)
+ 		set_memory_4k((unsigned long)__va(0), 1);
+ #endif
+ }
++
++#ifdef CONFIG_SYSFS
++ssize_t cpu_show_meltdown(struct device *dev,
++			  struct device_attribute *attr, char *buf)
++{
++	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
++		return sprintf(buf, "Not affected\n");
++	if (boot_cpu_has(X86_FEATURE_KAISER))
++		return sprintf(buf, "Mitigation: PTI\n");
++	return sprintf(buf, "Vulnerable\n");
++}
++
++ssize_t cpu_show_spectre_v1(struct device *dev,
++			    struct device_attribute *attr, char *buf)
++{
++	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
++		return sprintf(buf, "Not affected\n");
++	return sprintf(buf, "Vulnerable\n");
++}
++
++ssize_t cpu_show_spectre_v2(struct device *dev,
++			    struct device_attribute *attr, char *buf)
++{
++	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
++		return sprintf(buf, "Not affected\n");
++	return sprintf(buf, "Vulnerable\n");
++}
++#endif
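
For illustration (not part of the patch), the show functions above surface through the
generic CPU vulnerabilities sysfs interface; a minimal user-space sketch that dumps the
three files might look like this, assuming the standard sysfs mount point.

    /* Print the meltdown/spectre_v1/spectre_v2 status strings from sysfs. */
    #include <stdio.h>

    int main(void)
    {
    	static const char *files[] = {
    		"/sys/devices/system/cpu/vulnerabilities/meltdown",
    		"/sys/devices/system/cpu/vulnerabilities/spectre_v1",
    		"/sys/devices/system/cpu/vulnerabilities/spectre_v2",
    	};
    	char line[128];

    	for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
    		FILE *f = fopen(files[i], "r");

    		if (!f || !fgets(line, sizeof(line), f)) {
    			printf("%s: <unavailable>\n", files[i]);
    			if (f)
    				fclose(f);
    			continue;
    		}
    		printf("%s: %s", files[i], line);	/* line keeps its '\n' */
    		fclose(f);
    	}
    	return 0;
    }
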
diff --git a/queue/x86-cpu-merge-bugs.c-and-bugs_64.c.patch b/queue/x86-cpu-merge-bugs.c-and-bugs_64.c.patch
new file mode 100644
index 0000000..cceebca
--- /dev/null
+++ b/queue/x86-cpu-merge-bugs.c-and-bugs_64.c.patch
@@ -0,0 +1,136 @@
+From 62a67e123e058a67db58bc6a14354dd037bafd0a Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 24 Oct 2016 19:38:43 +0200
+Subject: x86/cpu: Merge bugs.c and bugs_64.c
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 62a67e123e058a67db58bc6a14354dd037bafd0a upstream.
+
+Should be easier when following boot paths. It is probably a leftover
+from the x86 unification eons ago.
+
+No functionality change.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20161024173844.23038-3-bp@alien8.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/Makefile  |    4 +---
+ arch/x86/kernel/cpu/bugs.c    |   26 ++++++++++++++++++++++----
+ arch/x86/kernel/cpu/bugs_64.c |   33 ---------------------------------
+ 3 files changed, 23 insertions(+), 40 deletions(-)
+ delete mode 100644 arch/x86/kernel/cpu/bugs_64.c
+
+--- a/arch/x86/kernel/cpu/Makefile
++++ b/arch/x86/kernel/cpu/Makefile
+@@ -20,13 +20,11 @@ obj-y			:= intel_cacheinfo.o scattered.o
+ obj-y			+= common.o
+ obj-y			+= rdrand.o
+ obj-y			+= match.o
++obj-y			+= bugs.o
+ 
+ obj-$(CONFIG_PROC_FS)	+= proc.o
+ obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+ 
+-obj-$(CONFIG_X86_32)	+= bugs.o
+-obj-$(CONFIG_X86_64)	+= bugs_64.o
+-
+ obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o
+ obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
+ obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -16,6 +16,8 @@
+ #include <asm/msr.h>
+ #include <asm/paravirt.h>
+ #include <asm/alternative.h>
++#include <asm/pgtable.h>
++#include <asm/cacheflush.h>
+ 
+ void __init check_bugs(void)
+ {
+@@ -28,11 +30,13 @@ void __init check_bugs(void)
+ #endif
+ 
+ 	identify_boot_cpu();
+-#ifndef CONFIG_SMP
+-	pr_info("CPU: ");
+-	print_cpu_info(&boot_cpu_data);
+-#endif
+ 
++	if (!IS_ENABLED(CONFIG_SMP)) {
++		pr_info("CPU: ");
++		print_cpu_info(&boot_cpu_data);
++	}
++
++#ifdef CONFIG_X86_32
+ 	/*
+ 	 * Check whether we are able to run this kernel safely on SMP.
+ 	 *
+@@ -48,4 +52,18 @@ void __init check_bugs(void)
+ 	alternative_instructions();
+ 
+ 	fpu__init_check_bugs();
++#else /* CONFIG_X86_64 */
++	alternative_instructions();
++
++	/*
++	 * Make sure the first 2MB area is not mapped by huge pages
++	 * There are typically fixed size MTRRs in there and overlapping
++	 * MTRRs into large pages causes slow downs.
++	 *
++	 * Right now we don't do that with gbpages because there seems
++	 * very little benefit for that case.
++	 */
++	if (!direct_gbpages)
++		set_memory_4k((unsigned long)__va(0), 1);
++#endif
+ }
+--- a/arch/x86/kernel/cpu/bugs_64.c
++++ /dev/null
+@@ -1,33 +0,0 @@
+-/*
+- *  Copyright (C) 1994  Linus Torvalds
+- *  Copyright (C) 2000  SuSE
+- */
+-
+-#include <linux/kernel.h>
+-#include <linux/init.h>
+-#include <asm/alternative.h>
+-#include <asm/bugs.h>
+-#include <asm/processor.h>
+-#include <asm/mtrr.h>
+-#include <asm/cacheflush.h>
+-
+-void __init check_bugs(void)
+-{
+-	identify_boot_cpu();
+-#if !defined(CONFIG_SMP)
+-	pr_info("CPU: ");
+-	print_cpu_info(&boot_cpu_data);
+-#endif
+-	alternative_instructions();
+-
+-	/*
+-	 * Make sure the first 2MB area is not mapped by huge pages
+-	 * There are typically fixed size MTRRs in there and overlapping
+-	 * MTRRs into large pages causes slow downs.
+-	 *
+-	 * Right now we don't do that with gbpages because there seems
+-	 * very little benefit for that case.
+-	 */
+-	if (!direct_gbpages)
+-		set_memory_4k((unsigned long)__va(0), 1);
+-}
diff --git a/queue/x86-cpu-x86-pti-do-not-enable-pti-on-amd-processors.patch b/queue/x86-cpu-x86-pti-do-not-enable-pti-on-amd-processors.patch
new file mode 100644
index 0000000..7a7cbec
--- /dev/null
+++ b/queue/x86-cpu-x86-pti-do-not-enable-pti-on-amd-processors.patch
@@ -0,0 +1,46 @@
+From 694d99d40972f12e59a3696effee8a376b79d7c8 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Tue, 26 Dec 2017 23:43:54 -0600
+Subject: x86/cpu, x86/pti: Do not enable PTI on AMD processors
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit 694d99d40972f12e59a3696effee8a376b79d7c8 upstream.
+
+AMD processors are not subject to the types of attacks that the kernel
+page table isolation feature protects against.  The AMD microarchitecture
+does not allow memory references, including speculative references, that
+access higher privileged data when running in a lesser privileged mode,
+if that access would result in a page fault.
+
+Disable page table isolation by default on AMD processors by not setting
+the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI
+is set.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@tlendack-t1.amdoffice.net
+Cc: Nick Lowe <nick.lowe@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/common.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -883,8 +883,8 @@ static void __init early_identify_cpu(st
+ 
+ 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+ 
+-	/* Assume for now that ALL x86 CPUs are insecure */
+-	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++	if (c->x86_vendor != X86_VENDOR_AMD)
++		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ 
+ 	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ 	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
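
As a hedged illustration (not part of the patch), the vendor distinction made above can
be reproduced from user space with CPUID leaf 0, which returns the vendor string in
EBX, EDX, ECX ("AuthenticAMD" on AMD parts); the sketch uses GCC/Clang's <cpuid.h>.

    /* Check whether this CPU is an AMD part, as the kernel check above does. */
    #include <stdio.h>
    #include <string.h>
    #include <cpuid.h>

    int main(void)
    {
    	unsigned int eax, ebx, ecx, edx;
    	char vendor[13];

    	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
    		return 1;

    	/* Vendor string is packed into EBX, EDX, ECX in that order */
    	memcpy(vendor + 0, &ebx, 4);
    	memcpy(vendor + 4, &edx, 4);
    	memcpy(vendor + 8, &ecx, 4);
    	vendor[12] = '\0';

    	printf("vendor: %s -> %s\n", vendor,
    	       strcmp(vendor, "AuthenticAMD") ?
    	       "Meltdown bug assumed, PTI candidate" :
    	       "PTI not enabled by default (per this patch)");
    	return 0;
    }
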
diff --git a/queue/x86-cpufeature-blacklist-spec_ctrl-pred_cmd-on-early-spectre-v2-microcodes.patch b/queue/x86-cpufeature-blacklist-spec_ctrl-pred_cmd-on-early-spectre-v2-microcodes.patch
new file mode 100644
index 0000000..6397ee8
--- /dev/null
+++ b/queue/x86-cpufeature-blacklist-spec_ctrl-pred_cmd-on-early-spectre-v2-microcodes.patch
@@ -0,0 +1,167 @@
+From foo@baz Wed Feb  7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:14 +0000
+Subject: x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit a5b2966364538a0e68c9fa29bc0a3a1651799035)
+
+This doesn't refuse to load the affected microcodes; it just refuses to
+use the Spectre v2 mitigation features if they're detected, by clearing
+the appropriate feature bits.
+
+The AMD CPUID bits are handled here too, because hypervisors *may* have
+been exposing those bits even on Intel chips, for fine-grained control
+of what's available.
+
+It is non-trivial to use x86_match_cpu() for this table because that
+doesn't handle steppings. And the approach taken in commit bd9240a18
+almost made me lose my lunch.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-7-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/intel-family.h |    7 ++-
+ arch/x86/kernel/cpu/intel.c         |   66 ++++++++++++++++++++++++++++++++++++
+ 2 files changed, 71 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/intel-family.h
++++ b/arch/x86/include/asm/intel-family.h
+@@ -12,6 +12,7 @@
+  */
+ 
+ #define INTEL_FAM6_CORE_YONAH		0x0E
++
+ #define INTEL_FAM6_CORE2_MEROM		0x0F
+ #define INTEL_FAM6_CORE2_MEROM_L	0x16
+ #define INTEL_FAM6_CORE2_PENRYN		0x17
+@@ -21,6 +22,7 @@
+ #define INTEL_FAM6_NEHALEM_G		0x1F /* Auburndale / Havendale */
+ #define INTEL_FAM6_NEHALEM_EP		0x1A
+ #define INTEL_FAM6_NEHALEM_EX		0x2E
++
+ #define INTEL_FAM6_WESTMERE		0x25
+ #define INTEL_FAM6_WESTMERE_EP		0x2C
+ #define INTEL_FAM6_WESTMERE_EX		0x2F
+@@ -36,9 +38,9 @@
+ #define INTEL_FAM6_HASWELL_GT3E		0x46
+ 
+ #define INTEL_FAM6_BROADWELL_CORE	0x3D
+-#define INTEL_FAM6_BROADWELL_XEON_D	0x56
+ #define INTEL_FAM6_BROADWELL_GT3E	0x47
+ #define INTEL_FAM6_BROADWELL_X		0x4F
++#define INTEL_FAM6_BROADWELL_XEON_D	0x56
+ 
+ #define INTEL_FAM6_SKYLAKE_MOBILE	0x4E
+ #define INTEL_FAM6_SKYLAKE_DESKTOP	0x5E
+@@ -57,9 +59,10 @@
+ #define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
+ #define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
+ #define INTEL_FAM6_ATOM_MERRIFIELD	0x4A /* Tangier */
+-#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Annidale */
++#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Anniedale */
+ #define INTEL_FAM6_ATOM_GOLDMONT	0x5C
+ #define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
++#define INTEL_FAM6_ATOM_GEMINI_LAKE	0x7A
+ 
+ /* Xeon Phi */
+ 
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -61,6 +61,59 @@ void check_mpx_erratum(struct cpuinfo_x8
+ 	}
+ }
+ 
++/*
++ * Early microcode releases for the Spectre v2 mitigation were broken.
++ * Information taken from;
++ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
++ * - https://kb.vmware.com/s/article/52345
++ * - Microcode revisions observed in the wild
++ * - Release note from 20180108 microcode release
++ */
++struct sku_microcode {
++	u8 model;
++	u8 stepping;
++	u32 microcode;
++};
++static const struct sku_microcode spectre_bad_microcodes[] = {
++	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0B,	0x84 },
++	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0A,	0x84 },
++	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x09,	0x84 },
++	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x0A,	0x84 },
++	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x09,	0x84 },
++	{ INTEL_FAM6_SKYLAKE_X,		0x03,	0x0100013e },
++	{ INTEL_FAM6_SKYLAKE_X,		0x04,	0x0200003c },
++	{ INTEL_FAM6_SKYLAKE_MOBILE,	0x03,	0xc2 },
++	{ INTEL_FAM6_SKYLAKE_DESKTOP,	0x03,	0xc2 },
++	{ INTEL_FAM6_BROADWELL_CORE,	0x04,	0x28 },
++	{ INTEL_FAM6_BROADWELL_GT3E,	0x01,	0x1b },
++	{ INTEL_FAM6_BROADWELL_XEON_D,	0x02,	0x14 },
++	{ INTEL_FAM6_BROADWELL_XEON_D,	0x03,	0x07000011 },
++	{ INTEL_FAM6_BROADWELL_X,	0x01,	0x0b000025 },
++	{ INTEL_FAM6_HASWELL_ULT,	0x01,	0x21 },
++	{ INTEL_FAM6_HASWELL_GT3E,	0x01,	0x18 },
++	{ INTEL_FAM6_HASWELL_CORE,	0x03,	0x23 },
++	{ INTEL_FAM6_HASWELL_X,		0x02,	0x3b },
++	{ INTEL_FAM6_HASWELL_X,		0x04,	0x10 },
++	{ INTEL_FAM6_IVYBRIDGE_X,	0x04,	0x42a },
++	/* Updated in the 20180108 release; blacklist until we know otherwise */
++	{ INTEL_FAM6_ATOM_GEMINI_LAKE,	0x01,	0x22 },
++	/* Observed in the wild */
++	{ INTEL_FAM6_SANDYBRIDGE_X,	0x06,	0x61b },
++	{ INTEL_FAM6_SANDYBRIDGE_X,	0x07,	0x712 },
++};
++
++static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
++{
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
++		if (c->x86_model == spectre_bad_microcodes[i].model &&
++		    c->x86_mask == spectre_bad_microcodes[i].stepping)
++			return (c->microcode <= spectre_bad_microcodes[i].microcode);
++	}
++	return false;
++}
++
+ static void early_init_intel(struct cpuinfo_x86 *c)
+ {
+ 	u64 misc_enable;
+@@ -87,6 +140,19 @@ static void early_init_intel(struct cpui
+ 		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
+ 	}
+ 
++	if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
++	     cpu_has(c, X86_FEATURE_STIBP) ||
++	     cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) ||
++	     cpu_has(c, X86_FEATURE_AMD_PRED_CMD) ||
++	     cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) {
++		pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n");
++		clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
++		clear_cpu_cap(c, X86_FEATURE_STIBP);
++		clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL);
++		clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD);
++		clear_cpu_cap(c, X86_FEATURE_AMD_STIBP);
++	}
++
+ 	/*
+ 	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
+ 	 *
diff --git a/queue/x86-cpufeature-move-processor-tracing-out-of-scattered-features.patch b/queue/x86-cpufeature-move-processor-tracing-out-of-scattered-features.patch
new file mode 100644
index 0000000..37baee4
--- /dev/null
+++ b/queue/x86-cpufeature-move-processor-tracing-out-of-scattered-features.patch
@@ -0,0 +1,68 @@
+From 4fdec2034b7540dda461c6ba33325dfcff345c64 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 16 Jan 2018 16:42:25 +0100
+Subject: x86/cpufeature: Move processor tracing out of scattered features
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 4fdec2034b7540dda461c6ba33325dfcff345c64 upstream.
+
+Processor tracing is already enumerated in word 9 (CPUID[7,0].EBX),
+so do not duplicate it in the scattered features word.
+
+Besides being more tidy, this will be useful for KVM when it presents
+processor tracing to the guests.  KVM selects host features that are
+supported by both the host kernel (depending on command line options,
+CPU errata, or whatever) and KVM.  Whenever a full feature word exists,
+KVM's code is written in the expectation that the CPUID bit number
+matches the X86_FEATURE_* bit number, but this is not the case for
+X86_FEATURE_INTEL_PT.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luwei Kang <luwei.kang@intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: kvm@vger.kernel.org
+Link: http://lkml.kernel.org/r/1516117345-34561-1-git-send-email-pbonzini@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeatures.h |    2 +-
+ arch/x86/kernel/cpu/scattered.c    |    1 -
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -197,7 +197,6 @@
+ #define X86_FEATURE_RETPOLINE	( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
+ 
+-#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* Fill RSB on context switches */
+@@ -236,6 +235,7 @@
+ #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
+ #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
+ #define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
++#define X86_FEATURE_INTEL_PT	( 9*32+25) /* Intel Processor Trace */
+ #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
+ #define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+ #define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -31,7 +31,6 @@ void init_scattered_cpuid_features(struc
+ 	const struct cpuid_bit *cb;
+ 
+ 	static const struct cpuid_bit cpuid_bits[] = {
+-		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
+ 		{ X86_FEATURE_AVX512_4VNNIW,	CR_EDX, 2, 0x00000007, 0 },
+ 		{ X86_FEATURE_AVX512_4FMAPS,	CR_EDX, 3, 0x00000007, 0 },
+ 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
diff --git a/queue/x86-cpufeatures-add-amd-feature-bits-for-speculation-control.patch b/queue/x86-cpufeatures-add-amd-feature-bits-for-speculation-control.patch
new file mode 100644
index 0000000..7b48105
--- /dev/null
+++ b/queue/x86-cpufeatures-add-amd-feature-bits-for-speculation-control.patch
@@ -0,0 +1,47 @@
+From foo@baz Wed Feb  7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:11 +0000
+Subject: x86/cpufeatures: Add AMD feature bits for Speculation Control
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit 5d10cbc91d9eb5537998b65608441b592eec65e7)
+
+AMD exposes the PRED_CMD/SPEC_CTRL MSRs slightly differently to Intel.
+See http://lkml.kernel.org/r/2b3e25cc-286d-8bd0-aeaf-9ac4aae39de8@amd.com
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-4-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -258,6 +258,9 @@
+ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+ #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
+ #define X86_FEATURE_IRPERF	(13*32+1) /* Instructions Retired Count */
++#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */
++#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */
++#define X86_FEATURE_AMD_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */
+ 
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+ #define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
diff --git a/queue/x86-cpufeatures-add-cpuid_7_edx-cpuid-leaf.patch b/queue/x86-cpufeatures-add-cpuid_7_edx-cpuid-leaf.patch
new file mode 100644
index 0000000..a47b816
--- /dev/null
+++ b/queue/x86-cpufeatures-add-cpuid_7_edx-cpuid-leaf.patch
@@ -0,0 +1,149 @@
+From foo@baz Wed Feb  7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:09 +0000
+Subject: x86/cpufeatures: Add CPUID_7_EDX CPUID leaf
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit 95ca0ee8636059ea2800dfbac9ecac6212d6b38f)
+
+This is a pure feature bits leaf. There are two AVX512 feature bits in it
+already which were handled as scattered bits, and three more from this leaf
+are going to be added for speculation control features.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeature.h        |    7 +++++--
+ arch/x86/include/asm/cpufeatures.h       |   10 ++++++----
+ arch/x86/include/asm/disabled-features.h |    3 ++-
+ arch/x86/include/asm/required-features.h |    3 ++-
+ arch/x86/kernel/cpu/common.c             |    1 +
+ arch/x86/kernel/cpu/scattered.c          |    2 --
+ 6 files changed, 16 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -28,6 +28,7 @@ enum cpuid_leafs
+ 	CPUID_8000_000A_EDX,
+ 	CPUID_7_ECX,
+ 	CPUID_8000_0007_EBX,
++	CPUID_7_EDX,
+ };
+ 
+ #ifdef CONFIG_X86_FEATURE_NAMES
+@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[
+ 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) ||	\
+ 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) ||	\
+ 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) ||	\
++	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) ||	\
+ 	   REQUIRED_MASK_CHECK					  ||	\
+-	   BUILD_BUG_ON_ZERO(NCAPINTS != 18))
++	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+ 
+ #define DISABLED_MASK_BIT_SET(feature_bit)				\
+ 	 ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK,  0, feature_bit) ||	\
+@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[
+ 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) ||	\
+ 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) ||	\
+ 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) ||	\
++	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) ||	\
+ 	   DISABLED_MASK_CHECK					  ||	\
+-	   BUILD_BUG_ON_ZERO(NCAPINTS != 18))
++	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+ 
+ #define cpu_has(c, bit)							\
+ 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -12,7 +12,7 @@
+ /*
+  * Defines x86 CPU feature bits
+  */
+-#define NCAPINTS	18	/* N 32-bit words worth of info */
++#define NCAPINTS	19	/* N 32-bit words worth of info */
+ #define NBUGINTS	1	/* N 32-bit bug flags */
+ 
+ /*
+@@ -197,9 +197,7 @@
+ #define X86_FEATURE_RETPOLINE	( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
+ 
+-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+-#define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* Fill RSB on context switches */
++#define X86_FEATURE_RSB_CTXSW	( 7*32+19) /* Fill RSB on context switches */
+ 
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+ #define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+@@ -295,6 +293,10 @@
+ #define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
+ #define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
+ 
++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
++#define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
++
+ /*
+  * BUG word(s)
+  */
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -59,6 +59,7 @@
+ #define DISABLED_MASK15	0
+ #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
+ #define DISABLED_MASK17	0
+-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
++#define DISABLED_MASK18	0
++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+ 
+ #endif /* _ASM_X86_DISABLED_FEATURES_H */
+--- a/arch/x86/include/asm/required-features.h
++++ b/arch/x86/include/asm/required-features.h
+@@ -100,6 +100,7 @@
+ #define REQUIRED_MASK15	0
+ #define REQUIRED_MASK16	0
+ #define REQUIRED_MASK17	0
+-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
++#define REQUIRED_MASK18	0
++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+ 
+ #endif /* _ASM_X86_REQUIRED_FEATURES_H */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -737,6 +737,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ 		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
+ 		c->x86_capability[CPUID_7_0_EBX] = ebx;
+ 		c->x86_capability[CPUID_7_ECX] = ecx;
++		c->x86_capability[CPUID_7_EDX] = edx;
+ 	}
+ 
+ 	/* Extended state features: level 0x0000000d */
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -31,8 +31,6 @@ void init_scattered_cpuid_features(struc
+ 	const struct cpuid_bit *cb;
+ 
+ 	static const struct cpuid_bit cpuid_bits[] = {
+-		{ X86_FEATURE_AVX512_4VNNIW,	CR_EDX, 2, 0x00000007, 0 },
+-		{ X86_FEATURE_AVX512_4FMAPS,	CR_EDX, 3, 0x00000007, 0 },
+ 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
+ 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
+ 		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
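
For context, the new word 18 corresponds to CPUID.(EAX=7,ECX=0):EDX, so its contents
can be inspected directly. A minimal sketch (not part of the patch) using the
__get_cpuid_count() helper from a reasonably recent GCC/Clang <cpuid.h>; only the two
AVX-512 bits exist in this word at this point, the speculation-control bits come later.

    /* Dump CPUID.(EAX=7,ECX=0):EDX and the two bits defined in word 18 so far. */
    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
    	unsigned int eax, ebx, ecx, edx;

    	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    		return 1;	/* CPU does not report leaf 7 */

    	printf("CPUID.7.0:EDX = 0x%08x\n", edx);
    	/* Bit numbers match the (18*32+n) feature definitions above */
    	printf("  AVX512_4VNNIW : %u\n", (edx >> 2) & 1);
    	printf("  AVX512_4FMAPS : %u\n", (edx >> 3) & 1);
    	return 0;
    }
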
diff --git a/queue/x86-cpufeatures-add-intel-feature-bits-for-speculation-control.patch b/queue/x86-cpufeatures-add-intel-feature-bits-for-speculation-control.patch
new file mode 100644
index 0000000..568f3bf
--- /dev/null
+++ b/queue/x86-cpufeatures-add-intel-feature-bits-for-speculation-control.patch
@@ -0,0 +1,47 @@
+From foo@baz Wed Feb  7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:10 +0000
+Subject: x86/cpufeatures: Add Intel feature bits for Speculation Control
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit fc67dd70adb711a45d2ef34e12d1a8be75edde61)
+
+Add three feature bits exposed by new microcode on Intel CPUs for
+speculation control.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-3-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -296,6 +296,9 @@
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+ #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_SPEC_CTRL		(18*32+26) /* Speculation Control (IBRS + IBPB) */
++#define X86_FEATURE_STIBP		(18*32+27) /* Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+ 
+ /*
+  * BUG word(s)
diff --git a/queue/x86-cpufeatures-add-x86_bug_cpu_insecure.patch b/queue/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
new file mode 100644
index 0000000..0f6fc2c
--- /dev/null
+++ b/queue/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
@@ -0,0 +1,73 @@
+From a89f040fa34ec9cd682aed98b8f04e3c47d998bd Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:33 +0100
+Subject: x86/cpufeatures: Add X86_BUG_CPU_INSECURE
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit a89f040fa34ec9cd682aed98b8f04e3c47d998bd upstream.
+
+Many x86 CPUs leak information to user space due to missing isolation of
+user space and kernel space page tables. There are many well documented
+ways to exploit that.
+
+The upcoming software mitigation of isolating the user and kernel space
+page tables needs a misfeature flag so code can be made runtime
+conditional.
+
+Add the BUG bits which indicate that the CPU is affected and add a feature
+bit which indicates that the software mitigation is enabled.
+
+Assume for now that _ALL_ x86 CPUs are affected by this. Exceptions can be
+made later.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    1 +
+ arch/x86/kernel/cpu/common.c       |    4 ++++
+ 2 files changed, 5 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -316,5 +316,6 @@
+ #define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
+ #define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400	X86_BUG(13) /* CPU is among the affected by Erratum 400 */
++#define X86_BUG_CPU_INSECURE	X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
+ 
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -882,6 +882,10 @@ static void __init early_identify_cpu(st
+ 	}
+ 
+ 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
++
++	/* Assume for now that ALL x86 CPUs are insecure */
++	setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
++
+ 	fpu__init_system(c);
+ }
+ 
diff --git a/queue/x86-cpufeatures-add-x86_bug_spectre_v.patch b/queue/x86-cpufeatures-add-x86_bug_spectre_v.patch
new file mode 100644
index 0000000..e59cfe5
--- /dev/null
+++ b/queue/x86-cpufeatures-add-x86_bug_spectre_v.patch
@@ -0,0 +1,58 @@
+From 99c6fa2511d8a683e61468be91b83f85452115fa Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Sat, 6 Jan 2018 11:49:23 +0000
+Subject: x86/cpufeatures: Add X86_BUG_SPECTRE_V[12]
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 99c6fa2511d8a683e61468be91b83f85452115fa upstream.
+
+Add the bug bits for Spectre v1/2 and force them unconditionally for all
+CPUs.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1515239374-23361-2-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeatures.h |    2 ++
+ arch/x86/kernel/cpu/common.c       |    3 +++
+ 2 files changed, 5 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -317,5 +317,7 @@
+ #define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400	X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+ #define X86_BUG_CPU_MELTDOWN	X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
++#define X86_BUG_SPECTRE_V1	X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
++#define X86_BUG_SPECTRE_V2	X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+ 
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -886,6 +886,9 @@ static void __init early_identify_cpu(st
+ 	/* Assume for now that ALL x86 CPUs are insecure */
+ 	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ 
++	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
++	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++
+ 	fpu__init_system(c);
+ }
+ 
diff --git a/queue/x86-cpufeatures-clean-up-spectre-v2-related-cpuid-flags.patch b/queue/x86-cpufeatures-clean-up-spectre-v2-related-cpuid-flags.patch
new file mode 100644
index 0000000..96900c0
--- /dev/null
+++ b/queue/x86-cpufeatures-clean-up-spectre-v2-related-cpuid-flags.patch
@@ -0,0 +1,171 @@
+From foo@baz Thu Feb  8 03:30:27 CET 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Sat, 27 Jan 2018 16:24:32 +0000
+Subject: x86/cpufeatures: Clean up Spectre v2 related CPUID flags
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit 2961298efe1ea1b6fc0d7ee8b76018fa6c0bcef2)
+
+We want to expose the hardware features simply in /proc/cpuinfo as "ibrs",
+"ibpb" and "stibp". Since AMD has separate CPUID bits for those, use them
+as the user-visible bits.
+
+When the Intel SPEC_CTRL bit is set which indicates both IBRS and IBPB
+capability, set those (AMD) bits accordingly. Likewise if the Intel STIBP
+bit is set, set the AMD STIBP that's used for the generic hardware
+capability.
+
+Hide the rest from /proc/cpuinfo by putting "" in the comments, including
+RETPOLINE and RETPOLINE_AMD, which shouldn't be visible there. There are
+patches to make the sysfs vulnerabilities information non-readable by
+non-root, and the same should apply to all information about which
+mitigations are actually in use. Those *shouldn't* appear in /proc/cpuinfo.
+
+The feature bit for whether IBPB is actually used, which is needed for
+ALTERNATIVEs, is renamed to X86_FEATURE_USE_IBPB.
+
+Originally-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: ak@linux.intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1517070274-12128-2-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h   |   18 +++++++++---------
+ arch/x86/include/asm/nospec-branch.h |    2 +-
+ arch/x86/kernel/cpu/bugs.c           |    7 +++----
+ arch/x86/kernel/cpu/intel.c          |   31 +++++++++++++++++++++----------
+ 4 files changed, 34 insertions(+), 24 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -194,15 +194,15 @@
+ #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ 
+-#define X86_FEATURE_RETPOLINE	( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE	( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+ 
+-#define X86_FEATURE_RSB_CTXSW	( 7*32+19) /* Fill RSB on context switches */
++#define X86_FEATURE_RSB_CTXSW	( 7*32+19) /* "" Fill RSB on context switches */
+ 
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+ #define X86_FEATURE_KAISER	( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+ 
+-#define X86_FEATURE_IBPB		( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/
++#define X86_FEATURE_USE_IBPB	( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+ 
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+@@ -260,9 +260,9 @@
+ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+ #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
+ #define X86_FEATURE_IRPERF	(13*32+1) /* Instructions Retired Count */
+-#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */
+-#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */
+-#define X86_FEATURE_AMD_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */
++#define X86_FEATURE_IBPB	(13*32+12) /* Indirect Branch Prediction Barrier */
++#define X86_FEATURE_IBRS	(13*32+14) /* Indirect Branch Restricted Speculation */
++#define X86_FEATURE_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors */
+ 
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+ #define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
+@@ -301,8 +301,8 @@
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+ #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+-#define X86_FEATURE_SPEC_CTRL		(18*32+26) /* Speculation Control (IBRS + IBPB) */
+-#define X86_FEATURE_STIBP		(18*32+27) /* Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
++#define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
+ #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+ 
+ /*
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -225,7 +225,7 @@ static inline void indirect_branch_predi
+ 				 "movl %[val], %%eax\n\t"
+ 				 "movl $0, %%edx\n\t"
+ 				 "wrmsr",
+-				 X86_FEATURE_IBPB)
++				 X86_FEATURE_USE_IBPB)
+ 		     : : [msr] "i" (MSR_IA32_PRED_CMD),
+ 			 [val] "i" (PRED_CMD_IBPB)
+ 		     : "eax", "ecx", "edx", "memory");
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -272,9 +272,8 @@ retpoline_auto:
+ 	}
+ 
+ 	/* Initialize Indirect Branch Prediction Barrier if supported */
+-	if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+-	    boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) {
+-		setup_force_cpu_cap(X86_FEATURE_IBPB);
++	if (boot_cpu_has(X86_FEATURE_IBPB)) {
++		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+ 		pr_info("Enabling Indirect Branch Prediction Barrier\n");
+ 	}
+ }
+@@ -307,7 +306,7 @@ ssize_t cpu_show_spectre_v2(struct devic
+ 		return sprintf(buf, "Not affected\n");
+ 
+ 	return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+-		       boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "",
++		       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ 		       spectre_v2_module_string());
+ }
+ #endif
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -140,17 +140,28 @@ static void early_init_intel(struct cpui
+ 		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
+ 	}
+ 
+-	if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
+-	     cpu_has(c, X86_FEATURE_STIBP) ||
+-	     cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) ||
+-	     cpu_has(c, X86_FEATURE_AMD_PRED_CMD) ||
+-	     cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) {
+-		pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n");
+-		clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
++	/*
++	 * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
++	 * and they also have a different bit for STIBP support. Also,
++	 * a hypervisor might have set the individual AMD bits even on
++	 * Intel CPUs, for finer-grained selection of what's available.
++	 */
++	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
++		set_cpu_cap(c, X86_FEATURE_IBRS);
++		set_cpu_cap(c, X86_FEATURE_IBPB);
++	}
++	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
++		set_cpu_cap(c, X86_FEATURE_STIBP);
++
++	/* Now if any of them are set, check the blacklist and clear the lot */
++	if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
++	     cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
++		pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
++		clear_cpu_cap(c, X86_FEATURE_IBRS);
++		clear_cpu_cap(c, X86_FEATURE_IBPB);
+ 		clear_cpu_cap(c, X86_FEATURE_STIBP);
+-		clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL);
+-		clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD);
+-		clear_cpu_cap(c, X86_FEATURE_AMD_STIBP);
++		clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
++		clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP);
+ 	}
+ 
+ 	/*
diff --git a/queue/x86-cpufeatures-make-cpu-bugs-sticky.patch b/queue/x86-cpufeatures-make-cpu-bugs-sticky.patch
new file mode 100644
index 0000000..aa4b9e0
--- /dev/null
+++ b/queue/x86-cpufeatures-make-cpu-bugs-sticky.patch
@@ -0,0 +1,96 @@
+From 6cbd2171e89b13377261d15e64384df60ecb530e Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:32 +0100
+Subject: x86/cpufeatures: Make CPU bugs sticky
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 6cbd2171e89b13377261d15e64384df60ecb530e upstream.
+
+There is currently no way to force CPU bug bits like CPU feature bits. That
+makes it impossible to set a bug bit once at boot and have it stick for all
+upcoming CPUs.
+
+Extend the force set/clear arrays to handle bug bits as well.
+
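+As a purely illustrative usage sketch (not part of this patch; the bug
+bit named here is one of the bits defined elsewhere in this series), a
+caller can now force a bug bit once during early boot and have it stick
+for all subsequently identified CPUs, exactly like a forced feature bit:
+
+	/* e.g. from early boot code */
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+	/* apply_forced_caps() later re-applies it for every CPU brought up */
+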
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeature.h |    2 ++
+ arch/x86/include/asm/processor.h  |    4 ++--
+ arch/x86/kernel/cpu/common.c      |    6 +++---
+ 3 files changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -135,6 +135,8 @@ extern const char * const x86_bug_flags[
+ 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
+ } while (0)
+ 
++#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
++
+ #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
+ /*
+  * Static testing of CPU features.  Used the same as boot_cpu_has().
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -156,8 +156,8 @@ extern struct cpuinfo_x86	boot_cpu_data;
+ extern struct cpuinfo_x86	new_cpu_data;
+ 
+ extern struct tss_struct	doublefault_tss;
+-extern __u32			cpu_caps_cleared[NCAPINTS];
+-extern __u32			cpu_caps_set[NCAPINTS];
++extern __u32			cpu_caps_cleared[NCAPINTS + NBUGINTS];
++extern __u32			cpu_caps_set[NCAPINTS + NBUGINTS];
+ 
+ #ifdef CONFIG_SMP
+ DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -480,8 +480,8 @@ static const char *table_lookup_model(st
+ 	return NULL;		/* Not found */
+ }
+ 
+-__u32 cpu_caps_cleared[NCAPINTS];
+-__u32 cpu_caps_set[NCAPINTS];
++__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
++__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+ 
+ void load_percpu_segment(int cpu)
+ {
+@@ -710,7 +710,7 @@ static void apply_forced_caps(struct cpu
+ {
+ 	int i;
+ 
+-	for (i = 0; i < NCAPINTS; i++) {
++	for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
+ 		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ 		c->x86_capability[i] |= cpu_caps_set[i];
+ 	}
diff --git a/queue/x86-cpuid-fix-up-virtual-ibrs-ibpb-stibp-feature-bits-on-intel.patch b/queue/x86-cpuid-fix-up-virtual-ibrs-ibpb-stibp-feature-bits-on-intel.patch
new file mode 100644
index 0000000..7e942e5
--- /dev/null
+++ b/queue/x86-cpuid-fix-up-virtual-ibrs-ibpb-stibp-feature-bits-on-intel.patch
@@ -0,0 +1,122 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Tue, 30 Jan 2018 14:30:23 +0000
+Subject: x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature bits on Intel
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+
+(cherry picked from commit 7fcae1118f5fd44a862aa5c3525248e35ee67c3b)
+
+Despite the fact that all the other code there seems to be doing it, just
+using set_cpu_cap() in early_intel_init() doesn't actually work.
+
+For CPUs with PKU support, setup_pku() calls get_cpu_cap() after
+c->c_init() has set those feature bits. That resets those bits back to what
+was queried from the hardware.
+
+Turning the bits off for bad microcode is easy to fix. That can just use
+setup_clear_cpu_cap() to force them off for all CPUs.
+
+I was less keen on forcing the feature bits *on* that way, just in case
+of inconsistencies. I appreciate that the kernel is going to get this
+utterly wrong if CPU features are not consistent, because it has already
+applied alternatives by the time secondary CPUs are brought up.
+
+But at least if setup_force_cpu_cap() isn't being used, we might have a
+chance of *detecting* the lack of the corresponding bit and either
+panicking or refusing to bring the offending CPU online.
+
+So ensure that the appropriate feature bits are set within get_cpu_cap()
+regardless of how many extra times it's called.
+
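+The ordering problem being fixed, as a rough (hedged) call-chain sketch
+with simplified control flow:
+
+	identify_cpu(c)
+	  -> c->c_init(c)        /* early_init_intel(): set_cpu_cap(IBRS/IBPB/STIBP) */
+	  -> setup_pku(c)
+	       -> get_cpu_cap(c) /* re-reads the CPUID words, losing the bits above */
+
+Hooking the fix-up into get_cpu_cap() itself means the derived bits
+survive however many times it is called, and setup_clear_cpu_cap() makes
+the bad-microcode clearing sticky across all CPUs.
+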
+Fixes: 2961298e ("x86/cpufeatures: Clean up Spectre v2 related CPUID flags")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: karahmed@amazon.de
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Link: https://lkml.kernel.org/r/1517322623-15261-1-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |   21 +++++++++++++++++++++
+ arch/x86/kernel/cpu/intel.c  |   27 ++++++++-------------------
+ 2 files changed, 29 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -718,6 +718,26 @@ static void apply_forced_caps(struct cpu
+ 	}
+ }
+ 
++static void init_speculation_control(struct cpuinfo_x86 *c)
++{
++	/*
++	 * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
++	 * and they also have a different bit for STIBP support. Also,
++	 * a hypervisor might have set the individual AMD bits even on
++	 * Intel CPUs, for finer-grained selection of what's available.
++	 *
++	 * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
++	 * features, which are visible in /proc/cpuinfo and used by the
++	 * kernel. So set those accordingly from the Intel bits.
++	 */
++	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
++		set_cpu_cap(c, X86_FEATURE_IBRS);
++		set_cpu_cap(c, X86_FEATURE_IBPB);
++	}
++	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
++		set_cpu_cap(c, X86_FEATURE_STIBP);
++}
++
+ void get_cpu_cap(struct cpuinfo_x86 *c)
+ {
+ 	u32 eax, ebx, ecx, edx;
+@@ -812,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ 		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+ 
+ 	init_scattered_cpuid_features(c);
++	init_speculation_control(c);
+ }
+ 
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -140,28 +140,17 @@ static void early_init_intel(struct cpui
+ 		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
+ 	}
+ 
+-	/*
+-	 * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+-	 * and they also have a different bit for STIBP support. Also,
+-	 * a hypervisor might have set the individual AMD bits even on
+-	 * Intel CPUs, for finer-grained selection of what's available.
+-	 */
+-	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+-		set_cpu_cap(c, X86_FEATURE_IBRS);
+-		set_cpu_cap(c, X86_FEATURE_IBPB);
+-	}
+-	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+-		set_cpu_cap(c, X86_FEATURE_STIBP);
+-
+ 	/* Now if any of them are set, check the blacklist and clear the lot */
+-	if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
++	if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
++	     cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
++	     cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+ 	     cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
+ 		pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
+-		clear_cpu_cap(c, X86_FEATURE_IBRS);
+-		clear_cpu_cap(c, X86_FEATURE_IBPB);
+-		clear_cpu_cap(c, X86_FEATURE_STIBP);
+-		clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
+-		clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP);
++		setup_clear_cpu_cap(X86_FEATURE_IBRS);
++		setup_clear_cpu_cap(X86_FEATURE_IBPB);
++		setup_clear_cpu_cap(X86_FEATURE_STIBP);
++		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
++		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+ 	}
+ 
+ 	/*
diff --git a/queue/x86-documentation-add-pti-description.patch b/queue/x86-documentation-add-pti-description.patch
new file mode 100644
index 0000000..78b4ebd
--- /dev/null
+++ b/queue/x86-documentation-add-pti-description.patch
@@ -0,0 +1,261 @@
+From 01c9b17bf673b05bb401b76ec763e9730ccf1376 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Fri, 5 Jan 2018 09:44:36 -0800
+Subject: x86/Documentation: Add PTI description
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 01c9b17bf673b05bb401b76ec763e9730ccf1376 upstream.
+
+Add some details about how PTI works, what some of the downsides
+are, and how to debug it when things go wrong.
+
+Also document the kernel parameter: 'pti/nopti'.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Cc: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
+Cc: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+Cc: Richard Fellner <richard.fellner@student.tugraz.at>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Andi Lutomirsky <luto@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180105174436.1BC6FA2B@viggo.jf.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/kernel-parameters.txt |   21 ++--
+ Documentation/x86/pti.txt           |  186 ++++++++++++++++++++++++++++++++++++
+ 2 files changed, 200 insertions(+), 7 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2763,8 +2763,6 @@ bytes respectively. Such letter suffixes
+ 
+ 	nojitter	[IA-64] Disables jitter checking for ITC timers.
+ 
+-	nopti		[X86-64] Disable KAISER isolation of kernel from user.
+-
+ 	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver
+ 
+ 	no-kvmapf	[X86,KVM] Disable paravirtualized asynchronous page
+@@ -3327,11 +3325,20 @@ bytes respectively. Such letter suffixes
+ 	pt.		[PARIDE]
+ 			See Documentation/blockdev/paride.txt.
+ 
+-	pti=		[X86_64]
+-			Control KAISER user/kernel address space isolation:
+-			on - enable
+-			off - disable
+-			auto - default setting
++	pti=		[X86_64] Control Page Table Isolation of user and
++			kernel address spaces.  Disabling this feature
++			removes hardening, but improves performance of
++			system calls and interrupts.
++
++			on   - unconditionally enable
++			off  - unconditionally disable
++			auto - kernel detects whether your CPU model is
++			       vulnerable to issues that PTI mitigates
++
++			Not specifying this option is equivalent to pti=auto.
++
++	nopti		[X86_64]
++			Equivalent to pti=off
+ 
+ 	pty.legacy_count=
+ 			[KNL] Number of legacy pty's. Overwrites compiled-in
+--- /dev/null
++++ b/Documentation/x86/pti.txt
+@@ -0,0 +1,186 @@
++Overview
++========
++
++Page Table Isolation (pti, previously known as KAISER[1]) is a
++countermeasure against attacks on the shared user/kernel address
++space such as the "Meltdown" approach[2].
++
++To mitigate this class of attacks, we create an independent set of
++page tables for use only when running userspace applications.  When
++the kernel is entered via syscalls, interrupts or exceptions, the
++page tables are switched to the full "kernel" copy.  When the system
++switches back to user mode, the user copy is used again.
++
++The userspace page tables contain only a minimal amount of kernel
++data: only what is needed to enter/exit the kernel such as the
++entry/exit functions themselves and the interrupt descriptor table
++(IDT).  There are a few strictly unnecessary things that get mapped
++such as the first C function when entering an interrupt (see
++comments in pti.c).
++
++This approach helps to ensure that side-channel attacks leveraging
++the paging structures do not function when PTI is enabled.  It can be
++enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
++Once enabled at compile-time, it can be disabled at boot with the
++'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
++
++Page Table Management
++=====================
++
++When PTI is enabled, the kernel manages two sets of page tables.
++The first set is very similar to the single set which is present in
++kernels without PTI.  This includes a complete mapping of userspace
++that the kernel can use for things like copy_to_user().
++
++Although _complete_, the user portion of the kernel page tables is
++crippled by setting the NX bit in the top level.  This ensures
++that any missed kernel->user CR3 switch will immediately crash
++userspace upon executing its first instruction.
++
++The userspace page tables map only the kernel data needed to enter
++and exit the kernel.  This data is entirely contained in the 'struct
++cpu_entry_area' structure which is placed in the fixmap which gives
++each CPU's copy of the area a compile-time-fixed virtual address.
++
++For new userspace mappings, the kernel makes the entries in its
++page tables like normal.  The only difference is when the kernel
++makes entries in the top (PGD) level.  In addition to setting the
++entry in the main kernel PGD, a copy of the entry is made in the
++userspace page tables' PGD.
++
++This sharing at the PGD level also inherently shares all the lower
++layers of the page tables.  This leaves a single, shared set of
++userspace page tables to manage.  One PTE to lock, one set of
++accessed bits, dirty bits, etc...
++
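++As a minimal, purely illustrative sketch (the helper name below is
++hypothetical, not the kernel's actual function), mirroring a PGD entry
++into the user copy amounts to:
++
++	/* hypothetical sketch of the PGD mirroring described above */
++	static void mirror_pgd_entry(pgd_t *kernel_pgd, pgd_t *user_pgd,
++				     unsigned long address, pgd_t entry)
++	{
++		unsigned long idx = pgd_index(address);
++
++		kernel_pgd[idx] = entry;        /* main kernel page tables */
++		if (address < TASK_SIZE_MAX)    /* only mirror userspace addresses */
++			user_pgd[idx] = entry;  /* userspace (shadow) page tables */
++	}
++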
++Overhead
++========
++
++Protection against side-channel attacks is important.  But,
++this protection comes at a cost:
++
++1. Increased Memory Use
++  a. Each process now needs an order-1 PGD instead of order-0.
++     (Consumes an additional 4k per process).
++  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
++     aligned so that it can be mapped by setting a single PMD
++     entry.  This consumes nearly 2MB of RAM once the kernel
++     is decompressed, but no space in the kernel image itself.
++
++2. Runtime Cost
++  a. CR3 manipulation to switch between the page table copies
++     must be done at interrupt, syscall, and exception entry
++     and exit (it can be skipped when the kernel is interrupted,
++     though.)  Moves to CR3 are on the order of a hundred
++     cycles, and are required at every entry and exit.
++  b. A "trampoline" must be used for SYSCALL entry.  This
++     trampoline depends on a smaller set of resources than the
++     non-PTI SYSCALL entry code, so requires mapping fewer
++     things into the userspace page tables.  The downside is
++     that stacks must be switched at entry time.
++  c. Global pages are disabled for all kernel structures not
++     mapped into both kernel and userspace page tables.  This
++     feature of the MMU allows different processes to share TLB
++     entries mapping the kernel.  Losing the feature means more
++     TLB misses after a context switch.  The actual loss of
++     performance is very small, however, never exceeding 1%.
++  d. Process Context IDentifiers (PCID) is a CPU feature that
++     allows us to skip flushing the entire TLB when switching page
++     tables by setting a special bit in CR3 when the page tables
++     are changed.  This makes switching the page tables (at context
++     switch, or kernel entry/exit) cheaper.  But, on systems with
++     PCID support, the context switch code must flush both the user
++     and kernel entries out of the TLB.  The user PCID TLB flush is
++     deferred until the exit to userspace, minimizing the cost.
++     See intel.com/sdm for the gory PCID/INVPCID details.
++  e. The userspace page tables must be populated for each new
++     process.  Even without PTI, the shared kernel mappings
++     are created by copying top-level (PGD) entries into each
++     new process.  But, with PTI, there are now *two* kernel
++     mappings: one in the kernel page tables that maps everything
++     and one for the entry/exit structures.  At fork(), we need to
++     copy both.
++  f. In addition to the fork()-time copying, there must also
++     be an update to the userspace PGD any time a set_pgd() is done
++     on a PGD used to map userspace.  This ensures that the kernel
++     and userspace copies always map the same userspace
++     memory.
++  g. On systems without PCID support, each CR3 write flushes
++     the entire TLB.  That means that each syscall, interrupt
++     or exception flushes the TLB.
++  h. INVPCID is a TLB-flushing instruction which allows flushing
++     of TLB entries for non-current PCIDs.  Some systems support
++     PCIDs, but do not support INVPCID.  On these systems, addresses
++     can only be flushed from the TLB for the current PCID.  When
++     flushing a kernel address, we need to flush all PCIDs, so a
++     single kernel address flush will require a TLB-flushing CR3
++     write upon the next use of every PCID.
++
++Possible Future Work
++====================
++1. We can be more careful about not actually writing to CR3
++   unless its value is actually changed.
++2. Allow PTI to be enabled/disabled at runtime in addition to the
++   boot-time switching.
++
++Testing
++========
++
++To test stability of PTI, the following test procedure is recommended,
++ideally doing all of these in parallel:
++
++1. Set CONFIG_DEBUG_ENTRY=y
++2. Run several copies of all of the tools/testing/selftests/x86/ tests
++   (excluding MPX and protection_keys) in a loop on multiple CPUs for
++   several minutes.  These tests frequently uncover corner cases in the
++   kernel entry code.  In general, old kernels might cause these tests
++   themselves to crash, but they should never crash the kernel.
++3. Run the 'perf' tool in a mode (top or record) that generates many
++   frequent performance monitoring non-maskable interrupts (see "NMI"
++   in /proc/interrupts).  This exercises the NMI entry/exit code which
++   is known to trigger bugs in code paths that did not expect to be
++   interrupted, including nested NMIs.  Using "-c" boosts the rate of
++   NMIs, and using two -c with separate counters encourages nested NMIs
++   and less deterministic behavior.
++
++	while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
++
++4. Launch a KVM virtual machine.
++5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
++   This has been a lightly-tested code path and needs extra scrutiny.
++
++Debugging
++=========
++
++Bugs in PTI cause a few different signatures of crashes
++that are worth noting here.
++
++ * Failures of the selftests/x86 code.  Usually a bug in one of the
++   more obscure corners of entry_64.S
++ * Crashes in early boot, especially around CPU bringup.  Bugs
++   in the trampoline code or mappings cause these.
++ * Crashes at the first interrupt.  Caused by bugs in entry_64.S,
++   like screwing up a page table switch.  Also caused by
++   incorrectly mapping the IRQ handler entry code.
++ * Crashes at the first NMI.  The NMI code is separate from main
++   interrupt handlers and can have bugs that do not affect
++   normal interrupts.  Also caused by incorrectly mapping NMI
++   code.  NMIs that interrupt the entry code must be very
++   careful and can be the cause of crashes that show up when
++   running perf.
++ * Kernel crashes at the first exit to userspace.  entry_64.S
++   bugs, or failing to map some of the exit code.
++ * Crashes at first interrupt that interrupts userspace. The paths
++   in entry_64.S that return to userspace are sometimes separate
++   from the ones that return to the kernel.
++ * Double faults: overflowing the kernel stack because of page
++   faults upon page faults.  Caused by touching non-pti-mapped
++   data in the entry code, or forgetting to switch to kernel
++   CR3 before calling into C functions which are not pti-mapped.
++ * Userspace segfaults early in boot, sometimes manifesting
++   as mount(8) failing to mount the rootfs.  These have
++   tended to be TLB invalidation issues.  Usually invalidating
++   the wrong PCID, or otherwise missing an invalidation.
++
++1. https://gruss.cc/files/kaiser.pdf
++2. https://meltdownattack.com/meltdown.pdf
diff --git a/queue/x86-entry-64-push-extra-regs-right-away.patch b/queue/x86-entry-64-push-extra-regs-right-away.patch
new file mode 100644
index 0000000..e80f751
--- /dev/null
+++ b/queue/x86-entry-64-push-extra-regs-right-away.patch
@@ -0,0 +1,46 @@
+From foo@baz Thu Feb  8 03:30:27 CET 2018
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 Jan 2018 10:38:49 -0800
+Subject: x86/entry/64: Push extra regs right away
+
+From: Andy Lutomirski <luto@kernel.org>
+
+(cherry picked from commit d1f7732009e0549eedf8ea1db948dc37be77fd46)
+
+With the fast path removed there is no point in splitting the push of the
+normal and the extra register set. Just push the extra regs right away.
+
+[ tglx: Split out from 'x86/entry/64: Remove the SYSCALL64 fast path' ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
+Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -177,10 +177,14 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
+ 	pushq	%r9				/* pt_regs->r9 */
+ 	pushq	%r10				/* pt_regs->r10 */
+ 	pushq	%r11				/* pt_regs->r11 */
+-	sub	$(6*8), %rsp			/* pt_regs->bp, bx, r12-15 not saved */
++	pushq	%rbx				/* pt_regs->rbx */
++	pushq	%rbp				/* pt_regs->rbp */
++	pushq	%r12				/* pt_regs->r12 */
++	pushq	%r13				/* pt_regs->r13 */
++	pushq	%r14				/* pt_regs->r14 */
++	pushq	%r15				/* pt_regs->r15 */
+ 
+ 	/* IRQs are off. */
+-	SAVE_EXTRA_REGS
+ 	movq	%rsp, %rdi
+ 	call	do_syscall_64		/* returns with IRQs disabled */
+ 
diff --git a/queue/x86-entry-64-remove-the-syscall64-fast-path.patch b/queue/x86-entry-64-remove-the-syscall64-fast-path.patch
new file mode 100644
index 0000000..daa62bd
--- /dev/null
+++ b/queue/x86-entry-64-remove-the-syscall64-fast-path.patch
@@ -0,0 +1,202 @@
+From foo@baz Thu Feb  8 03:30:27 CET 2018
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 Jan 2018 10:38:49 -0800
+Subject: x86/entry/64: Remove the SYSCALL64 fast path
+
+From: Andy Lutomirski <luto@kernel.org>
+
+(cherry picked from commit 21d375b6b34ff511a507de27bf316b3dde6938d9)
+
+The SYSCALL64 fast path was a nice, if small, optimization back in the good
+old days when syscalls were actually reasonably fast.  Now there is PTI to
+slow everything down, and indirect branches are verboten, making everything
+messier.  The retpoline code in the fast path is particularly nasty.
+
+Just get rid of the fast path. The slow path is barely slower.
+
+[ tglx: Split out the 'push all extra regs' part ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
+Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S   |  123 --------------------------------------------
+ arch/x86/entry/syscall_64.c |    7 --
+ 2 files changed, 3 insertions(+), 127 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -179,94 +179,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
+ 	pushq	%r11				/* pt_regs->r11 */
+ 	sub	$(6*8), %rsp			/* pt_regs->bp, bx, r12-15 not saved */
+ 
+-	/*
+-	 * If we need to do entry work or if we guess we'll need to do
+-	 * exit work, go straight to the slow path.
+-	 */
+-	movq	PER_CPU_VAR(current_task), %r11
+-	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
+-	jnz	entry_SYSCALL64_slow_path
+-
+-entry_SYSCALL_64_fastpath:
+-	/*
+-	 * Easy case: enable interrupts and issue the syscall.  If the syscall
+-	 * needs pt_regs, we'll call a stub that disables interrupts again
+-	 * and jumps to the slow path.
+-	 */
+-	TRACE_IRQS_ON
+-	ENABLE_INTERRUPTS(CLBR_NONE)
+-#if __SYSCALL_MASK == ~0
+-	cmpq	$__NR_syscall_max, %rax
+-#else
+-	andl	$__SYSCALL_MASK, %eax
+-	cmpl	$__NR_syscall_max, %eax
+-#endif
+-	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
+-	movq	%r10, %rcx
+-
+-	/*
+-	 * This call instruction is handled specially in stub_ptregs_64.
+-	 * It might end up jumping to the slow path.  If it jumps, RAX
+-	 * and all argument registers are clobbered.
+-	 */
+-#ifdef CONFIG_RETPOLINE
+-	movq	sys_call_table(, %rax, 8), %rax
+-	call	__x86_indirect_thunk_rax
+-#else
+-	call	*sys_call_table(, %rax, 8)
+-#endif
+-.Lentry_SYSCALL_64_after_fastpath_call:
+-
+-	movq	%rax, RAX(%rsp)
+-1:
+-
+-	/*
+-	 * If we get here, then we know that pt_regs is clean for SYSRET64.
+-	 * If we see that no exit work is required (which we are required
+-	 * to check with IRQs off), then we can go straight to SYSRET64.
+-	 */
+-	DISABLE_INTERRUPTS(CLBR_NONE)
+-	TRACE_IRQS_OFF
+-	movq	PER_CPU_VAR(current_task), %r11
+-	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
+-	jnz	1f
+-
+-	LOCKDEP_SYS_EXIT
+-	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
+-	movq	RIP(%rsp), %rcx
+-	movq	EFLAGS(%rsp), %r11
+-	RESTORE_C_REGS_EXCEPT_RCX_R11
+-	/*
+-	 * This opens a window where we have a user CR3, but are
+-	 * running in the kernel.  This makes using the CS
+-	 * register useless for telling whether or not we need to
+-	 * switch CR3 in NMIs.  Normal interrupts are OK because
+-	 * they are off here.
+-	 */
+-	SWITCH_USER_CR3
+-	movq	RSP(%rsp), %rsp
+-	USERGS_SYSRET64
+-
+-1:
+-	/*
+-	 * The fast path looked good when we started, but something changed
+-	 * along the way and we need to switch to the slow path.  Calling
+-	 * raise(3) will trigger this, for example.  IRQs are off.
+-	 */
+-	TRACE_IRQS_ON
+-	ENABLE_INTERRUPTS(CLBR_NONE)
+-	SAVE_EXTRA_REGS
+-	movq	%rsp, %rdi
+-	call	syscall_return_slowpath	/* returns with IRQs disabled */
+-	jmp	return_from_SYSCALL_64
+-
+-entry_SYSCALL64_slow_path:
+ 	/* IRQs are off. */
+ 	SAVE_EXTRA_REGS
+ 	movq	%rsp, %rdi
+ 	call	do_syscall_64		/* returns with IRQs disabled */
+ 
+-return_from_SYSCALL_64:
+ 	RESTORE_EXTRA_REGS
+ 	TRACE_IRQS_IRETQ		/* we're about to change IF */
+ 
+@@ -339,6 +256,7 @@ return_from_SYSCALL_64:
+ syscall_return_via_sysret:
+ 	/* rcx and r11 are already restored (see code above) */
+ 	RESTORE_C_REGS_EXCEPT_RCX_R11
++
+ 	/*
+ 	 * This opens a window where we have a user CR3, but are
+ 	 * running in the kernel.  This makes using the CS
+@@ -363,45 +281,6 @@ opportunistic_sysret_failed:
+ 	jmp	restore_c_regs_and_iret
+ END(entry_SYSCALL_64)
+ 
+-ENTRY(stub_ptregs_64)
+-	/*
+-	 * Syscalls marked as needing ptregs land here.
+-	 * If we are on the fast path, we need to save the extra regs,
+-	 * which we achieve by trying again on the slow path.  If we are on
+-	 * the slow path, the extra regs are already saved.
+-	 *
+-	 * RAX stores a pointer to the C function implementing the syscall.
+-	 * IRQs are on.
+-	 */
+-	cmpq	$.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
+-	jne	1f
+-
+-	/*
+-	 * Called from fast path -- disable IRQs again, pop return address
+-	 * and jump to slow path
+-	 */
+-	DISABLE_INTERRUPTS(CLBR_NONE)
+-	TRACE_IRQS_OFF
+-	popq	%rax
+-	jmp	entry_SYSCALL64_slow_path
+-
+-1:
+-	JMP_NOSPEC %rax				/* Called from C */
+-END(stub_ptregs_64)
+-
+-.macro ptregs_stub func
+-ENTRY(ptregs_\func)
+-	leaq	\func(%rip), %rax
+-	jmp	stub_ptregs_64
+-END(ptregs_\func)
+-.endm
+-
+-/* Instantiate ptregs_stub for each ptregs-using syscall */
+-#define __SYSCALL_64_QUAL_(sym)
+-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+-#include <asm/syscalls_64.h>
+-
+ /*
+  * %rdi: prev task
+  * %rsi: next task
+--- a/arch/x86/entry/syscall_64.c
++++ b/arch/x86/entry/syscall_64.c
+@@ -6,14 +6,11 @@
+ #include <asm/asm-offsets.h>
+ #include <asm/syscall.h>
+ 
+-#define __SYSCALL_64_QUAL_(sym) sym
+-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
+-
+-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
++#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+ #include <asm/syscalls_64.h>
+ #undef __SYSCALL_64
+ 
+-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
++#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+ 
+ extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+ 
diff --git a/queue/x86-get_user-use-pointer-masking-to-limit-speculation.patch b/queue/x86-get_user-use-pointer-masking-to-limit-speculation.patch
new file mode 100644
index 0000000..e81910d
--- /dev/null
+++ b/queue/x86-get_user-use-pointer-masking-to-limit-speculation.patch
@@ -0,0 +1,98 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:54 -0800
+Subject: x86/get_user: Use pointer masking to limit speculation
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit c7f631cb07e7da06ac1d231ca178452339e32a94)
+
+Quoting Linus:
+
+    I do think that it would be a good idea to very expressly document
+    the fact that it's not that the user access itself is unsafe. I do
+    agree that things like "get_user()" want to be protected, but not
+    because of any direct bugs or problems with get_user() and friends,
+    but simply because get_user() is an excellent source of a pointer
+    that is obviously controlled from a potentially attacking user
+    space. So it's a prime candidate for then finding _subsequent_
+    accesses that can then be used to perturb the cache.
+
+Unlike the __get_user() case get_user() includes the address limit check
+near the pointer de-reference. With that locality the speculation can be
+mitigated with pointer narrowing rather than a barrier, i.e.
+array_index_nospec(). Where the narrowing is performed by:
+
+	cmp %limit, %ptr
+	sbb %mask, %mask
+	and %mask, %ptr
+
+With respect to speculation the value of %ptr is either less than %limit
+or NULL.
+
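+A stand-alone C sketch of the same narrowing (illustrative only; the
+kernel does this in assembly so that no data-dependent conditional
+branch is emitted):
+
+	#include <stdint.h>
+
+	/* returns ptr when ptr < limit, otherwise 0 (NULL) */
+	static inline uintptr_t narrow_user_ptr(uintptr_t ptr, uintptr_t limit)
+	{
+		/* mask is ~0 when ptr < limit, 0 otherwise */
+		uintptr_t mask = 0UL - (uintptr_t)(ptr < limit);
+		return ptr & mask;
+	}
+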
+Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: Kees Cook <keescook@chromium.org>
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: torvalds@linux-foundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727417469.33451.11804043010080838495.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/getuser.S |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/x86/lib/getuser.S
++++ b/arch/x86/lib/getuser.S
+@@ -39,6 +39,8 @@ ENTRY(__get_user_1)
+ 	mov PER_CPU_VAR(current_task), %_ASM_DX
+ 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ 	jae bad_get_user
++	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
++	and %_ASM_DX, %_ASM_AX
+ 	ASM_STAC
+ 1:	movzbl (%_ASM_AX),%edx
+ 	xor %eax,%eax
+@@ -53,6 +55,8 @@ ENTRY(__get_user_2)
+ 	mov PER_CPU_VAR(current_task), %_ASM_DX
+ 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ 	jae bad_get_user
++	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
++	and %_ASM_DX, %_ASM_AX
+ 	ASM_STAC
+ 2:	movzwl -1(%_ASM_AX),%edx
+ 	xor %eax,%eax
+@@ -67,6 +71,8 @@ ENTRY(__get_user_4)
+ 	mov PER_CPU_VAR(current_task), %_ASM_DX
+ 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ 	jae bad_get_user
++	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
++	and %_ASM_DX, %_ASM_AX
+ 	ASM_STAC
+ 3:	movl -3(%_ASM_AX),%edx
+ 	xor %eax,%eax
+@@ -82,6 +88,8 @@ ENTRY(__get_user_8)
+ 	mov PER_CPU_VAR(current_task), %_ASM_DX
+ 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ 	jae bad_get_user
++	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
++	and %_ASM_DX, %_ASM_AX
+ 	ASM_STAC
+ 4:	movq -7(%_ASM_AX),%rdx
+ 	xor %eax,%eax
+@@ -93,6 +101,8 @@ ENTRY(__get_user_8)
+ 	mov PER_CPU_VAR(current_task), %_ASM_DX
+ 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ 	jae bad_get_user_8
++	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
++	and %_ASM_DX, %_ASM_AX
+ 	ASM_STAC
+ 4:	movl -7(%_ASM_AX),%edx
+ 5:	movl -3(%_ASM_AX),%ecx
diff --git a/queue/x86-implement-array_index_mask_nospec.patch b/queue/x86-implement-array_index_mask_nospec.patch
new file mode 100644
index 0000000..23b2cba
--- /dev/null
+++ b/queue/x86-implement-array_index_mask_nospec.patch
@@ -0,0 +1,66 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:28 -0800
+Subject: x86: Implement array_index_mask_nospec
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit babdde2698d482b6c0de1eab4f697cf5856c5859)
+
+array_index_nospec() uses a mask to sanitize user controllable array
+indexes, i.e. generate a 0 mask if 'index' >= 'size', and a ~0 mask
+otherwise. The default array_index_mask_nospec() handles the
+carry-bit from the (index - size) result in software.
+
+The x86 array_index_mask_nospec() does the same, but the carry-bit is
+handled in the processor CF flag without conditional instructions in the
+control flow.
+
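+Typical (illustrative) usage of the resulting mask, via the generic
+array_index_nospec() helper from <linux/nospec.h>:
+
+	if (index >= size)
+		return -EINVAL;
+	/* clamp the index used for the load, even under speculation */
+	index = array_index_nospec(index, size);
+	val = array[index];
+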
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727414808.33451.1873237130672785331.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/barrier.h |   24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -23,6 +23,30 @@
+ #define wmb()	asm volatile("sfence" ::: "memory")
+ #endif
+ 
++/**
++ * array_index_mask_nospec() - generate a mask that is ~0UL when the
++ * 	bounds check succeeds and 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * Returns:
++ *     0 - (index < size)
++ */
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++		unsigned long size)
++{
++	unsigned long mask;
++
++	asm ("cmp %1,%2; sbb %0,%0;"
++			:"=r" (mask)
++			:"r"(size),"r" (index)
++			:"cc");
++	return mask;
++}
++
++/* Override the default implementation from linux/nospec.h. */
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #ifdef CONFIG_X86_PPRO_FENCE
+ #define dma_rmb()	rmb()
+ #else
diff --git a/queue/x86-introduce-__uaccess_begin_nospec-and-uaccess_try_nospec.patch b/queue/x86-introduce-__uaccess_begin_nospec-and-uaccess_try_nospec.patch
new file mode 100644
index 0000000..885922d
--- /dev/null
+++ b/queue/x86-introduce-__uaccess_begin_nospec-and-uaccess_try_nospec.patch
@@ -0,0 +1,80 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:39 -0800
+Subject: x86: Introduce __uaccess_begin_nospec() and uaccess_try_nospec
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit b3bbfb3fb5d25776b8e3f361d2eedaabb0b496cd)
+
+For __get_user() paths, do not allow the kernel to speculate on the value
+of a user controlled pointer. In addition to the 'stac' instruction for
+Supervisor Mode Access Protection (SMAP), a barrier_nospec() causes the
+access_ok() result to resolve in the pipeline before the CPU might take any
+speculative action on the pointer value. Given the cost of 'stac' the
+speculation barrier is placed after 'stac' to hopefully overlap the cost of
+disabling SMAP with the cost of flushing the instruction pipeline.
+
+Since __get_user is a major kernel interface that deals with user
+controlled pointers, the __uaccess_begin_nospec() mechanism will prevent
+speculative execution past an access_ok() permission check. While
+speculative execution past access_ok() is not enough to lead to a kernel
+memory leak, it is a necessary precondition.
+
+To be clear, __uaccess_begin_nospec() is addressing a class of potential
+problems near __get_user() usages.
+
+Note, that while the barrier_nospec() in __uaccess_begin_nospec() is used
+to protect __get_user(), pointer masking similar to array_index_nospec()
+will be used for get_user() since it incorporates a bounds check near the
+usage.
+
+uaccess_try_nospec provides the same mechanism for get_user_try.
+
+No functional changes.
+
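+As a hedged sketch of the follow-up conversion (not a hunk of this
+patch), the uaccess sites are then switched over along the lines of:
+
+	#define get_user_try	uaccess_try_nospec	/* previously: uaccess_try */
+
+with open-coded __uaccess_begin() calls in the __get_user() and
+__copy_from_user() paths likewise becoming __uaccess_begin_nospec().
+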
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Suggested-by: Andi Kleen <ak@linux.intel.com>
+Suggested-by: Ingo Molnar <mingo@redhat.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727415922.33451.5796614273104346583.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/uaccess.h |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/arch/x86/include/asm/uaccess.h
++++ b/arch/x86/include/asm/uaccess.h
+@@ -123,6 +123,11 @@ extern int __get_user_bad(void);
+ 
+ #define __uaccess_begin() stac()
+ #define __uaccess_end()   clac()
++#define __uaccess_begin_nospec()	\
++({					\
++	stac();				\
++	barrier_nospec();		\
++})
+ 
+ /*
+  * This is a type: either unsigned long, if the argument fits into
+@@ -474,6 +479,10 @@ struct __large_struct { unsigned long bu
+ 	__uaccess_begin();						\
+ 	barrier();
+ 
++#define uaccess_try_nospec do {						\
++	current->thread.uaccess_err = 0;				\
++	__uaccess_begin_nospec();					\
++
+ #define uaccess_catch(err)						\
+ 	__uaccess_end();						\
+ 	(err) |= (current->thread.uaccess_err ? -EFAULT : 0);		\
diff --git a/queue/x86-introduce-barrier_nospec.patch b/queue/x86-introduce-barrier_nospec.patch
new file mode 100644
index 0000000..ae6aa42
--- /dev/null
+++ b/queue/x86-introduce-barrier_nospec.patch
@@ -0,0 +1,66 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:33 -0800
+Subject: x86: Introduce barrier_nospec
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit b3d7ad85b80bbc404635dca80f5b129f6242bc7a)
+
+Rename the open coded form of this instruction sequence from
+rdtsc_ordered() into a generic barrier primitive, barrier_nospec().
+
+One of the mitigations for Spectre variant1 vulnerabilities is to fence
+speculative execution after successfully validating a bounds check. I.e.
+force the result of a bounds check to resolve in the instruction pipeline
+to ensure speculative execution honors that result before potentially
+operating on out-of-bounds data.
+
+No functional changes.
+
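+As an illustrative (hedged) usage sketch, the bounds-check fencing
+described above then reads:
+
+	if (index >= size)
+		return -EINVAL;
+	barrier_nospec();	/* force the bounds check to resolve first */
+	val = array[index];	/* load cannot run ahead of the resolved check */
+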
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Suggested-by: Andi Kleen <ak@linux.intel.com>
+Suggested-by: Ingo Molnar <mingo@redhat.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727415361.33451.9049453007262764675.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/barrier.h |    4 ++++
+ arch/x86/include/asm/msr.h     |    3 +--
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -47,6 +47,10 @@ static inline unsigned long array_index_
+ /* Override the default implementation from linux/nospec.h. */
+ #define array_index_mask_nospec array_index_mask_nospec
+ 
++/* Prevent speculative execution past this barrier. */
++#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
++					   "lfence", X86_FEATURE_LFENCE_RDTSC)
++
+ #ifdef CONFIG_X86_PPRO_FENCE
+ #define dma_rmb()	rmb()
+ #else
+--- a/arch/x86/include/asm/msr.h
++++ b/arch/x86/include/asm/msr.h
+@@ -188,8 +188,7 @@ static __always_inline unsigned long lon
+ 	 * that some other imaginary CPU is updating continuously with a
+ 	 * time stamp.
+ 	 */
+-	alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+-			  "lfence", X86_FEATURE_LFENCE_RDTSC);
++	barrier_nospec();
+ 	return rdtsc();
+ }
+ 
diff --git a/queue/x86-kaiser-check-boottime-cmdline-params.patch b/queue/x86-kaiser-check-boottime-cmdline-params.patch
new file mode 100644
index 0000000..3476e16
--- /dev/null
+++ b/queue/x86-kaiser-check-boottime-cmdline-params.patch
@@ -0,0 +1,123 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 2 Jan 2018 14:19:48 +0100
+Subject: x86/kaiser: Check boottime cmdline params
+
+From: Borislav Petkov <bp@suse.de>
+
+
+AMD (and possibly other vendors) are not affected by the leak
+KAISER is protecting against.
+
+Keep the "nopti" for traditional reasons and add pti=<on|off|auto>
+like upstream.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt |    6 +++
+ arch/x86/mm/kaiser.c                |   59 +++++++++++++++++++++++++-----------
+ 2 files changed, 47 insertions(+), 18 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -3327,6 +3327,12 @@ bytes respectively. Such letter suffixes
+ 	pt.		[PARIDE]
+ 			See Documentation/blockdev/paride.txt.
+ 
++	pti=		[X86_64]
++			Control KAISER user/kernel address space isolation:
++			on - enable
++			off - disable
++			auto - default setting
++
+ 	pty.legacy_count=
+ 			[KNL] Number of legacy pty's. Overwrites compiled-in
+ 			default number.
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -15,6 +15,7 @@
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
++#include <asm/cmdline.h>
+ 
+ int kaiser_enabled __read_mostly = 1;
+ EXPORT_SYMBOL(kaiser_enabled);	/* for inlined TLB flush functions */
+@@ -263,6 +264,43 @@ static void __init kaiser_init_all_pgds(
+ 	WARN_ON(__ret);							\
+ } while (0)
+ 
++void __init kaiser_check_boottime_disable(void)
++{
++	bool enable = true;
++	char arg[5];
++	int ret;
++
++	ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
++	if (ret > 0) {
++		if (!strncmp(arg, "on", 2))
++			goto enable;
++
++		if (!strncmp(arg, "off", 3))
++			goto disable;
++
++		if (!strncmp(arg, "auto", 4))
++			goto skip;
++	}
++
++	if (cmdline_find_option_bool(boot_command_line, "nopti"))
++		goto disable;
++
++skip:
++	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
++		goto disable;
++
++enable:
++	if (enable)
++		setup_force_cpu_cap(X86_FEATURE_KAISER);
++
++	return;
++
++disable:
++	pr_info("Kernel/User page tables isolation: disabled\n");
++	kaiser_enabled = 0;
++	setup_clear_cpu_cap(X86_FEATURE_KAISER);
++}
++
+ /*
+  * If anything in here fails, we will likely die on one of the
+  * first kernel->user transitions and init will die.  But, we
+@@ -274,12 +312,10 @@ void __init kaiser_init(void)
+ {
+ 	int cpu;
+ 
+-	if (!kaiser_enabled) {
+-		setup_clear_cpu_cap(X86_FEATURE_KAISER);
+-		return;
+-	}
++	kaiser_check_boottime_disable();
+ 
+-	setup_force_cpu_cap(X86_FEATURE_KAISER);
++	if (!kaiser_enabled)
++		return;
+ 
+ 	kaiser_init_all_pgds();
+ 
+@@ -423,16 +459,3 @@ void kaiser_flush_tlb_on_return_to_user(
+ 			X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+-
+-static int __init x86_nokaiser_setup(char *s)
+-{
+-	/* nopti doesn't accept parameters */
+-	if (s)
+-		return -EINVAL;
+-
+-	kaiser_enabled = 0;
+-	pr_info("Kernel/User page tables isolation: disabled\n");
+-
+-	return 0;
+-}
+-early_param("nopti", x86_nokaiser_setup);
diff --git a/queue/x86-kaiser-move-feature-detection-up.patch b/queue/x86-kaiser-move-feature-detection-up.patch
new file mode 100644
index 0000000..5d61f21
--- /dev/null
+++ b/queue/x86-kaiser-move-feature-detection-up.patch
@@ -0,0 +1,79 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 25 Dec 2017 13:57:16 +0100
+Subject: x86/kaiser: Move feature detection up
+
+From: Borislav Petkov <bp@suse.de>
+
+
+... before the first use of kaiser_enabled as otherwise funky
+things happen:
+
+  about to get started...
+  (XEN) d0v0 Unhandled page fault fault/trap [#14, ec=0000]
+  (XEN) Pagetable walk from ffff88022a449090:
+  (XEN)  L4[0x110] = 0000000229e0e067 0000000000001e0e
+  (XEN)  L3[0x008] = 0000000000000000 ffffffffffffffff
+  (XEN) domain_crash_sync called from entry.S: fault at ffff82d08033fd08
+  entry.o#create_bounce_frame+0x135/0x14d
+  (XEN) Domain 0 (vcpu#0) crashed on cpu#0:
+  (XEN) ----[ Xen-4.9.1_02-3.21  x86_64  debug=n   Not tainted ]----
+  (XEN) CPU:    0
+  (XEN) RIP:    e033:[<ffffffff81007460>]
+  (XEN) RFLAGS: 0000000000000286   EM: 1   CONTEXT: pv guest (d0v0)
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kaiser.h |    2 ++
+ arch/x86/kernel/setup.c       |    7 +++++++
+ arch/x86/mm/kaiser.c          |    2 --
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -96,8 +96,10 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_p
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+ 
+ extern int kaiser_enabled;
++extern void __init kaiser_check_boottime_disable(void);
+ #else
+ #define kaiser_enabled	0
++static inline void __init kaiser_check_boottime_disable(void) {}
+ #endif /* CONFIG_KAISER */
+ 
+ /*
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -114,6 +114,7 @@
+ #include <asm/microcode.h>
+ #include <asm/mmu_context.h>
+ #include <asm/kaslr.h>
++#include <asm/kaiser.h>
+ 
+ /*
+  * max_low_pfn_mapped: highest direct mapped pfn under 4GB
+@@ -1019,6 +1020,12 @@ void __init setup_arch(char **cmdline_p)
+ 	 */
+ 	init_hypervisor_platform();
+ 
++	/*
++	 * This needs to happen right after XENPV is set on xen and
++	 * kaiser_enabled is checked below in cleanup_highmap().
++	 */
++	kaiser_check_boottime_disable();
++
+ 	x86_init.resources.probe_roms();
+ 
+ 	/* after parse_early_param, so could debug it */
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -310,8 +310,6 @@ void __init kaiser_init(void)
+ {
+ 	int cpu;
+ 
+-	kaiser_check_boottime_disable();
+-
+ 	if (!kaiser_enabled)
+ 		return;
+ 
diff --git a/queue/x86-kaiser-reenable-paravirt.patch b/queue/x86-kaiser-reenable-paravirt.patch
new file mode 100644
index 0000000..d081b61
--- /dev/null
+++ b/queue/x86-kaiser-reenable-paravirt.patch
@@ -0,0 +1,28 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 2 Jan 2018 14:19:49 +0100
+Subject: x86/kaiser: Reenable PARAVIRT
+
+From: Borislav Petkov <bp@suse.de>
+
+
+Now that the required bits have been addressed, reenable
+PARAVIRT.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/Kconfig |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -34,7 +34,7 @@ config SECURITY
+ config KAISER
+ 	bool "Remove the kernel mapping in user mode"
+ 	default y
+-	depends on X86_64 && SMP && !PARAVIRT
++	depends on X86_64 && SMP
+ 	help
+ 	  This enforces a strict kernel and user space isolation, in order
+ 	  to close hardware side channels on kernel address information.
diff --git a/queue/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch b/queue/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch
new file mode 100644
index 0000000..804ffac
--- /dev/null
+++ b/queue/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch
@@ -0,0 +1,97 @@
+From foo@baz Wed Jan  3 20:37:21 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 2 Jan 2018 14:19:48 +0100
+Subject: x86/kaiser: Rename and simplify X86_FEATURE_KAISER handling
+
+From: Borislav Petkov <bp@suse.de>
+
+
+Concentrate it in arch/x86/mm/kaiser.c and use the upstream string "nopti".
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt |    2 +-
+ arch/x86/kernel/cpu/common.c        |   18 ------------------
+ arch/x86/mm/kaiser.c                |   20 +++++++++++++++++++-
+ 3 files changed, 20 insertions(+), 20 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2763,7 +2763,7 @@ bytes respectively. Such letter suffixes
+ 
+ 	nojitter	[IA-64] Disables jitter checking for ITC timers.
+ 
+-	nokaiser	[X86-64] Disable KAISER isolation of kernel from user.
++	nopti		[X86-64] Disable KAISER isolation of kernel from user.
+ 
+ 	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver
+ 
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -179,20 +179,6 @@ static int __init x86_pcid_setup(char *s
+ 	return 1;
+ }
+ __setup("nopcid", x86_pcid_setup);
+-
+-static int __init x86_nokaiser_setup(char *s)
+-{
+-	/* nokaiser doesn't accept parameters */
+-	if (s)
+-		return -EINVAL;
+-#ifdef CONFIG_KAISER
+-	kaiser_enabled = 0;
+-	setup_clear_cpu_cap(X86_FEATURE_KAISER);
+-	pr_info("nokaiser: KAISER feature disabled\n");
+-#endif
+-	return 0;
+-}
+-early_param("nokaiser", x86_nokaiser_setup);
+ #endif
+ 
+ static int __init x86_noinvpcid_setup(char *s)
+@@ -813,10 +799,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ 		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+ 
+ 	init_scattered_cpuid_features(c);
+-#ifdef CONFIG_KAISER
+-	if (kaiser_enabled)
+-		set_cpu_cap(c, X86_FEATURE_KAISER);
+-#endif
+ }
+ 
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -274,8 +274,13 @@ void __init kaiser_init(void)
+ {
+ 	int cpu;
+ 
+-	if (!kaiser_enabled)
++	if (!kaiser_enabled) {
++		setup_clear_cpu_cap(X86_FEATURE_KAISER);
+ 		return;
++	}
++
++	setup_force_cpu_cap(X86_FEATURE_KAISER);
++
+ 	kaiser_init_all_pgds();
+ 
+ 	for_each_possible_cpu(cpu) {
+@@ -418,3 +423,16 @@ void kaiser_flush_tlb_on_return_to_user(
+ 			X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
++
++static int __init x86_nokaiser_setup(char *s)
++{
++	/* nopti doesn't accept parameters */
++	if (s)
++		return -EINVAL;
++
++	kaiser_enabled = 0;
++	pr_info("Kernel/User page tables isolation: disabled\n");
++
++	return 0;
++}
++early_param("nopti", x86_nokaiser_setup);
diff --git a/queue/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch b/queue/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
new file mode 100644
index 0000000..f7b9b05
--- /dev/null
+++ b/queue/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
@@ -0,0 +1,62 @@
+From b8b7abaed7a49b350f8ba659ddc264b04931d581 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 17 Sep 2017 09:03:50 -0700
+Subject: x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit b8b7abaed7a49b350f8ba659ddc264b04931d581 upstream.
+
+Otherwise we might have the PCID feature bit set during cpu_init().
+
+This is just for robustness.  I haven't seen any actual bugs here.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels")
+Link: http://lkml.kernel.org/r/b16dae9d6b0db5d9801ddbebbfd83384097c61f3.1505663533.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c   |    8 --------
+ arch/x86/kernel/cpu/common.c |    8 ++++++++
+ 2 files changed, 8 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -22,14 +22,6 @@
+ 
+ void __init check_bugs(void)
+ {
+-#ifdef CONFIG_X86_32
+-	/*
+-	 * Regardless of whether PCID is enumerated, the SDM says
+-	 * that it can't be enabled in 32-bit mode.
+-	 */
+-	setup_clear_cpu_cap(X86_FEATURE_PCID);
+-#endif
+-
+ 	identify_boot_cpu();
+ 
+ 	if (!IS_ENABLED(CONFIG_SMP)) {
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -890,6 +890,14 @@ static void __init early_identify_cpu(st
+ 	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ 
+ 	fpu__init_system(c);
++
++#ifdef CONFIG_X86_32
++	/*
++	 * Regardless of whether PCID is enumerated, the SDM says
++	 * that it can't be enabled in 32-bit mode.
++	 */
++	setup_clear_cpu_cap(X86_FEATURE_PCID);
++#endif
+ }
+ 
+ void __init early_cpu_init(void)
diff --git a/queue/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch b/queue/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
new file mode 100644
index 0000000..d110708
--- /dev/null
+++ b/queue/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
@@ -0,0 +1,43 @@
+From 924c6b900cfdf376b07bccfd80e62b21914f8a5a Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 8 Oct 2017 21:53:05 -0700
+Subject: x86/mm/64: Fix reboot interaction with CR4.PCIDE
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 924c6b900cfdf376b07bccfd80e62b21914f8a5a upstream.
+
+Trying to reboot via real mode fails with PCID on: long mode cannot
+be exited while CR4.PCIDE is set.  (No, I have no idea why, but the
+SDM and actual CPUs are in agreement here.)  The result is a GPF and
+a hang instead of a reboot.
+
+I didn't catch this in testing because neither my computer nor my VM
+reboots this way.  I can trigger it with reboot=bios, though.
+
+Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems")
+Reported-and-tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Link: https://lkml.kernel.org/r/f1e7d965998018450a7a70c2823873686a8b21c0.1507524746.git.luto@kernel.org
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/reboot.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kernel/reboot.c
++++ b/arch/x86/kernel/reboot.c
+@@ -106,6 +106,10 @@ void __noreturn machine_real_restart(uns
+ 	load_cr3(initial_page_table);
+ #else
+ 	write_cr3(real_mode_header->trampoline_pgd);
++
++	/* Exiting long mode will fail if CR4.PCIDE is set. */
++	if (static_cpu_has(X86_FEATURE_PCID))
++		cr4_clear_bits(X86_CR4_PCIDE);
+ #endif
+ 
+ 	/* Jump to the identity-mapped low memory code */
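
The two lines added above drop CR4.PCIDE through the kernel's shadowed-CR4 helpers before jumping to real mode. A minimal userspace sketch of that read-modify-write pattern follows; the shadow variable, the stubbed register write and the helper names are local stand-ins, not the kernel implementation.

#include <stdint.h>
#include <stdio.h>

#define X86_CR4_PGE	(1UL << 7)	/* bit positions per the SDM */
#define X86_CR4_PCIDE	(1UL << 17)

/* Cached copy of CR4; the kernel keeps one per CPU. */
static uint64_t cr4_shadow = X86_CR4_PGE | X86_CR4_PCIDE;

static void native_write_cr4(uint64_t val)
{
	/* Real code would execute "mov %rax, %cr4"; just log it here. */
	printf("write_cr4(%#llx)\n", (unsigned long long)val);
}

static void cr4_clear_bits(uint64_t mask)
{
	uint64_t cr4 = cr4_shadow;

	if (cr4 & mask) {
		cr4 &= ~mask;
		cr4_shadow = cr4;
		native_write_cr4(cr4);
	}
}

int main(void)
{
	/* Mirrors the reboot path: clear PCIDE before leaving long mode. */
	cr4_clear_bits(X86_CR4_PCIDE);
	cr4_clear_bits(X86_CR4_PCIDE);	/* already clear: no register write */
	return 0;
}
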
diff --git a/queue/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch b/queue/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
new file mode 100644
index 0000000..4bbaccd
--- /dev/null
+++ b/queue/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
@@ -0,0 +1,74 @@
+From 0790c9aad84901ca1bdc14746175549c8b5da215 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:20 -0700
+Subject: x86/mm: Add the 'nopcid' boot option to turn off PCID
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 0790c9aad84901ca1bdc14746175549c8b5da215 upstream.
+
+The parameter is only present on x86_64 systems to save a few bytes,
+as PCID is always disabled on x86_32.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ Documentation/kernel-parameters.txt |    2 ++
+ arch/x86/kernel/cpu/common.c        |   18 ++++++++++++++++++
+ 2 files changed, 20 insertions(+)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2795,6 +2795,8 @@ bytes respectively. Such letter suffixes
+ 	nopat		[X86] Disable PAT (page attribute table extension of
+ 			pagetables) support.
+ 
++	nopcid		[X86-64] Disable the PCID cpu feature.
++
+ 	norandmaps	Don't use address space randomization.  Equivalent to
+ 			echo 0 > /proc/sys/kernel/randomize_va_space
+ 
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -163,6 +163,24 @@ static int __init x86_mpx_setup(char *s)
+ }
+ __setup("nompx", x86_mpx_setup);
+ 
++#ifdef CONFIG_X86_64
++static int __init x86_pcid_setup(char *s)
++{
++	/* require an exact match without trailing characters */
++	if (strlen(s))
++		return 0;
++
++	/* do not emit a message if the feature is not present */
++	if (!boot_cpu_has(X86_FEATURE_PCID))
++		return 1;
++
++	setup_clear_cpu_cap(X86_FEATURE_PCID);
++	pr_info("nopcid: PCID feature disabled\n");
++	return 1;
++}
++__setup("nopcid", x86_pcid_setup);
++#endif
++
+ static int __init x86_noinvpcid_setup(char *s)
+ {
+ 	/* noinvpcid doesn't accept parameters */
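
The nopcid handler above only accepts an exact match and stays quiet when the CPU lacks PCID anyway. Below is a small userspace sketch of that exact-match parameter style, with the cpu-capability bookkeeping reduced to plain booleans; the names are illustrative, not kernel APIs.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool cpu_has_pcid = true;	/* pretend CPUID advertised PCID */
static bool pcid_disabled;

/* Returns nonzero once the option is consumed, like a __setup() handler. */
static int x86_pcid_setup(const char *s)
{
	/* require an exact match without trailing characters */
	if (strlen(s))
		return 0;

	/* do not emit a message if the feature is not present */
	if (!cpu_has_pcid)
		return 1;

	pcid_disabled = true;
	printf("nopcid: PCID feature disabled\n");
	return 1;
}

int main(void)
{
	x86_pcid_setup("=1");	/* rejected: trailing characters */
	x86_pcid_setup("");	/* accepted: bare "nopcid" */
	return 0;
}
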
diff --git a/queue/x86-mm-disable-pcid-on-32-bit-kernels.patch b/queue/x86-mm-disable-pcid-on-32-bit-kernels.patch
new file mode 100644
index 0000000..58abb0a
--- /dev/null
+++ b/queue/x86-mm-disable-pcid-on-32-bit-kernels.patch
@@ -0,0 +1,78 @@
+From cba4671af7550e008f7a7835f06df0763825bf3e Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:19 -0700
+Subject: x86/mm: Disable PCID on 32-bit kernels
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit cba4671af7550e008f7a7835f06df0763825bf3e upstream.
+
+32-bit kernels on new hardware will see PCID in CPUID, but PCID can
+only be used in 64-bit mode.  Rather than making all PCID code
+conditional, just disable the feature on 32-bit builds.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/2e391769192a4d31b808410c383c6bf0734bc6ea.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/disabled-features.h |    4 +++-
+ arch/x86/kernel/cpu/bugs.c               |    8 ++++++++
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -21,11 +21,13 @@
+ # define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
+ # define DISABLE_CYRIX_ARR	(1<<(X86_FEATURE_CYRIX_ARR & 31))
+ # define DISABLE_CENTAUR_MCR	(1<<(X86_FEATURE_CENTAUR_MCR & 31))
++# define DISABLE_PCID		0
+ #else
+ # define DISABLE_VME		0
+ # define DISABLE_K6_MTRR	0
+ # define DISABLE_CYRIX_ARR	0
+ # define DISABLE_CENTAUR_MCR	0
++# define DISABLE_PCID		(1<<(X86_FEATURE_PCID & 31))
+ #endif /* CONFIG_X86_64 */
+ 
+ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+@@ -43,7 +45,7 @@
+ #define DISABLED_MASK1	0
+ #define DISABLED_MASK2	0
+ #define DISABLED_MASK3	(DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
+-#define DISABLED_MASK4	0
++#define DISABLED_MASK4	(DISABLE_PCID)
+ #define DISABLED_MASK5	0
+ #define DISABLED_MASK6	0
+ #define DISABLED_MASK7	0
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -19,6 +19,14 @@
+ 
+ void __init check_bugs(void)
+ {
++#ifdef CONFIG_X86_32
++	/*
++	 * Regardless of whether PCID is enumerated, the SDM says
++	 * that it can't be enabled in 32-bit mode.
++	 */
++	setup_clear_cpu_cap(X86_FEATURE_PCID);
++#endif
++
+ 	identify_boot_cpu();
+ #ifndef CONFIG_SMP
+ 	pr_info("CPU: ");
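
The disabled-features half of this patch builds a per-word compile-time mask so 32-bit builds treat PCID as absent no matter what CPUID says. A rough standalone model of that masking trick follows; the BUILD_64BIT macro and the helper function are invented for the sketch, only the bit arithmetic mirrors the patch.

#include <stdint.h>
#include <stdio.h>

#define X86_FEATURE_PCID	(4 * 32 + 17)	/* word 4, bit 17 (CPUID.1:ECX) */

/* Compile with -DBUILD_64BIT to model a 64-bit build. */
#ifdef BUILD_64BIT
# define DISABLE_PCID	0
#else
# define DISABLE_PCID	(1u << (X86_FEATURE_PCID & 31))
#endif

#define DISABLED_MASK4	(DISABLE_PCID)

/* A runtime feature check then reduces to an AND against the word mask. */
static int cpu_feature_usable(uint32_t word4, unsigned int bit)
{
	return !!((word4 & ~DISABLED_MASK4) & (1u << (bit & 31)));
}

int main(void)
{
	uint32_t cpuid_1_ecx = 1u << 17;	/* the CPU advertises PCID */

	printf("PCID usable in this build: %d\n",
	       cpu_feature_usable(cpuid_1_ecx, X86_FEATURE_PCID));
	return 0;
}
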
diff --git a/queue/x86-mm-enable-cr4.pcide-on-supported-systems.patch b/queue/x86-mm-enable-cr4.pcide-on-supported-systems.patch
new file mode 100644
index 0000000..8722495
--- /dev/null
+++ b/queue/x86-mm-enable-cr4.pcide-on-supported-systems.patch
@@ -0,0 +1,108 @@
+From 660da7c9228f685b2ebe664f9fd69aaddcc420b5 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:21 -0700
+Subject: x86/mm: Enable CR4.PCIDE on supported systems
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5 upstream.
+
+We can use PCID if the CPU has PCID and PGE and we're not on Xen.
+
+By itself, this has no effect. A followup patch will start using PCID.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h |    8 ++++++++
+ arch/x86/kernel/cpu/common.c    |   22 ++++++++++++++++++++++
+ arch/x86/xen/enlighten.c        |    6 ++++++
+ 3 files changed, 36 insertions(+)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -191,6 +191,14 @@ static inline void __flush_tlb_all(void)
+ 		__flush_tlb_global();
+ 	else
+ 		__flush_tlb();
++
++	/*
++	 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
++	 * we'd end up flushing kernel translations for the current ASID but
++	 * we might fail to flush kernel translations for other cached ASIDs.
++	 *
++	 * To avoid this issue, we force PCID off if PGE is off.
++	 */
+ }
+ 
+ static inline void __flush_tlb_one(unsigned long addr)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -324,6 +324,25 @@ static __always_inline void setup_smap(s
+ 	}
+ }
+ 
++static void setup_pcid(struct cpuinfo_x86 *c)
++{
++	if (cpu_has(c, X86_FEATURE_PCID)) {
++		if (cpu_has(c, X86_FEATURE_PGE)) {
++			cr4_set_bits(X86_CR4_PCIDE);
++		} else {
++			/*
++			 * flush_tlb_all(), as currently implemented, won't
++			 * work if PCID is on but PGE is not.  Since that
++			 * combination doesn't exist on real hardware, there's
++			 * no reason to try to fully support it, but it's
++			 * polite to avoid corrupting data if we're on
++			 * an improperly configured VM.
++			 */
++			clear_cpu_cap(c, X86_FEATURE_PCID);
++		}
++	}
++}
++
+ /*
+  * Protection Keys are not available in 32-bit mode.
+  */
+@@ -1082,6 +1101,9 @@ static void identify_cpu(struct cpuinfo_
+ 	setup_smep(c);
+ 	setup_smap(c);
+ 
++	/* Set up PCID */
++	setup_pcid(c);
++
+ 	/*
+ 	 * The vendor-specific functions might have changed features.
+ 	 * Now we do "generic changes."
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -444,6 +444,12 @@ static void __init xen_init_cpuid_mask(v
+ 		~((1 << X86_FEATURE_MTRR) |  /* disable MTRR */
+ 		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+ 
++	/*
++	 * Xen PV would need some work to support PCID: CR3 handling as well
++	 * as xen_flush_tlb_others() would need updating.
++	 */
++	cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_PCID % 32));  /* disable PCID */
++
+ 	if (!xen_initial_domain())
+ 		cpuid_leaf1_edx_mask &=
+ 			~((1 << X86_FEATURE_ACPI));  /* disable ACPI */
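
The setup_pcid() hunk above encodes a simple policy: keep PCID only when PGE is also present, otherwise drop the feature so flush_tlb_all() stays correct. Here is a standalone model of that decision, with the cpu capability state reduced to a plain struct; it is illustrative only.

#include <stdbool.h>
#include <stdio.h>

struct cpu_caps {
	bool pcid;
	bool pge;
	bool cr4_pcide;		/* stands in for the CR4.PCIDE write */
};

static void setup_pcid(struct cpu_caps *c)
{
	if (!c->pcid)
		return;

	if (c->pge) {
		c->cr4_pcide = true;	/* cr4_set_bits(X86_CR4_PCIDE) */
	} else {
		/* Odd VM configuration: drop PCID rather than risk bad flushes. */
		c->pcid = false;	/* clear_cpu_cap(c, X86_FEATURE_PCID) */
	}
}

int main(void)
{
	struct cpu_caps vm = { .pcid = true, .pge = false };
	struct cpu_caps host = { .pcid = true, .pge = true };

	setup_pcid(&vm);
	setup_pcid(&host);
	printf("vm:   pcid=%d pcide=%d\n", vm.pcid, vm.cr4_pcide);
	printf("host: pcid=%d pcide=%d\n", host.pcid, host.cr4_pcide);
	return 0;
}
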
diff --git a/queue/x86-mm-make-flush_tlb_mm_range-more-predictable.patch b/queue/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
new file mode 100644
index 0000000..4f7f58e
--- /dev/null
+++ b/queue/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
@@ -0,0 +1,81 @@
+From ce27374fabf553153c3f53efcaa9bfab9216bd8c Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 22 Apr 2017 00:01:21 -0700
+Subject: x86/mm: Make flush_tlb_mm_range() more predictable
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit ce27374fabf553153c3f53efcaa9bfab9216bd8c upstream.
+
+I'm about to rewrite the function almost completely, but first I
+want to get a functional change out of the way.  Currently, if
+flush_tlb_mm_range() does not flush the local TLB at all, it will
+never do individual page flushes on remote CPUs.  This seems to be
+an accident, and preserving it will be awkward.  Let's change it
+first so that any regressions in the rewrite will be easier to
+bisect and so that the rewrite can attempt to change no visible
+behavior at all.
+
+The fix is simple: we can simply avoid short-circuiting the
+calculation of base_pages_to_flush.
+
+As a side effect, this also eliminates a potential corner case: if
+tlb_single_page_flush_ceiling == TLB_FLUSH_ALL, flush_tlb_mm_range()
+could have ended up flushing the entire address space one page at a
+time.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/4b29b771d9975aad7154c314534fec235618175a.1492844372.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/tlb.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -307,6 +307,12 @@ void flush_tlb_mm_range(struct mm_struct
+ 	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
+ 
+ 	preempt_disable();
++
++	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
++		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
++	if (base_pages_to_flush > tlb_single_page_flush_ceiling)
++		base_pages_to_flush = TLB_FLUSH_ALL;
++
+ 	if (current->active_mm != mm) {
+ 		/* Synchronize with switch_mm. */
+ 		smp_mb();
+@@ -323,15 +329,11 @@ void flush_tlb_mm_range(struct mm_struct
+ 		goto out;
+ 	}
+ 
+-	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+-		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
+-
+ 	/*
+ 	 * Both branches below are implicit full barriers (MOV to CR or
+ 	 * INVLPG) that synchronize with switch_mm.
+ 	 */
+-	if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
+-		base_pages_to_flush = TLB_FLUSH_ALL;
++	if (base_pages_to_flush == TLB_FLUSH_ALL) {
+ 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ 		local_flush_tlb();
+ 	} else {
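
The reordering above means the page count is computed once, up front, so the local and remote flush paths see the same value. A minimal sketch of that size/ceiling calculation, using stand-in constants for PAGE_SHIFT, TLB_FLUSH_ALL and the ceiling:

#include <stdio.h>

#define PAGE_SHIFT	12
#define TLB_FLUSH_ALL	(~0UL)

static unsigned long tlb_single_page_flush_ceiling = 33;

static unsigned long pages_to_flush(unsigned long start, unsigned long end,
				    int is_hugetlb)
{
	unsigned long pages = TLB_FLUSH_ALL;

	if (end != TLB_FLUSH_ALL && !is_hugetlb)
		pages = (end - start) >> PAGE_SHIFT;
	if (pages > tlb_single_page_flush_ceiling)
		pages = TLB_FLUSH_ALL;

	return pages;
}

int main(void)
{
	/* 16 pages: small enough to flush one page at a time. */
	printf("pages      = %lu\n", pages_to_flush(0x1000, 0x11000, 0));
	/* 64 pages: above the ceiling, so fall back to a full flush. */
	printf("full flush = %d\n",
	       pages_to_flush(0x1000, 0x41000, 0) == TLB_FLUSH_ALL);
	return 0;
}
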
diff --git a/queue/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch b/queue/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
new file mode 100644
index 0000000..2153f10
--- /dev/null
+++ b/queue/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
@@ -0,0 +1,105 @@
+From d698c90a07e8c70354dad23e61434edf7de2c91c Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 22 May 2017 15:30:01 -0700
+Subject: [PATCH] x86/mm: Reimplement flush_tlb_page() using
+ flush_tlb_mm_range()
+
+commit ca6c99c0794875c6d1db6e22f246699691ab7e6b upstream.
+
+flush_tlb_page() was very similar to flush_tlb_mm_range() except that
+it had a couple of issues:
+
+ - It was missing an smp_mb() in the case where
+   current->active_mm != mm.  (This is a longstanding bug reported by Nadav Amit)
+
+ - It was missing tracepoints and vm counter updates.
+
+The only reason that I can see for keeping it as a separate
+function is that it could avoid a few branches that
+flush_tlb_mm_range() needs to decide whether to flush just one page.  This
+hardly seems worthwhile.  If we decide we want to get rid of those
+branches again, a better way would be to introduce an
+__flush_tlb_mm_range() helper and make both flush_tlb_page() and
+flush_tlb_mm_range() use it.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/3cc3847cf888d8907577569b8bac3f01992ef8f9.1495492063.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 8e7ae9e6c59a..abcd615ea27e 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -297,11 +297,15 @@ static inline void flush_tlb_kernel_range(unsigned long start,
+ 		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+ 
+ extern void flush_tlb_all(void);
+-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+ extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ 				unsigned long end, unsigned long vmflag);
+ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+ 
++static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
++{
++	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
++}
++
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+ 				struct mm_struct *mm,
+ 				unsigned long start, unsigned long end);
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 9db9260a5e9f..38f6e37959af 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -356,33 +356,6 @@ out:
+ 	preempt_enable();
+ }
+ 
+-void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
+-{
+-	struct mm_struct *mm = vma->vm_mm;
+-
+-	preempt_disable();
+-
+-	if (current->active_mm == mm) {
+-		if (current->mm) {
+-			/*
+-			 * Implicit full barrier (INVLPG) that synchronizes
+-			 * with switch_mm.
+-			 */
+-			__flush_tlb_one(start);
+-		} else {
+-			leave_mm(smp_processor_id());
+-
+-			/* Synchronize with switch_mm. */
+-			smp_mb();
+-		}
+-	}
+-
+-	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+-		flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
+-
+-	preempt_enable();
+-}
+-
+ static void do_flush_tlb_all(void *info)
+ {
+ 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+-- 
+2.15.0
+
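
The replacement flush_tlb_page() above is just a one-page call into flush_tlb_mm_range(), so the barriers, counters and tracepoints live in a single place. The same wrapper pattern in miniature, with local stand-in types and a print instead of a real flush:

#include <stdio.h>

#define PAGE_SIZE 4096UL

struct mm { int id; };

/* One fully instrumented range operation... */
static void flush_range(struct mm *mm, unsigned long start, unsigned long end)
{
	/* single home for accounting, barriers and remote shootdowns */
	printf("mm%d: flush [%#lx, %#lx)\n", mm->id, start, end);
}

/* ...and the single-page case expressed as a one-page range. */
static inline void flush_page(struct mm *mm, unsigned long addr)
{
	flush_range(mm, addr, addr + PAGE_SIZE);
}

int main(void)
{
	struct mm m = { .id = 1 };

	flush_page(&m, 0x400000UL);
	return 0;
}
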
diff --git a/queue/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch b/queue/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
new file mode 100644
index 0000000..1f63ed5
--- /dev/null
+++ b/queue/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
@@ -0,0 +1,101 @@
+From 29961b59a51f8c6838a26a45e871a7ed6771809b Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 22 Apr 2017 00:01:20 -0700
+Subject: x86/mm: Remove flush_tlb() and flush_tlb_current_task()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 29961b59a51f8c6838a26a45e871a7ed6771809b upstream.
+
+I was trying to figure out how flush_tlb_current_task() would
+possibly work correctly if current->mm != current->active_mm, but I
+realized I could spare myself the effort: it has no callers except
+the unused flush_tlb() macro.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/e52d64c11690f85e9f1d69d7b48cc2269cd2e94b.1492844372.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h |    9 ---------
+ arch/x86/mm/tlb.c               |   17 -----------------
+ 2 files changed, 26 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -205,7 +205,6 @@ static inline void __flush_tlb_one(unsig
+ /*
+  * TLB flushing:
+  *
+- *  - flush_tlb() flushes the current mm struct TLBs
+  *  - flush_tlb_all() flushes all processes TLBs
+  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+  *  - flush_tlb_page(vma, vmaddr) flushes one page
+@@ -237,11 +236,6 @@ static inline void flush_tlb_all(void)
+ 	__flush_tlb_all();
+ }
+ 
+-static inline void flush_tlb(void)
+-{
+-	__flush_tlb_up();
+-}
+-
+ static inline void local_flush_tlb(void)
+ {
+ 	__flush_tlb_up();
+@@ -303,14 +297,11 @@ static inline void flush_tlb_kernel_rang
+ 		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+ 
+ extern void flush_tlb_all(void);
+-extern void flush_tlb_current_task(void);
+ extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+ extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ 				unsigned long end, unsigned long vmflag);
+ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+ 
+-#define flush_tlb()	flush_tlb_current_task()
+-
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+ 				struct mm_struct *mm,
+ 				unsigned long start, unsigned long end);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -287,23 +287,6 @@ void native_flush_tlb_others(const struc
+ 	smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
+ }
+ 
+-void flush_tlb_current_task(void)
+-{
+-	struct mm_struct *mm = current->mm;
+-
+-	preempt_disable();
+-
+-	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-
+-	/* This is an implicit full barrier that synchronizes with switch_mm. */
+-	local_flush_tlb();
+-
+-	trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
+-	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+-		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
+-	preempt_enable();
+-}
+-
+ /*
+  * See Documentation/x86/tlb.txt for details.  We choose 33
+  * because it is large enough to cover the vast majority (at
diff --git a/queue/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch b/queue/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
new file mode 100644
index 0000000..649fa38
--- /dev/null
+++ b/queue/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
@@ -0,0 +1,305 @@
+From fd56dcc62b454fbbc7d9d6822b55953e5e945976 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:14 -0700
+Subject: [PATCH] x86/mm: Remove the UP asm/tlbflush.h code, always use the
+ (formerly) SMP code
+
+commit ce4a4e565f5264909a18c733b864c3f74467f69e upstream.
+
+The UP asm/tlbflush.h generates somewhat nicer code than the SMP version.
+Aside from that, it's fallen quite a bit behind the SMP code:
+
+ - flush_tlb_mm_range() didn't flush individual pages if the range
+   was small.
+
+ - The lazy TLB code was much weaker.  This usually wouldn't matter,
+   but, if a kernel thread flushed its lazy "active_mm" more than
+   once (due to reclaim or similar), it wouldn't be unlazied and
+   would instead pointlessly flush repeatedly.
+
+ - Tracepoints were missing.
+
+Aside from that, simply having the UP code around was a maintenance
+burden, since it meant that any change to the TLB flush code had to
+make sure not to break it.
+
+Simplify everything by deleting the UP code.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 951dc26b1a5e..63e83fe8987c 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -45,7 +45,7 @@ config X86
+ 	select ARCH_USE_CMPXCHG_LOCKREF		if X86_64
+ 	select ARCH_USE_QUEUED_RWLOCKS
+ 	select ARCH_USE_QUEUED_SPINLOCKS
+-	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
++	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+ 	select ARCH_WANT_FRAME_POINTERS
+ 	select ARCH_WANT_IPC_PARSE_VERSION	if X86_32
+diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
+index 59405a248fc2..9b76cd331990 100644
+--- a/arch/x86/include/asm/hardirq.h
++++ b/arch/x86/include/asm/hardirq.h
+@@ -22,8 +22,8 @@ typedef struct {
+ #ifdef CONFIG_SMP
+ 	unsigned int irq_resched_count;
+ 	unsigned int irq_call_count;
+-	unsigned int irq_tlb_count;
+ #endif
++	unsigned int irq_tlb_count;
+ #ifdef CONFIG_X86_THERMAL_VECTOR
+ 	unsigned int irq_thermal_count;
+ #endif
+diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
+index 1ea0baef1175..6deccb456060 100644
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -25,12 +25,6 @@ typedef struct {
+ 	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
+ } mm_context_t;
+ 
+-#ifdef CONFIG_SMP
+ void leave_mm(int cpu);
+-#else
+-static inline void leave_mm(int cpu)
+-{
+-}
+-#endif
+ 
+ #endif /* _ASM_X86_MMU_H */
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index d8abfcf524d1..d15f740111c9 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -98,10 +98,8 @@ static inline void load_mm_ldt(struct mm_struct *mm)
+ 
+ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ {
+-#ifdef CONFIG_SMP
+ 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+ 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+-#endif
+ }
+ 
+ static inline int init_new_context(struct task_struct *tsk,
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index abcd615ea27e..12dedd6c9e42 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -7,6 +7,7 @@
+ #include <asm/processor.h>
+ #include <asm/cpufeature.h>
+ #include <asm/special_insns.h>
++#include <asm/smp.h>
+ 
+ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ 			     unsigned long type)
+@@ -65,10 +66,8 @@ static inline void invpcid_flush_all_nonglobals(void)
+ #endif
+ 
+ struct tlb_state {
+-#ifdef CONFIG_SMP
+ 	struct mm_struct *active_mm;
+ 	int state;
+-#endif
+ 
+ 	/*
+ 	 * Access to this CR4 shadow and to H/W CR4 is protected by
+@@ -216,79 +215,6 @@ static inline void __flush_tlb_one(unsigned long addr)
+  * and page-granular flushes are available only on i486 and up.
+  */
+ 
+-#ifndef CONFIG_SMP
+-
+-/* "_up" is for UniProcessor.
+- *
+- * This is a helper for other header functions.  *Not* intended to be called
+- * directly.  All global TLB flushes need to either call this, or to bump the
+- * vm statistics themselves.
+- */
+-static inline void __flush_tlb_up(void)
+-{
+-	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-	__flush_tlb();
+-}
+-
+-static inline void flush_tlb_all(void)
+-{
+-	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-	__flush_tlb_all();
+-}
+-
+-static inline void local_flush_tlb(void)
+-{
+-	__flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_mm(struct mm_struct *mm)
+-{
+-	if (mm == current->active_mm)
+-		__flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_page(struct vm_area_struct *vma,
+-				  unsigned long addr)
+-{
+-	if (vma->vm_mm == current->active_mm)
+-		__flush_tlb_one(addr);
+-}
+-
+-static inline void flush_tlb_range(struct vm_area_struct *vma,
+-				   unsigned long start, unsigned long end)
+-{
+-	if (vma->vm_mm == current->active_mm)
+-		__flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_mm_range(struct mm_struct *mm,
+-	   unsigned long start, unsigned long end, unsigned long vmflag)
+-{
+-	if (mm == current->active_mm)
+-		__flush_tlb_up();
+-}
+-
+-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
+-					   struct mm_struct *mm,
+-					   unsigned long start,
+-					   unsigned long end)
+-{
+-}
+-
+-static inline void reset_lazy_tlbstate(void)
+-{
+-}
+-
+-static inline void flush_tlb_kernel_range(unsigned long start,
+-					  unsigned long end)
+-{
+-	flush_tlb_all();
+-}
+-
+-#else  /* SMP */
+-
+-#include <asm/smp.h>
+-
+ #define local_flush_tlb() __flush_tlb()
+ 
+ #define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
+@@ -319,8 +245,6 @@ static inline void reset_lazy_tlbstate(void)
+ 	this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
+ }
+ 
+-#endif	/* SMP */
+-
+ #ifndef CONFIG_PARAVIRT
+ #define flush_tlb_others(mask, mm, start, end)	\
+ 	native_flush_tlb_others(mask, mm, start, end)
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 8a427715f541..0381e949de17 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -762,10 +762,8 @@ void __init zone_sizes_init(void)
+ }
+ 
+ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+-#ifdef CONFIG_SMP
+ 	.active_mm = &init_mm,
+ 	.state = 0,
+-#endif
+ 	.cr4 = ~0UL,	/* fail hard if we screw up cr4 shadow initialization */
+ };
+ EXPORT_SYMBOL_GPL(cpu_tlbstate);
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 38f6e37959af..7882e4e3c113 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -15,7 +15,7 @@
+ #include <linux/debugfs.h>
+ 
+ /*
+- *	Smarter SMP flushing macros.
++ *	TLB flushing, formerly SMP-only
+  *		c/o Linus Torvalds.
+  *
+  *	These mean you can really definitely utterly forget about
+@@ -28,8 +28,6 @@
+  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+  */
+ 
+-#ifdef CONFIG_SMP
+-
+ struct flush_tlb_info {
+ 	struct mm_struct *flush_mm;
+ 	unsigned long flush_start;
+@@ -59,8 +57,6 @@ void leave_mm(int cpu)
+ }
+ EXPORT_SYMBOL_GPL(leave_mm);
+ 
+-#endif /* CONFIG_SMP */
+-
+ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ 	       struct task_struct *tsk)
+ {
+@@ -91,10 +87,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ 				set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+ 		}
+ 
+-#ifdef CONFIG_SMP
+ 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ 		this_cpu_write(cpu_tlbstate.active_mm, next);
+-#endif
+ 
+ 		cpumask_set_cpu(cpu, mm_cpumask(next));
+ 
+@@ -152,9 +146,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ 		if (unlikely(prev->context.ldt != next->context.ldt))
+ 			load_mm_ldt(next);
+ #endif
+-	}
+-#ifdef CONFIG_SMP
+-	  else {
++	} else {
+ 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ 		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+ 
+@@ -181,11 +173,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ 			load_mm_ldt(next);
+ 		}
+ 	}
+-#endif
+ }
+ 
+-#ifdef CONFIG_SMP
+-
+ /*
+  * The flush IPI assumes that a thread switch happens in this order:
+  * [cpu0: the cpu that switches]
+@@ -440,5 +429,3 @@ static int __init create_tlb_single_page_flush_ceiling(void)
+ 	return 0;
+ }
+ late_initcall(create_tlb_single_page_flush_ceiling);
+-
+-#endif /* CONFIG_SMP */
+-- 
+2.15.0
+
diff --git a/queue/x86-msr-add-definitions-for-new-speculation-control-msrs.patch b/queue/x86-msr-add-definitions-for-new-speculation-control-msrs.patch
new file mode 100644
index 0000000..884d448
--- /dev/null
+++ b/queue/x86-msr-add-definitions-for-new-speculation-control-msrs.patch
@@ -0,0 +1,63 @@
+From foo@baz Wed Feb  7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:12 +0000
+Subject: x86/msr: Add definitions for new speculation control MSRs
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit 1e340c60d0dd3ae07b5bedc16a0469c14b9f3410)
+
+Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES.
+
+See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-5-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -37,6 +37,13 @@
+ #define EFER_FFXSR		(1<<_EFER_FFXSR)
+ 
+ /* Intel MSRs. Some also available on other CPUs */
++#define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
++#define SPEC_CTRL_IBRS			(1 << 0)   /* Indirect Branch Restricted Speculation */
++#define SPEC_CTRL_STIBP			(1 << 1)   /* Single Thread Indirect Branch Predictors */
++
++#define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
++#define PRED_CMD_IBPB			(1 << 0)   /* Indirect Branch Prediction Barrier */
++
+ #define MSR_IA32_PERFCTR0		0x000000c1
+ #define MSR_IA32_PERFCTR1		0x000000c2
+ #define MSR_FSB_FREQ			0x000000cd
+@@ -50,6 +57,11 @@
+ #define SNB_C3_AUTO_UNDEMOTE		(1UL << 28)
+ 
+ #define MSR_MTRRcap			0x000000fe
++
++#define MSR_IA32_ARCH_CAPABILITIES	0x0000010a
++#define ARCH_CAP_RDCL_NO		(1 << 0)   /* Not susceptible to Meltdown */
++#define ARCH_CAP_IBRS_ALL		(1 << 1)   /* Enhanced IBRS support */
++
+ #define MSR_IA32_BBL_CR_CTL		0x00000119
+ #define MSR_IA32_BBL_CR_CTL3		0x0000011e
+ 
diff --git a/queue/x86-nospec-fix-header-guards-names.patch b/queue/x86-nospec-fix-header-guards-names.patch
new file mode 100644
index 0000000..a9bcc71
--- /dev/null
+++ b/queue/x86-nospec-fix-header-guards-names.patch
@@ -0,0 +1,53 @@
+From foo@baz Thu Feb  8 03:30:27 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 26 Jan 2018 13:11:37 +0100
+Subject: x86/nospec: Fix header guards names
+
+From: Borislav Petkov <bp@suse.de>
+
+(cherry picked from commit 7a32fc51ca938e67974cbb9db31e1a43f98345a9)
+
+... to adhere to the _ASM_X86_ naming scheme.
+
+No functional change.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: riel@redhat.com
+Cc: ak@linux.intel.com
+Cc: peterz@infradead.org
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: jikos@kernel.org
+Cc: luto@amacapital.net
+Cc: dave.hansen@intel.com
+Cc: torvalds@linux-foundation.org
+Cc: keescook@google.com
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Cc: pjt@google.com
+Link: https://lkml.kernel.org/r/20180126121139.31959-3-bp@alien8.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -1,7 +1,7 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+ 
+-#ifndef __NOSPEC_BRANCH_H__
+-#define __NOSPEC_BRANCH_H__
++#ifndef _ASM_X86_NOSPEC_BRANCH_H_
++#define _ASM_X86_NOSPEC_BRANCH_H_
+ 
+ #include <asm/alternative.h>
+ #include <asm/alternative-asm.h>
+@@ -232,4 +232,4 @@ static inline void indirect_branch_predi
+ }
+ 
+ #endif /* __ASSEMBLY__ */
+-#endif /* __NOSPEC_BRANCH_H__ */
++#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/queue/x86-paravirt-dont-patch-flush_tlb_single.patch b/queue/x86-paravirt-dont-patch-flush_tlb_single.patch
new file mode 100644
index 0000000..4783222
--- /dev/null
+++ b/queue/x86-paravirt-dont-patch-flush_tlb_single.patch
@@ -0,0 +1,68 @@
+From 7f6999b379b7f1c378345e436be46df760668145 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:30 +0100
+Subject: [PATCH] x86/paravirt: Dont patch flush_tlb_single
+
+commit a035795499ca1c2bd1928808d1a156eda1420383 upstream
+
+native_flush_tlb_single() will be changed with the upcoming
+PAGE_TABLE_ISOLATION feature. This requires to have more code in
+there than INVLPG.
+
+Remove the paravirt patching for it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Cc: michael.schwarz@iaik.tugraz.at
+Cc: moritz.lipp@iaik.tugraz.at
+Cc: richard.fellner@student.tugraz.at
+Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
+index e70087a04cc8..68f8273b152e 100644
+--- a/arch/x86/kernel/paravirt_patch_64.c
++++ b/arch/x86/kernel/paravirt_patch_64.c
+@@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
+ DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
+ DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
+ DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
+-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
+ DEF_NATIVE(pv_cpu_ops, clts, "clts");
+ DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
+ 
+@@ -59,7 +58,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+ 		PATCH_SITE(pv_mmu_ops, read_cr3);
+ 		PATCH_SITE(pv_mmu_ops, write_cr3);
+ 		PATCH_SITE(pv_cpu_ops, clts);
+-		PATCH_SITE(pv_mmu_ops, flush_tlb_single);
+ 		PATCH_SITE(pv_cpu_ops, wbinvd);
+ #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+ 		case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+-- 
+2.15.0
+
diff --git a/queue/x86-paravirt-remove-noreplace-paravirt-cmdline-option.patch b/queue/x86-paravirt-remove-noreplace-paravirt-cmdline-option.patch
new file mode 100644
index 0000000..a670a21
--- /dev/null
+++ b/queue/x86-paravirt-remove-noreplace-paravirt-cmdline-option.patch
@@ -0,0 +1,91 @@
+From foo@baz Thu Feb  8 03:32:24 CET 2018
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Tue, 30 Jan 2018 22:13:33 -0600
+Subject: x86/paravirt: Remove 'noreplace-paravirt' cmdline option
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+
+(cherry picked from commit 12c69f1e94c89d40696e83804dd2f0965b5250cd)
+
+The 'noreplace-paravirt' option disables paravirt patching, leaving the
+original pv indirect calls in place.
+
+That's highly incompatible with retpolines, unless we want to uglify
+paravirt even further and convert the paravirt calls to retpolines.
+
+As far as I can tell, the option doesn't seem to be useful for much
+other than introducing surprising corner cases and making the kernel
+vulnerable to Spectre v2.  It was probably a debug option from the early
+paravirt days.  So just remove it.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Rusty Russell <rusty@rustcorp.com.au>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Alok Kataria <akataria@vmware.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Link: https://lkml.kernel.org/r/20180131041333.2x6blhxirc2kclrq@treble
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt |    2 --
+ arch/x86/kernel/alternative.c       |   14 --------------
+ 2 files changed, 16 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2805,8 +2805,6 @@ bytes respectively. Such letter suffixes
+ 	norandmaps	Don't use address space randomization.  Equivalent to
+ 			echo 0 > /proc/sys/kernel/randomize_va_space
+ 
+-	noreplace-paravirt	[X86,IA-64,PV_OPS] Don't patch paravirt_ops
+-
+ 	noreplace-smp	[X86-32,SMP] Don't replace SMP instructions
+ 			with UP alternatives
+ 
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(ch
+ }
+ __setup("noreplace-smp", setup_noreplace_smp);
+ 
+-#ifdef CONFIG_PARAVIRT
+-static int __initdata_or_module noreplace_paravirt = 0;
+-
+-static int __init setup_noreplace_paravirt(char *str)
+-{
+-	noreplace_paravirt = 1;
+-	return 1;
+-}
+-__setup("noreplace-paravirt", setup_noreplace_paravirt);
+-#endif
+-
+ #define DPRINTK(fmt, args...)						\
+ do {									\
+ 	if (debug_alternative)						\
+@@ -588,9 +577,6 @@ void __init_or_module apply_paravirt(str
+ 	struct paravirt_patch_site *p;
+ 	char insnbuf[MAX_PATCH_LEN];
+ 
+-	if (noreplace_paravirt)
+-		return;
+-
+ 	for (p = start; p < end; p++) {
+ 		unsigned int used;
+ 
diff --git a/queue/x86-pti-do-not-enable-pti-on-cpus-which-are-not-vulnerable-to-meltdown.patch b/queue/x86-pti-do-not-enable-pti-on-cpus-which-are-not-vulnerable-to-meltdown.patch
new file mode 100644
index 0000000..ffbcbc7
--- /dev/null
+++ b/queue/x86-pti-do-not-enable-pti-on-cpus-which-are-not-vulnerable-to-meltdown.patch
@@ -0,0 +1,112 @@
+From foo@baz Wed Feb  7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:13 +0000
+Subject: x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit fec9434a12f38d3aeafeb75711b71d8a1fdef621)
+
+Also, for CPUs which don't speculate at all, don't report that they're
+vulnerable to the Spectre variants either.
+
+Leave the cpu_no_meltdown[] match table with just X86_VENDOR_AMD in it
+for now, even though that could be done with a simple comparison, on the
+assumption that we'll have more to add.
+
+Based on suggestions from Dave Hansen and Alan Cox.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-6-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |   48 ++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 43 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -44,6 +44,8 @@
+ #include <asm/pat.h>
+ #include <asm/microcode.h>
+ #include <asm/microcode_intel.h>
++#include <asm/intel-family.h>
++#include <asm/cpu_device_id.h>
+ 
+ #ifdef CONFIG_X86_LOCAL_APIC
+ #include <asm/uv/uv.h>
+@@ -838,6 +840,41 @@ static void identify_cpu_without_cpuid(s
+ #endif
+ }
+ 
++static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
++	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CEDARVIEW,	X86_FEATURE_ANY },
++	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CLOVERVIEW,	X86_FEATURE_ANY },
++	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_LINCROFT,	X86_FEATURE_ANY },
++	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_PENWELL,	X86_FEATURE_ANY },
++	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_PINEVIEW,	X86_FEATURE_ANY },
++	{ X86_VENDOR_CENTAUR,	5 },
++	{ X86_VENDOR_INTEL,	5 },
++	{ X86_VENDOR_NSC,	5 },
++	{ X86_VENDOR_ANY,	4 },
++	{}
++};
++
++static const __initdata struct x86_cpu_id cpu_no_meltdown[] = {
++	{ X86_VENDOR_AMD },
++	{}
++};
++
++static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
++{
++	u64 ia32_cap = 0;
++
++	if (x86_match_cpu(cpu_no_meltdown))
++		return false;
++
++	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
++		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
++
++	/* Rogue Data Cache Load? No! */
++	if (ia32_cap & ARCH_CAP_RDCL_NO)
++		return false;
++
++	return true;
++}
++
+ /*
+  * Do minimum CPU detection early.
+  * Fields really needed: vendor, cpuid_level, family, model, mask,
+@@ -884,11 +921,12 @@ static void __init early_identify_cpu(st
+ 
+ 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+ 
+-	if (c->x86_vendor != X86_VENDOR_AMD)
+-		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+-
+-	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+-	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++	if (!x86_match_cpu(cpu_no_speculation)) {
++		if (cpu_vulnerable_to_meltdown(c))
++			setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++		setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
++		setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++	}
+ 
+ 	fpu__init_system(c);
+ 
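
A userspace model of the triage logic added above: a small vendor match table short-circuits parts known not to be affected, and RDCL_NO from ARCH_CAPABILITIES clears newer parts. The table below carries only the AMD entry visible in the patch, and the string-based matching is a simplification of x86_match_cpu().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ARCH_CAP_RDCL_NO (1 << 0)

struct cpu_id { const char *vendor; int family; };

static const struct cpu_id cpu_no_meltdown[] = {
	{ "AMD", -1 },		/* -1: any family */
	{ NULL, 0 }
};

static bool match_cpu(const struct cpu_id *table,
		      const char *vendor, int family)
{
	for (; table->vendor; table++)
		if (!strcmp(table->vendor, vendor) &&
		    (table->family < 0 || table->family == family))
			return true;
	return false;
}

static bool vulnerable_to_meltdown(const char *vendor, int family,
				   bool has_arch_caps, uint64_t arch_caps)
{
	if (match_cpu(cpu_no_meltdown, vendor, family))
		return false;

	/* Rogue Data Cache Load? No! */
	if (has_arch_caps && (arch_caps & ARCH_CAP_RDCL_NO))
		return false;

	return true;
}

int main(void)
{
	printf("AMD fam 23:      %d\n", vulnerable_to_meltdown("AMD", 23, false, 0));
	printf("Intel, RDCL_NO:  %d\n",
	       vulnerable_to_meltdown("Intel", 6, true, ARCH_CAP_RDCL_NO));
	printf("Intel, legacy:   %d\n",
	       vulnerable_to_meltdown("Intel", 6, false, 0));
	return 0;
}
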
diff --git a/queue/x86-pti-document-fix-wrong-index.patch b/queue/x86-pti-document-fix-wrong-index.patch
new file mode 100644
index 0000000..cb7b93a
--- /dev/null
+++ b/queue/x86-pti-document-fix-wrong-index.patch
@@ -0,0 +1,32 @@
+From 98f0fceec7f84d80bc053e49e596088573086421 Mon Sep 17 00:00:00 2001
+From: "zhenwei.pi" <zhenwei.pi@youruncloud.com>
+Date: Thu, 18 Jan 2018 09:04:52 +0800
+Subject: x86/pti: Document fix wrong index
+
+From: zhenwei.pi <zhenwei.pi@youruncloud.com>
+
+commit 98f0fceec7f84d80bc053e49e596088573086421 upstream.
+
+In section <2. Runtime Cost>, fix wrong index.
+
+Signed-off-by: zhenwei.pi <zhenwei.pi@youruncloud.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: dave.hansen@linux.intel.com
+Link: https://lkml.kernel.org/r/1516237492-27739-1-git-send-email-zhenwei.pi@youruncloud.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/x86/pti.txt |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/Documentation/x86/pti.txt
++++ b/Documentation/x86/pti.txt
+@@ -78,7 +78,7 @@ this protection comes at a cost:
+      non-PTI SYSCALL entry code, so requires mapping fewer
+      things into the userspace page tables.  The downside is
+      that stacks must be switched at entry time.
+-  d. Global pages are disabled for all kernel structures not
++  c. Global pages are disabled for all kernel structures not
+      mapped into both kernel and userspace page tables.  This
+      feature of the MMU allows different processes to share TLB
+      entries mapping the kernel.  Losing the feature means more
diff --git a/queue/x86-pti-efi-broken-conversion-from-efi-to-kernel-page-table.patch b/queue/x86-pti-efi-broken-conversion-from-efi-to-kernel-page-table.patch
new file mode 100644
index 0000000..b38321d
--- /dev/null
+++ b/queue/x86-pti-efi-broken-conversion-from-efi-to-kernel-page-table.patch
@@ -0,0 +1,76 @@
+From pasha.tatashin@oracle.com  Mon Jan 15 18:48:49 2018
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+Date: Mon, 15 Jan 2018 11:44:14 -0500
+Subject: x86/pti/efi: broken conversion from efi to kernel page table
+To: steven.sistare@oracle.com, linux-kernel@vger.kernel.org, tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com, x86@kernel.org, gregkh@linuxfoundation.org, jkosina@suse.cz, hughd@google.com, dave.hansen@linux.intel.com, luto@kernel.org, stable@vger.kernel.org
+Message-ID: <20180115164414.19778-1-pasha.tatashin@oracle.com>
+
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+
+The page table allocation order must be increased for the EFI page table
+in order to avoid a bug where an NMI tries to switch to the kernel page
+table while the EFI page table is active.
+
+For more discussion about this bug, see this thread:
+http://lkml.iu.edu/hypermail/linux/kernel/1801.1/00951.html
+
+Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
+Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgalloc.h |   11 +++++++++++
+ arch/x86/mm/pgtable.c          |    7 -------
+ arch/x86/platform/efi/efi_64.c |    2 +-
+ 3 files changed, 12 insertions(+), 8 deletions(-)
+
+Changelog:
+	v1 - v2: Fixed compiling warning
+
+--- a/arch/x86/include/asm/pgalloc.h
++++ b/arch/x86/include/asm/pgalloc.h
+@@ -27,6 +27,17 @@ static inline void paravirt_release_pud(
+  */
+ extern gfp_t __userpte_alloc_gfp;
+ 
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++/*
++ * Instead of one PGD, we acquire two PGDs.  Being order-1, it is
++ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
++ * in a pointer to swap between the two 4k halves.
++ */
++#define PGD_ALLOCATION_ORDER 1
++#else
++#define PGD_ALLOCATION_ORDER 0
++#endif
++
+ /*
+  * Allocate and free page tables.
+  */
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -345,13 +345,6 @@ static inline void _pgd_free(pgd_t *pgd)
+ }
+ #else
+ 
+-/*
+- * Instead of one pgd, Kaiser acquires two pgds.  Being order-1, it is
+- * both 8k in size and 8k-aligned.  That lets us just flip bit 12
+- * in a pointer to swap between the two 4k halves.
+- */
+-#define PGD_ALLOCATION_ORDER	kaiser_enabled
+-
+ static inline pgd_t *_pgd_alloc(void)
+ {
+ 	return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+--- a/arch/x86/platform/efi/efi_64.c
++++ b/arch/x86/platform/efi/efi_64.c
+@@ -142,7 +142,7 @@ int __init efi_alloc_page_tables(void)
+ 		return 0;
+ 
+ 	gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
+-	efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
++	efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
+ 	if (!efi_pgd)
+ 		return -ENOMEM;
+ 
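
The PGD_ALLOCATION_ORDER comment above relies on the two PGD halves being one order-1 (8k, 8k-aligned) allocation, so switching between them is just a flip of bit 12. A userspace sketch of that layout, with aligned_alloc() standing in for __get_free_pages():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE		4096UL
#define PGD_ALLOCATION_ORDER	1	/* 2^1 pages when PTI is enabled */

int main(void)
{
	size_t size = PAGE_SIZE << PGD_ALLOCATION_ORDER;
	void *pgd = aligned_alloc(size, size);	/* 8k-sized and 8k-aligned */

	if (!pgd)
		return 1;

	/* Flipping bit 12 toggles between the two 4k halves. */
	void *user_pgd = (void *)((uintptr_t)pgd ^ PAGE_SIZE);

	printf("kernel pgd = %p\n", pgd);
	printf("user   pgd = %p\n", user_pgd);

	free(pgd);
	return 0;
}
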
diff --git a/queue/x86-pti-mark-constant-arrays-as-__initconst.patch b/queue/x86-pti-mark-constant-arrays-as-__initconst.patch
new file mode 100644
index 0000000..d9843e5
--- /dev/null
+++ b/queue/x86-pti-mark-constant-arrays-as-__initconst.patch
@@ -0,0 +1,53 @@
+From foo@baz Thu Feb  8 03:33:09 CET 2018
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Fri, 2 Feb 2018 22:39:23 +0100
+Subject: x86/pti: Mark constant arrays as __initconst
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+
+(cherry picked from commit 4bf5d56d429cbc96c23d809a08f63cd29e1a702e)
+
+I'm seeing build failures from the two newly introduced arrays that
+are marked 'const' and '__initdata', which are mutually exclusive:
+
+arch/x86/kernel/cpu/common.c:882:43: error: 'cpu_no_speculation' causes a section type conflict with 'e820_table_firmware_init'
+arch/x86/kernel/cpu/common.c:895:43: error: 'cpu_no_meltdown' causes a section type conflict with 'e820_table_firmware_init'
+
+The correct annotation is __initconst.
+
+Fixes: fec9434a12f3 ("x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Thomas Garnier <thgarnie@google.com>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://lkml.kernel.org/r/20180202213959.611210-1-arnd@arndb.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -861,7 +861,7 @@ static void identify_cpu_without_cpuid(s
+ #endif
+ }
+ 
+-static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
++static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
+ 	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CEDARVIEW,	X86_FEATURE_ANY },
+ 	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CLOVERVIEW,	X86_FEATURE_ANY },
+ 	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_LINCROFT,	X86_FEATURE_ANY },
+@@ -874,7 +874,7 @@ static const __initdata struct x86_cpu_i
+ 	{}
+ };
+ 
+-static const __initdata struct x86_cpu_id cpu_no_meltdown[] = {
++static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
+ 	{ X86_VENDOR_AMD },
+ 	{}
+ };
diff --git a/queue/x86-pti-rename-bug_cpu_insecure-to-bug_cpu_meltdown.patch b/queue/x86-pti-rename-bug_cpu_insecure-to-bug_cpu_meltdown.patch
new file mode 100644
index 0000000..5dce2f6
--- /dev/null
+++ b/queue/x86-pti-rename-bug_cpu_insecure-to-bug_cpu_meltdown.patch
@@ -0,0 +1,56 @@
+From de791821c295cc61419a06fe5562288417d1bc58 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 5 Jan 2018 15:27:34 +0100
+Subject: x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit de791821c295cc61419a06fe5562288417d1bc58 upstream.
+
+Use the name associated with the particular attack which needs page table
+isolation for mitigation.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
+Cc: Jiri Koshina <jikos@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Andi Lutomirski  <luto@amacapital.net>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Paul Turner <pjt@google.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Greg KH <gregkh@linux-foundation.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801051525300.1724@nanos
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    2 +-
+ arch/x86/kernel/cpu/common.c       |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -316,6 +316,6 @@
+ #define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
+ #define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400	X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+-#define X86_BUG_CPU_INSECURE	X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
++#define X86_BUG_CPU_MELTDOWN	X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+ 
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -884,7 +884,7 @@ static void __init early_identify_cpu(st
+ 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+ 
+ 	/* Assume for now that ALL x86 CPUs are insecure */
+-	setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
++	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ 
+ 	fpu__init_system(c);
+ }
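For reference, a minimal sketch (not taken from this patch) of how the renamed bug bit is typically consulted elsewhere: the usual cpu-bug helper reports whether the boot CPU needs page table isolation. The wrapper name below is made up for illustration.

/*
 * Illustrative sketch, not part of the patch: testing the renamed bit via
 * boot_cpu_has_bug().  pti_wanted_example() is a made-up helper name.
 */
#include <linux/types.h>
#include <linux/init.h>
#include <asm/cpufeature.h>

static bool __init pti_wanted_example(void)
{
	/* True when the boot CPU is marked as affected by Meltdown */
	return boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN);
}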
diff --git a/queue/x86-retpoline-add-initial-retpoline-support.patch b/queue/x86-retpoline-add-initial-retpoline-support.patch
new file mode 100644
index 0000000..7ea928b
--- /dev/null
+++ b/queue/x86-retpoline-add-initial-retpoline-support.patch
@@ -0,0 +1,359 @@
+From 76b043848fd22dbf7f8bf3a1452f8c70d557b860 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 11 Jan 2018 21:46:25 +0000
+Subject: x86/retpoline: Add initial retpoline support
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 76b043848fd22dbf7f8bf3a1452f8c70d557b860 upstream.
+
+Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide
+the corresponding thunks. Provide assembler macros for invoking the thunks
+in the same way that GCC does, from native and inline assembler.
+
+This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In
+some circumstances, IBRS microcode features may be used instead, and the
+retpoline can be disabled.
+
+On AMD CPUs if lfence is serialising, the retpoline can be dramatically
+simplified to a simple "lfence; jmp *\reg". A future patch, after it has
+been verified that lfence really is serialising in all circumstances, can
+enable this by setting the X86_FEATURE_RETPOLINE_AMD feature bit in addition
+to X86_FEATURE_RETPOLINE.
+
+Do not align the retpoline in the altinstr section, because there is no
+guarantee that it stays aligned when it's copied over the oldinstr during
+alternative patching.
+
+[ Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks]
+[ tglx: Put actual function CALL/JMP in front of the macros, convert to
+  	symbolic labels ]
+[ dwmw2: Convert back to numeric labels, merge objtool fixes ]
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-4-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig                      |   13 +++
+ arch/x86/Makefile                     |   10 ++
+ arch/x86/include/asm/asm-prototypes.h |   25 ++++++
+ arch/x86/include/asm/cpufeatures.h    |    3 
+ arch/x86/include/asm/nospec-branch.h  |  128 ++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/common.c          |    4 +
+ arch/x86/lib/Makefile                 |    1 
+ arch/x86/lib/retpoline.S              |   48 ++++++++++++
+ 8 files changed, 232 insertions(+)
+ create mode 100644 arch/x86/include/asm/nospec-branch.h
+ create mode 100644 arch/x86/lib/retpoline.S
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -408,6 +408,19 @@ config GOLDFISH
+        def_bool y
+        depends on X86_GOLDFISH
+ 
++config RETPOLINE
++	bool "Avoid speculative indirect branches in kernel"
++	default y
++	---help---
++	  Compile kernel with the retpoline compiler options to guard against
++	  kernel-to-user data leaks by avoiding speculative indirect
++	  branches. Requires a compiler with -mindirect-branch=thunk-extern
++	  support for full protection. The kernel may run slower.
++
++	  Without compiler support, at least indirect branches in assembler
++	  code are eliminated. Since this includes the syscall entry path,
++	  it is not entirely pointless.
++
+ if X86_32
+ config X86_EXTENDED_PLATFORM
+ 	bool "Support for extended (non-PC) x86 platforms"
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -182,6 +182,16 @@ KBUILD_CFLAGS += -fno-asynchronous-unwin
+ KBUILD_CFLAGS += $(mflags-y)
+ KBUILD_AFLAGS += $(mflags-y)
+ 
++# Avoid indirect branches in kernel to deal with Spectre
++ifdef CONFIG_RETPOLINE
++    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
++    ifneq ($(RETPOLINE_CFLAGS),)
++        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
++    else
++        $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.)
++    endif
++endif
++
+ archscripts: scripts_basic
+ 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
+ 
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -10,7 +10,32 @@
+ #include <asm/pgtable.h>
+ #include <asm/special_insns.h>
+ #include <asm/preempt.h>
++#include <asm/asm.h>
+ 
+ #ifndef CONFIG_X86_CMPXCHG64
+ extern void cmpxchg8b_emu(void);
+ #endif
++
++#ifdef CONFIG_RETPOLINE
++#ifdef CONFIG_X86_32
++#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
++#else
++#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
++INDIRECT_THUNK(8)
++INDIRECT_THUNK(9)
++INDIRECT_THUNK(10)
++INDIRECT_THUNK(11)
++INDIRECT_THUNK(12)
++INDIRECT_THUNK(13)
++INDIRECT_THUNK(14)
++INDIRECT_THUNK(15)
++#endif
++INDIRECT_THUNK(ax)
++INDIRECT_THUNK(bx)
++INDIRECT_THUNK(cx)
++INDIRECT_THUNK(dx)
++INDIRECT_THUNK(si)
++INDIRECT_THUNK(di)
++INDIRECT_THUNK(bp)
++INDIRECT_THUNK(sp)
++#endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -194,6 +194,9 @@
+ #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ 
++#define X86_FEATURE_RETPOLINE	( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
++
+ #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+--- /dev/null
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -0,0 +1,128 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++#ifndef __NOSPEC_BRANCH_H__
++#define __NOSPEC_BRANCH_H__
++
++#include <asm/alternative.h>
++#include <asm/alternative-asm.h>
++#include <asm/cpufeatures.h>
++
++#ifdef __ASSEMBLY__
++
++/*
++ * This should be used immediately before a retpoline alternative.  It tells
++ * objtool where the retpolines are so that it can make sense of the control
++ * flow by just reading the original instruction(s) and ignoring the
++ * alternatives.
++ */
++.macro ANNOTATE_NOSPEC_ALTERNATIVE
++	.Lannotate_\@:
++	.pushsection .discard.nospec
++	.long .Lannotate_\@ - .
++	.popsection
++.endm
++
++/*
++ * These are the bare retpoline primitives for indirect jmp and call.
++ * Do not use these directly; they only exist to make the ALTERNATIVE
++ * invocation below less ugly.
++ */
++.macro RETPOLINE_JMP reg:req
++	call	.Ldo_rop_\@
++.Lspec_trap_\@:
++	pause
++	jmp	.Lspec_trap_\@
++.Ldo_rop_\@:
++	mov	\reg, (%_ASM_SP)
++	ret
++.endm
++
++/*
++ * This is a wrapper around RETPOLINE_JMP so the called function in reg
++ * returns to the instruction after the macro.
++ */
++.macro RETPOLINE_CALL reg:req
++	jmp	.Ldo_call_\@
++.Ldo_retpoline_jmp_\@:
++	RETPOLINE_JMP \reg
++.Ldo_call_\@:
++	call	.Ldo_retpoline_jmp_\@
++.endm
++
++/*
++ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
++ * indirect jmp/call which may be susceptible to the Spectre variant 2
++ * attack.
++ */
++.macro JMP_NOSPEC reg:req
++#ifdef CONFIG_RETPOLINE
++	ANNOTATE_NOSPEC_ALTERNATIVE
++	ALTERNATIVE_2 __stringify(jmp *\reg),				\
++		__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE,	\
++		__stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
++#else
++	jmp	*\reg
++#endif
++.endm
++
++.macro CALL_NOSPEC reg:req
++#ifdef CONFIG_RETPOLINE
++	ANNOTATE_NOSPEC_ALTERNATIVE
++	ALTERNATIVE_2 __stringify(call *\reg),				\
++		__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
++		__stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
++#else
++	call	*\reg
++#endif
++.endm
++
++#else /* __ASSEMBLY__ */
++
++#define ANNOTATE_NOSPEC_ALTERNATIVE				\
++	"999:\n\t"						\
++	".pushsection .discard.nospec\n\t"			\
++	".long 999b - .\n\t"					\
++	".popsection\n\t"
++
++#if defined(CONFIG_X86_64) && defined(RETPOLINE)
++
++/*
++ * Since the inline asm uses the %V modifier which is only in newer GCC,
++ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
++ */
++# define CALL_NOSPEC						\
++	ANNOTATE_NOSPEC_ALTERNATIVE				\
++	ALTERNATIVE(						\
++	"call *%[thunk_target]\n",				\
++	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
++	X86_FEATURE_RETPOLINE)
++# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
++
++#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
++/*
++ * For i386 we use the original ret-equivalent retpoline, because
++ * otherwise we'll run out of registers. We don't care about CET
++ * here, anyway.
++ */
++# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n",	\
++	"       jmp    904f;\n"					\
++	"       .align 16\n"					\
++	"901:	call   903f;\n"					\
++	"902:	pause;\n"					\
++	"       jmp    902b;\n"					\
++	"       .align 16\n"					\
++	"903:	addl   $4, %%esp;\n"				\
++	"       pushl  %[thunk_target];\n"			\
++	"       ret;\n"						\
++	"       .align 16\n"					\
++	"904:	call   901b;\n",				\
++	X86_FEATURE_RETPOLINE)
++
++# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
++#else /* No retpoline */
++# define CALL_NOSPEC "call *%[thunk_target]\n"
++# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
++#endif
++
++#endif /* __ASSEMBLY__ */
++#endif /* __NOSPEC_BRANCH_H__ */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -889,6 +889,10 @@ static void __init early_identify_cpu(st
+ 	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ 	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ 
++#ifdef CONFIG_RETPOLINE
++	setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
++#endif
++
+ 	fpu__init_system(c);
+ 
+ #ifdef CONFIG_X86_32
+--- a/arch/x86/lib/Makefile
++++ b/arch/x86/lib/Makefile
+@@ -25,6 +25,7 @@ lib-y += memcpy_$(BITS).o
+ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
+ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
++lib-$(CONFIG_RETPOLINE) += retpoline.o
+ 
+ obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
+ 
+--- /dev/null
++++ b/arch/x86/lib/retpoline.S
+@@ -0,0 +1,48 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++#include <linux/stringify.h>
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/cpufeatures.h>
++#include <asm/alternative-asm.h>
++#include <asm/export.h>
++#include <asm/nospec-branch.h>
++
++.macro THUNK reg
++	.section .text.__x86.indirect_thunk.\reg
++
++ENTRY(__x86_indirect_thunk_\reg)
++	CFI_STARTPROC
++	JMP_NOSPEC %\reg
++	CFI_ENDPROC
++ENDPROC(__x86_indirect_thunk_\reg)
++.endm
++
++/*
++ * Despite being an assembler file we can't just use .irp here
++ * because __KSYM_DEPS__ only uses the C preprocessor and would
++ * only see one instance of "__x86_indirect_thunk_\reg" rather
++ * than one per register with the correct names. So we do it
++ * the simple and nasty way...
++ */
++#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
++#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
++
++GENERATE_THUNK(_ASM_AX)
++GENERATE_THUNK(_ASM_BX)
++GENERATE_THUNK(_ASM_CX)
++GENERATE_THUNK(_ASM_DX)
++GENERATE_THUNK(_ASM_SI)
++GENERATE_THUNK(_ASM_DI)
++GENERATE_THUNK(_ASM_BP)
++GENERATE_THUNK(_ASM_SP)
++#ifdef CONFIG_64BIT
++GENERATE_THUNK(r8)
++GENERATE_THUNK(r9)
++GENERATE_THUNK(r10)
++GENERATE_THUNK(r11)
++GENERATE_THUNK(r12)
++GENERATE_THUNK(r13)
++GENERATE_THUNK(r14)
++GENERATE_THUNK(r15)
++#endif
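As a usage note, a minimal sketch (assumed usage, not part of the patch) of the C-side pattern: an indirect call issued through CALL_NOSPEC with the target passed via THUNK_TARGET, so that with X86_FEATURE_RETPOLINE set the call is redirected through the matching __x86_indirect_thunk_<reg>. The wrapper name and the simplified clobber list are illustrative.

/*
 * Illustrative sketch, not part of the patch: a retpoline-safe indirect call
 * from C.  indirect_call_example() is a made-up name and the clobber list is
 * simplified; real callers spell out every caller-saved register the callee
 * may clobber.
 */
#include <asm/nospec-branch.h>

static unsigned long indirect_call_example(unsigned long (*fn)(void))
{
	unsigned long ret;

	asm volatile(CALL_NOSPEC
		     : "=a" (ret)		/* return value in %rax/%eax */
		     : THUNK_TARGET(fn)		/* [thunk_target] operand */
		     : "memory", "cc");
	return ret;
}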
diff --git a/queue/x86-retpoline-add-lfence-to-the-retpoline-rsb-filling-rsb-macros.patch b/queue/x86-retpoline-add-lfence-to-the-retpoline-rsb-filling-rsb-macros.patch
new file mode 100644