4.9 backports to help enable pending CVE fixes

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
diff --git a/queue/EXPORT_SYMBOL-for-asm.patch b/queue/EXPORT_SYMBOL-for-asm.patch
new file mode 100644
index 0000000..03a85f2
--- /dev/null
+++ b/queue/EXPORT_SYMBOL-for-asm.patch
@@ -0,0 +1,161 @@
+From 22823ab419d8ed884195cfa75483fd3a99bb1462 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 11 Jan 2016 10:54:54 -0500
+Subject: [PATCH] EXPORT_SYMBOL() for asm
+
+commit 22823ab419d8ed884195cfa75483fd3a99bb1462 upstream.
+
+Add asm-usable variants of EXPORT_SYMBOL/EXPORT_SYMBOL_GPL.  This
+commit just adds the default implementation; most of the architectures
+can simply add export.h to asm/Kbuild and start using <asm/export.h>
+from assembler.  The rest needs to have their <asm/export.h> define
+several macros and then explicitly include <asm-generic/export.h>
+
+One area where the things might diverge from default is the alignment;
+normally it's 8 bytes on 64bit targets and 4 on 32bit ones, both for
+unsigned long and for struct kernel_symbol.  Unfortunately, amd64 and
+m68k are unusual - m68k aligns to 2 bytes (for both) and amd64 aligns
+struct kernel_symbol to 16 bytes.  For those we'll need asm/export.h to
+override the constants used by generic version - KSYM_ALIGN and KCRC_ALIGN
+for kernel_symbol and unsigned long resp.  And no, __alignof__ would not
+do the trick - on amd64 __alignof__ of struct kernel_symbol is 8, not 16.
+
+More serious source of unpleasantness is treatment of function
+descriptors on architectures that have those.  Things like ppc64,
+parisc, ia64, etc.  need more than the address of the first insn to
+call an arbitrary function.  As the result, their representation of
+pointers to functions is not the typical "address of the entry point" -
+it's an address of a small static structure containing all the required
+information (including the entry point, of course).  Sadly, the asm-side
+conventions differ in what the function name refers to - entry point or
+the function descriptor.  On ppc64 we do the latter;
+	bar: .quad foo
+is what void (*bar)(void) = foo; turns into and the rare places where
+we need to explicitly work with the label of entry point are dealt with
+as DOTSYM(foo).  For our purposes it's ideal - generic macros are usable.
+However, parisc would have foo and P%foo used for label of entry point
+and address of the function descriptor and
+	bar: .long P%foo
+would be used instead.	ia64 goes similar to parisc in that respect,
+except that there it's @fptr(foo) rather than P%foo.  Such architectures
+need to define KSYM_FUNC that would turn a function name into whatever
+is needed to refer to function descriptor.
+
+What's more, on such architectures we need to know whether we are exporting
+a function or an object - in assembler we have to tell that explicitly, to
+decide whether we want EXPORT_SYMBOL(foo) produce e.g.
+	__ksymtab_foo: .quad foo
+or
+	__ksymtab_foo: .quad @fptr(foo)
+
+For that reason we introduce EXPORT_DATA_SYMBOL{,_GPL}(), to be used for
+exports of data objects.  On normal architectures it's the same thing
+as EXPORT_SYMBOL{,_GPL}(), but on parisc-like ones they differ and the
+right one needs to be used.  Most of the exports are functions, so we
+keep EXPORT_SYMBOL for those...
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+
+diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
+new file mode 100644
+index 000000000000..43199a049da5
+--- /dev/null
++++ b/include/asm-generic/export.h
+@@ -0,0 +1,94 @@
++#ifndef __ASM_GENERIC_EXPORT_H
++#define __ASM_GENERIC_EXPORT_H
++
++#ifndef KSYM_FUNC
++#define KSYM_FUNC(x) x
++#endif
++#ifdef CONFIG_64BIT
++#define __put .quad
++#ifndef KSYM_ALIGN
++#define KSYM_ALIGN 8
++#endif
++#ifndef KCRC_ALIGN
++#define KCRC_ALIGN 8
++#endif
++#else
++#define __put .long
++#ifndef KSYM_ALIGN
++#define KSYM_ALIGN 4
++#endif
++#ifndef KCRC_ALIGN
++#define KCRC_ALIGN 4
++#endif
++#endif
++
++#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
++#define KSYM(name) _##name
++#else
++#define KSYM(name) name
++#endif
++
++/*
++ * note on .section use: @progbits vs %progbits nastiness doesn't matter,
++ * since we immediately emit into those sections anyway.
++ */
++.macro ___EXPORT_SYMBOL name,val,sec
++#ifdef CONFIG_MODULES
++	.globl KSYM(__ksymtab_\name)
++	.section ___ksymtab\sec+\name,"a"
++	.balign KSYM_ALIGN
++KSYM(__ksymtab_\name):
++	__put \val, KSYM(__kstrtab_\name)
++	.previous
++	.section __ksymtab_strings,"a"
++KSYM(__kstrtab_\name):
++#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
++	.asciz "_\name"
++#else
++	.asciz "\name"
++#endif
++	.previous
++#ifdef CONFIG_MODVERSIONS
++	.section ___kcrctab\sec+\name,"a"
++	.balign KCRC_ALIGN
++KSYM(__kcrctab_\name):
++	__put KSYM(__crc_\name)
++	.weak KSYM(__crc_\name)
++	.previous
++#endif
++#endif
++.endm
++#undef __put
++
++#if defined(__KSYM_DEPS__)
++
++#define __EXPORT_SYMBOL(sym, val, sec)	=== __KSYM_##sym ===
++
++#elif defined(CONFIG_TRIM_UNUSED_KSYMS)
++
++#include <linux/kconfig.h>
++#include <generated/autoksyms.h>
++
++#define __EXPORT_SYMBOL(sym, val, sec)				\
++	__cond_export_sym(sym, val, sec, config_enabled(__KSYM_##sym))
++#define __cond_export_sym(sym, val, sec, conf)			\
++	___cond_export_sym(sym, val, sec, conf)
++#define ___cond_export_sym(sym, val, sec, enabled)		\
++	__cond_export_sym_##enabled(sym, val, sec)
++#define __cond_export_sym_1(sym, val, sec) ___EXPORT_SYMBOL sym, val, sec
++#define __cond_export_sym_0(sym, val, sec) /* nothing */
++
++#else
++#define __EXPORT_SYMBOL(sym, val, sec) ___EXPORT_SYMBOL sym, val, sec
++#endif
++
++#define EXPORT_SYMBOL(name)					\
++	__EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)),)
++#define EXPORT_SYMBOL_GPL(name) 				\
++	__EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)), _gpl)
++#define EXPORT_DATA_SYMBOL(name)				\
++	__EXPORT_SYMBOL(name, KSYM(name),)
++#define EXPORT_DATA_SYMBOL_GPL(name)				\
++	__EXPORT_SYMBOL(name, KSYM(name),_gpl)
++
++#endif
+-- 
+2.15.0
+
diff --git a/queue/fork-Add-generic-vmalloced-stack-support.patch b/queue/fork-Add-generic-vmalloced-stack-support.patch
new file mode 100644
index 0000000..e36cebb
--- /dev/null
+++ b/queue/fork-Add-generic-vmalloced-stack-support.patch
@@ -0,0 +1,292 @@
+From ba14a194a434ccc8f733e263ad2ce941e35e5787 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 11 Aug 2016 02:35:21 -0700
+Subject: [PATCH] fork: Add generic vmalloced stack support
+
+commit ba14a194a434ccc8f733e263ad2ce941e35e5787 upstream.
+
+If CONFIG_VMAP_STACK=y is selected, kernel stacks are allocated with
+__vmalloc_node_range().
+
+Grsecurity has had a similar feature (called GRKERNSEC_KSTACKOVERFLOW=y)
+for a long time.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/14c07d4fd173a5b117f51e8b939f9f4323e39899.1470907718.git.luto@kernel.org
+[ Minor edits. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/Kconfig b/arch/Kconfig
+index e9c9334507dd..9ecf9f6f9e15 100644
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -707,4 +707,38 @@ config ARCH_NO_COHERENT_DMA_MMAP
+ config CPU_NO_EFFICIENT_FFS
+ 	def_bool n
+ 
++config HAVE_ARCH_VMAP_STACK
++	def_bool n
++	help
++	  An arch should select this symbol if it can support kernel stacks
++	  in vmalloc space.  This means:
++
++	  - vmalloc space must be large enough to hold many kernel stacks.
++	    This may rule out many 32-bit architectures.
++
++	  - Stacks in vmalloc space need to work reliably.  For example, if
++	    vmap page tables are created on demand, either this mechanism
++	    needs to work while the stack points to a virtual address with
++	    unpopulated page tables or arch code (switch_to() and switch_mm(),
++	    most likely) needs to ensure that the stack's page table entries
++	    are populated before running on a possibly unpopulated stack.
++
++	  - If the stack overflows into a guard page, something reasonable
++	    should happen.  The definition of "reasonable" is flexible, but
++	    instantly rebooting without logging anything would be unfriendly.
++
++config VMAP_STACK
++	default y
++	bool "Use a virtually-mapped stack"
++	depends on HAVE_ARCH_VMAP_STACK && !KASAN
++	---help---
++	  Enable this if you want the use virtually-mapped kernel stacks
++	  with guard pages.  This causes kernel stack overflows to be
++	  caught immediately rather than causing difficult-to-diagnose
++	  corruption.
++
++	  This is presently incompatible with KASAN because KASAN expects
++	  the stack to map directly to the KASAN shadow map using a formula
++	  that is incorrect if the stack is in vmalloc space.
++
+ source "kernel/gcov/Kconfig"
+diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
+index 29bd59790d6c..c7026429816b 100644
+--- a/arch/ia64/include/asm/thread_info.h
++++ b/arch/ia64/include/asm/thread_info.h
+@@ -56,7 +56,7 @@ struct thread_info {
+ #define alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
+ #define task_thread_info(tsk)	((struct thread_info *) 0)
+ #endif
+-#define free_thread_stack(ti)	/* nothing */
++#define free_thread_stack(tsk)	/* nothing */
+ #define task_stack_page(tsk)	((void *)(tsk))
+ 
+ #define __HAVE_THREAD_FUNCTIONS
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 62c68e513e39..20f9f47bcfd0 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1923,6 +1923,9 @@ struct task_struct {
+ #ifdef CONFIG_MMU
+ 	struct task_struct *oom_reaper_list;
+ #endif
++#ifdef CONFIG_VMAP_STACK
++	struct vm_struct *stack_vm_area;
++#endif
+ /* CPU-specific state of this task */
+ 	struct thread_struct thread;
+ /*
+@@ -1939,6 +1942,18 @@ extern int arch_task_struct_size __read_mostly;
+ # define arch_task_struct_size (sizeof(struct task_struct))
+ #endif
+ 
++#ifdef CONFIG_VMAP_STACK
++static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
++{
++	return t->stack_vm_area;
++}
++#else
++static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
++{
++	return NULL;
++}
++#endif
++
+ /* Future-safe accessor for struct task_struct's cpus_allowed. */
+ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
+ 
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 52e725d4a866..9b85f6b2cdcd 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -158,19 +158,39 @@ void __weak arch_release_thread_stack(unsigned long *stack)
+  * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
+  * kmemcache based allocator.
+  */
+-# if THREAD_SIZE >= PAGE_SIZE
+-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
+-						  int node)
++# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
++static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+ {
++#ifdef CONFIG_VMAP_STACK
++	void *stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
++					   VMALLOC_START, VMALLOC_END,
++					   THREADINFO_GFP | __GFP_HIGHMEM,
++					   PAGE_KERNEL,
++					   0, node,
++					   __builtin_return_address(0));
++
++	/*
++	 * We can't call find_vm_area() in interrupt context, and
++	 * free_thread_stack() can be called in interrupt context,
++	 * so cache the vm_struct.
++	 */
++	if (stack)
++		tsk->stack_vm_area = find_vm_area(stack);
++	return stack;
++#else
+ 	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+ 					     THREAD_SIZE_ORDER);
+ 
+ 	return page ? page_address(page) : NULL;
++#endif
+ }
+ 
+-static inline void free_thread_stack(unsigned long *stack)
++static inline void free_thread_stack(struct task_struct *tsk)
+ {
+-	__free_pages(virt_to_page(stack), THREAD_SIZE_ORDER);
++	if (task_stack_vm_area(tsk))
++		vfree(tsk->stack);
++	else
++		__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+ }
+ # else
+ static struct kmem_cache *thread_stack_cache;
+@@ -181,9 +201,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
+ 	return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
+ }
+ 
+-static void free_thread_stack(unsigned long *stack)
++static void free_thread_stack(struct task_struct *tsk)
+ {
+-	kmem_cache_free(thread_stack_cache, stack);
++	kmem_cache_free(thread_stack_cache, tsk->stack);
+ }
+ 
+ void thread_stack_cache_init(void)
+@@ -213,24 +233,47 @@ struct kmem_cache *vm_area_cachep;
+ /* SLAB cache for mm_struct structures (tsk->mm) */
+ static struct kmem_cache *mm_cachep;
+ 
+-static void account_kernel_stack(unsigned long *stack, int account)
++static void account_kernel_stack(struct task_struct *tsk, int account)
+ {
+-	/* All stack pages are in the same zone and belong to the same memcg. */
+-	struct page *first_page = virt_to_page(stack);
++	void *stack = task_stack_page(tsk);
++	struct vm_struct *vm = task_stack_vm_area(tsk);
++
++	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
++
++	if (vm) {
++		int i;
++
++		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
++
++		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
++			mod_zone_page_state(page_zone(vm->pages[i]),
++					    NR_KERNEL_STACK_KB,
++					    PAGE_SIZE / 1024 * account);
++		}
+ 
+-	mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
+-			    THREAD_SIZE / 1024 * account);
++		/* All stack pages belong to the same memcg. */
++		memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
++					    account * (THREAD_SIZE / 1024));
++	} else {
++		/*
++		 * All stack pages are in the same zone and belong to the
++		 * same memcg.
++		 */
++		struct page *first_page = virt_to_page(stack);
++
++		mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
++				    THREAD_SIZE / 1024 * account);
+ 
+-	memcg_kmem_update_page_stat(
+-		first_page, MEMCG_KERNEL_STACK_KB,
+-		account * (THREAD_SIZE / 1024));
++		memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
++					    account * (THREAD_SIZE / 1024));
++	}
+ }
+ 
+ void free_task(struct task_struct *tsk)
+ {
+-	account_kernel_stack(tsk->stack, -1);
++	account_kernel_stack(tsk, -1);
+ 	arch_release_thread_stack(tsk->stack);
+-	free_thread_stack(tsk->stack);
++	free_thread_stack(tsk);
+ 	rt_mutex_debug_task_free(tsk);
+ 	ftrace_graph_exit_task(tsk);
+ 	put_seccomp_filter(tsk);
+@@ -342,6 +385,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ {
+ 	struct task_struct *tsk;
+ 	unsigned long *stack;
++	struct vm_struct *stack_vm_area;
+ 	int err;
+ 
+ 	if (node == NUMA_NO_NODE)
+@@ -354,11 +398,23 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ 	if (!stack)
+ 		goto free_tsk;
+ 
++	stack_vm_area = task_stack_vm_area(tsk);
++
+ 	err = arch_dup_task_struct(tsk, orig);
++
++	/*
++	 * arch_dup_task_struct() clobbers the stack-related fields.  Make
++	 * sure they're properly initialized before using any stack-related
++	 * functions again.
++	 */
++	tsk->stack = stack;
++#ifdef CONFIG_VMAP_STACK
++	tsk->stack_vm_area = stack_vm_area;
++#endif
++
+ 	if (err)
+ 		goto free_stack;
+ 
+-	tsk->stack = stack;
+ #ifdef CONFIG_SECCOMP
+ 	/*
+ 	 * We must handle setting up seccomp filters once we're under
+@@ -390,14 +446,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ 	tsk->task_frag.page = NULL;
+ 	tsk->wake_q.next = NULL;
+ 
+-	account_kernel_stack(stack, 1);
++	account_kernel_stack(tsk, 1);
+ 
+ 	kcov_task_init(tsk);
+ 
+ 	return tsk;
+ 
+ free_stack:
+-	free_thread_stack(stack);
++	free_thread_stack(tsk);
+ free_tsk:
+ 	free_task_struct(tsk);
+ 	return NULL;
+-- 
+2.15.0
+
diff --git a/queue/fork-Optimize-task-creation-by-caching-two-thread-st.patch b/queue/fork-Optimize-task-creation-by-caching-two-thread-st.patch
new file mode 100644
index 0000000..5ccc7e0
--- /dev/null
+++ b/queue/fork-Optimize-task-creation-by-caching-two-thread-st.patch
@@ -0,0 +1,121 @@
+From ac496bf48d97f2503eaa353996a4dd5e4383eaf0 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 15 Sep 2016 22:45:49 -0700
+Subject: [PATCH] fork: Optimize task creation by caching two thread stacks per
+ CPU if CONFIG_VMAP_STACK=y
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit ac496bf48d97f2503eaa353996a4dd5e4383eaf0 upstream.
+
+vmalloc() is a bit slow, and pounding vmalloc()/vfree() will eventually
+force a global TLB flush.
+
+To reduce pressure on them, if CONFIG_VMAP_STACK=y, cache two thread
+stacks per CPU.  This will let us quickly allocate a hopefully
+cache-hot, TLB-hot stack under heavy forking workloads (shell script style).
+
+On my silly pthread_create() benchmark, it saves about 2 µs per
+pthread_create()+join() with CONFIG_VMAP_STACK=y.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jann Horn <jann@thejh.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/94811d8e3994b2e962f88866290017d498eb069c.1474003868.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 5dd0a516626d..c060c7e7c247 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -159,15 +159,41 @@ void __weak arch_release_thread_stack(unsigned long *stack)
+  * kmemcache based allocator.
+  */
+ # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
++
++#ifdef CONFIG_VMAP_STACK
++/*
++ * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
++ * flush.  Try to minimize the number of calls by caching stacks.
++ */
++#define NR_CACHED_STACKS 2
++static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
++#endif
++
+ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+ {
+ #ifdef CONFIG_VMAP_STACK
+-	void *stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
+-					   VMALLOC_START, VMALLOC_END,
+-					   THREADINFO_GFP | __GFP_HIGHMEM,
+-					   PAGE_KERNEL,
+-					   0, node,
+-					   __builtin_return_address(0));
++	void *stack;
++	int i;
++
++	local_irq_disable();
++	for (i = 0; i < NR_CACHED_STACKS; i++) {
++		struct vm_struct *s = this_cpu_read(cached_stacks[i]);
++
++		if (!s)
++			continue;
++		this_cpu_write(cached_stacks[i], NULL);
++
++		tsk->stack_vm_area = s;
++		local_irq_enable();
++		return s->addr;
++	}
++	local_irq_enable();
++
++	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
++				     VMALLOC_START, VMALLOC_END,
++				     THREADINFO_GFP | __GFP_HIGHMEM,
++				     PAGE_KERNEL,
++				     0, node, __builtin_return_address(0));
+ 
+ 	/*
+ 	 * We can't call find_vm_area() in interrupt context, and
+@@ -187,10 +213,28 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+ 
+ static inline void free_thread_stack(struct task_struct *tsk)
+ {
+-	if (task_stack_vm_area(tsk))
++#ifdef CONFIG_VMAP_STACK
++	if (task_stack_vm_area(tsk)) {
++		unsigned long flags;
++		int i;
++
++		local_irq_save(flags);
++		for (i = 0; i < NR_CACHED_STACKS; i++) {
++			if (this_cpu_read(cached_stacks[i]))
++				continue;
++
++			this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
++			local_irq_restore(flags);
++			return;
++		}
++		local_irq_restore(flags);
++
+ 		vfree(tsk->stack);
+-	else
+-		__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
++		return;
++	}
++#endif
++
++	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+ }
+ # else
+ static struct kmem_cache *thread_stack_cache;
+-- 
+2.15.0
+
diff --git a/queue/locking-static_keys-Provide-DECLARE-and-well-as-DEFI.patch b/queue/locking-static_keys-Provide-DECLARE-and-well-as-DEFI.patch
new file mode 100644
index 0000000..9539d43
--- /dev/null
+++ b/queue/locking-static_keys-Provide-DECLARE-and-well-as-DEFI.patch
@@ -0,0 +1,42 @@
+From b8fb03785d4de097507d0cf45873525e0ac4d2b2 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Thu, 1 Sep 2016 11:39:33 -0700
+Subject: [PATCH] locking/static_keys: Provide DECLARE and well as DEFINE
+ macros
+
+commit b8fb03785d4de097507d0cf45873525e0ac4d2b2 upstream.
+
+We will need to provide declarations of static keys in header
+files. Provide DECLARE_STATIC_KEY_{TRUE,FALSE} macros.
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/r/816881cf85bd3cf13385d212882618f38a3b5d33.1472754711.git.tony.luck@intel.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
+index 661af564fae8..595fb46213fc 100644
+--- a/include/linux/jump_label.h
++++ b/include/linux/jump_label.h
+@@ -267,9 +267,15 @@ struct static_key_false {
+ #define DEFINE_STATIC_KEY_TRUE(name)	\
+ 	struct static_key_true name = STATIC_KEY_TRUE_INIT
+ 
++#define DECLARE_STATIC_KEY_TRUE(name)	\
++	extern struct static_key_true name
++
+ #define DEFINE_STATIC_KEY_FALSE(name)	\
+ 	struct static_key_false name = STATIC_KEY_FALSE_INIT
+ 
++#define DECLARE_STATIC_KEY_FALSE(name)	\
++	extern struct static_key_false name
++
+ extern bool ____wrong_branch_error(void);
+ 
+ #define static_key_enabled(x)							\
+-- 
+2.15.0
+
diff --git a/queue/sched-core-Add-try_get_task_stack-and-put_task_stack.patch b/queue/sched-core-Add-try_get_task_stack-and-put_task_stack.patch
new file mode 100644
index 0000000..513a9ea
--- /dev/null
+++ b/queue/sched-core-Add-try_get_task_stack-and-put_task_stack.patch
@@ -0,0 +1,81 @@
+From c6c314a613cd7d03fb97713e0d642b493de42e69 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 15 Sep 2016 22:45:43 -0700
+Subject: [PATCH] sched/core: Add try_get_task_stack() and put_task_stack()
+
+commit c6c314a613cd7d03fb97713e0d642b493de42e69 upstream.
+
+There are a few places in the kernel that access stack memory
+belonging to a different task.  Before we can start freeing task
+stacks before the task_struct is freed, we need a way for those code
+paths to pin the stack.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jann Horn <jann@thejh.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/17a434f50ad3d77000104f21666575e10a9c1fbd.1474003868.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index a287e8b13549..a95867267e9f 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -3094,11 +3094,19 @@ static inline struct thread_info *task_thread_info(struct task_struct *task)
+ {
+ 	return &task->thread_info;
+ }
++
++/*
++ * When accessing the stack of a non-current task that might exit, use
++ * try_get_task_stack() instead.  task_stack_page will return a pointer
++ * that could get freed out from under you.
++ */
+ static inline void *task_stack_page(const struct task_struct *task)
+ {
+ 	return task->stack;
+ }
++
+ #define setup_thread_stack(new,old)	do { } while(0)
++
+ static inline unsigned long *end_of_stack(const struct task_struct *task)
+ {
+ 	return task->stack;
+@@ -3134,6 +3142,14 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
+ }
+ 
+ #endif
++
++static inline void *try_get_task_stack(struct task_struct *tsk)
++{
++	return task_stack_page(tsk);
++}
++
++static inline void put_task_stack(struct task_struct *tsk) {}
++
+ #define task_stack_end_corrupted(task) \
+ 		(*(end_of_stack(task)) != STACK_END_MAGIC)
+ 
+diff --git a/init/Kconfig b/init/Kconfig
+index ec8d43894b02..3b9a47fe843b 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -33,6 +33,9 @@ config THREAD_INFO_IN_TASK
+ 	  make this work, an arch will need to remove all thread_info fields
+ 	  except flags and fix any runtime bugs.
+ 
++	  One subtle change that will be needed is to use try_get_task_stack()
++	  and put_task_stack() in save_thread_stack_tsk() and get_wchan().
++
+ menu "General setup"
+ 
+ config BROKEN
+-- 
+2.15.0
+
diff --git a/queue/sched-core-Allow-putting-thread_info-into-task_struc.patch b/queue/sched-core-Allow-putting-thread_info-into-task_struc.patch
new file mode 100644
index 0000000..c584e03
--- /dev/null
+++ b/queue/sched-core-Allow-putting-thread_info-into-task_struc.patch
@@ -0,0 +1,219 @@
+From c65eacbe290b8141554c71b2c94489e73ade8c8d Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 13 Sep 2016 14:29:24 -0700
+Subject: [PATCH] sched/core: Allow putting thread_info into task_struct
+
+commit c65eacbe290b8141554c71b2c94489e73ade8c8d upstream.
+
+If an arch opts in by setting CONFIG_THREAD_INFO_IN_TASK_STRUCT,
+then thread_info is defined as a single 'u32 flags' and is the first
+entry of task_struct.  thread_info::task is removed (it serves no
+purpose if thread_info is embedded in task_struct), and
+thread_info::cpu gets its own slot in task_struct.
+
+This is heavily based on a patch written by Linus.
+
+Originally-from: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jann Horn <jann@thejh.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/a0898196f0476195ca02713691a5037a14f2aac5.1473801993.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/include/linux/init_task.h b/include/linux/init_task.h
+index f8834f820ec2..9c04d44eeb3c 100644
+--- a/include/linux/init_task.h
++++ b/include/linux/init_task.h
+@@ -15,6 +15,8 @@
+ #include <net/net_namespace.h>
+ #include <linux/sched/rt.h>
+ 
++#include <asm/thread_info.h>
++
+ #ifdef CONFIG_SMP
+ # define INIT_PUSHABLE_TASKS(tsk)					\
+ 	.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
+@@ -183,12 +185,19 @@ extern struct task_group root_task_group;
+ # define INIT_KASAN(tsk)
+ #endif
+ 
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk),
++#else
++# define INIT_TASK_TI(tsk)
++#endif
++
+ /*
+  *  INIT_TASK is used to set up the first task table, touch at
+  * your own risk!. Base=0, limit=0x1fffff (=2MB)
+  */
+ #define INIT_TASK(tsk)	\
+ {									\
++	INIT_TASK_TI(tsk)						\
+ 	.state		= 0,						\
+ 	.stack		= init_stack,					\
+ 	.usage		= ATOMIC_INIT(2),				\
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 20f9f47bcfd0..a287e8b13549 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1458,6 +1458,13 @@ struct tlbflush_unmap_batch {
+ };
+ 
+ struct task_struct {
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++	/*
++	 * For reasons of header soup (see current_thread_info()), this
++	 * must be the first element of task_struct.
++	 */
++	struct thread_info thread_info;
++#endif
+ 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
+ 	void *stack;
+ 	atomic_t usage;
+@@ -1467,6 +1474,9 @@ struct task_struct {
+ #ifdef CONFIG_SMP
+ 	struct llist_node wake_entry;
+ 	int on_cpu;
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++	unsigned int cpu;	/* current CPU */
++#endif
+ 	unsigned int wakee_flips;
+ 	unsigned long wakee_flip_decay_ts;
+ 	struct task_struct *last_wakee;
+@@ -2588,7 +2598,9 @@ extern void set_curr_task(int cpu, struct task_struct *p);
+ void yield(void);
+ 
+ union thread_union {
++#ifndef CONFIG_THREAD_INFO_IN_TASK
+ 	struct thread_info thread_info;
++#endif
+ 	unsigned long stack[THREAD_SIZE/sizeof(long)];
+ };
+ 
+@@ -3076,10 +3088,26 @@ static inline void threadgroup_change_end(struct task_struct *tsk)
+ 	cgroup_threadgroup_change_end(tsk);
+ }
+ 
+-#ifndef __HAVE_THREAD_FUNCTIONS
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++
++static inline struct thread_info *task_thread_info(struct task_struct *task)
++{
++	return &task->thread_info;
++}
++static inline void *task_stack_page(const struct task_struct *task)
++{
++	return task->stack;
++}
++#define setup_thread_stack(new,old)	do { } while(0)
++static inline unsigned long *end_of_stack(const struct task_struct *task)
++{
++	return task->stack;
++}
++
++#elif !defined(__HAVE_THREAD_FUNCTIONS)
+ 
+ #define task_thread_info(task)	((struct thread_info *)(task)->stack)
+-#define task_stack_page(task)	((task)->stack)
++#define task_stack_page(task)	((void *)(task)->stack)
+ 
+ static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
+ {
+@@ -3379,7 +3407,11 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
+ 
+ static inline unsigned int task_cpu(const struct task_struct *p)
+ {
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++	return p->cpu;
++#else
+ 	return task_thread_info(p)->cpu;
++#endif
+ }
+ 
+ static inline int task_node(const struct task_struct *p)
+diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
+index 2b5b10eed74f..e2d0fd81b1ba 100644
+--- a/include/linux/thread_info.h
++++ b/include/linux/thread_info.h
+@@ -13,6 +13,21 @@
+ struct timespec;
+ struct compat_timespec;
+ 
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++struct thread_info {
++	u32			flags;		/* low level flags */
++};
++
++#define INIT_THREAD_INFO(tsk)			\
++{						\
++	.flags		= 0,			\
++}
++#endif
++
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++#define current_thread_info() ((struct thread_info *)current)
++#endif
++
+ /*
+  * System call restart block.
+  */
+diff --git a/init/Kconfig b/init/Kconfig
+index cac3f096050d..ec8d43894b02 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -26,6 +26,13 @@ config IRQ_WORK
+ config BUILDTIME_EXTABLE_SORT
+ 	bool
+ 
++config THREAD_INFO_IN_TASK
++	bool
++	help
++	  Select this to move thread_info off the stack into task_struct.  To
++	  make this work, an arch will need to remove all thread_info fields
++	  except flags and fix any runtime bugs.
++
+ menu "General setup"
+ 
+ config BROKEN
+diff --git a/init/init_task.c b/init/init_task.c
+index ba0a7f362d9e..11f83be1fa79 100644
+--- a/init/init_task.c
++++ b/init/init_task.c
+@@ -22,5 +22,8 @@ EXPORT_SYMBOL(init_task);
+  * Initial thread structure. Alignment of this is handled by a special
+  * linker map entry.
+  */
+-union thread_union init_thread_union __init_task_data =
+-	{ INIT_THREAD_INFO(init_task) };
++union thread_union init_thread_union __init_task_data = {
++#ifndef CONFIG_THREAD_INFO_IN_TASK
++	INIT_THREAD_INFO(init_task)
++#endif
++};
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index c64fc5114004..3655c9625e5b 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -1000,7 +1000,11 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+ 	 * per-task data have been completed by this moment.
+ 	 */
+ 	smp_wmb();
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++	p->cpu = cpu;
++#else
+ 	task_thread_info(p)->cpu = cpu;
++#endif
+ 	p->wake_cpu = cpu;
+ #endif
+ }
+-- 
+2.15.0
+
diff --git a/queue/sched-core-Free-the-stack-early-if-CONFIG_THREAD_INF.patch b/queue/sched-core-Free-the-stack-early-if-CONFIG_THREAD_INF.patch
new file mode 100644
index 0000000..780badc
--- /dev/null
+++ b/queue/sched-core-Free-the-stack-early-if-CONFIG_THREAD_INF.patch
@@ -0,0 +1,168 @@
+From 68f24b08ee892d47bdef925d676e1ae1ccc316f8 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 15 Sep 2016 22:45:48 -0700
+Subject: [PATCH] sched/core: Free the stack early if
+ CONFIG_THREAD_INFO_IN_TASK
+
+commit 68f24b08ee892d47bdef925d676e1ae1ccc316f8 upstream.
+
+We currently keep every task's stack around until the task_struct
+itself is freed.  This means that we keep the stack allocation alive
+for longer than necessary and that, under load, we free stacks in
+big batches whenever RCU drops the last task reference.  Neither of
+these is good for reuse of cache-hot memory, and freeing in batches
+prevents us from usefully caching small numbers of vmalloced stacks.
+
+On architectures that have thread_info on the stack, we can't easily
+change this, but on architectures that set THREAD_INFO_IN_TASK, we
+can free it as soon as the task is dead.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jann Horn <jann@thejh.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/08ca06cde00ebed0046c5d26cbbf3fbb7ef5b812.1474003868.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/include/linux/init_task.h b/include/linux/init_task.h
+index 9c04d44eeb3c..325f649d77ff 100644
+--- a/include/linux/init_task.h
++++ b/include/linux/init_task.h
+@@ -186,7 +186,9 @@ extern struct task_group root_task_group;
+ #endif
+ 
+ #ifdef CONFIG_THREAD_INFO_IN_TASK
+-# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk),
++# define INIT_TASK_TI(tsk)			\
++	.thread_info = INIT_THREAD_INFO(tsk),	\
++	.stack_refcount = ATOMIC_INIT(1),
+ #else
+ # define INIT_TASK_TI(tsk)
+ #endif
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index a95867267e9f..abb795afc823 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1936,6 +1936,10 @@ struct task_struct {
+ #ifdef CONFIG_VMAP_STACK
+ 	struct vm_struct *stack_vm_area;
+ #endif
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++	/* A live task holds one reference. */
++	atomic_t stack_refcount;
++#endif
+ /* CPU-specific state of this task */
+ 	struct thread_struct thread;
+ /*
+@@ -3143,12 +3147,22 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
+ 
+ #endif
+ 
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++static inline void *try_get_task_stack(struct task_struct *tsk)
++{
++	return atomic_inc_not_zero(&tsk->stack_refcount) ?
++		task_stack_page(tsk) : NULL;
++}
++
++extern void put_task_stack(struct task_struct *tsk);
++#else
+ static inline void *try_get_task_stack(struct task_struct *tsk)
+ {
+ 	return task_stack_page(tsk);
+ }
+ 
+ static inline void put_task_stack(struct task_struct *tsk) {}
++#endif
+ 
+ #define task_stack_end_corrupted(task) \
+ 		(*(end_of_stack(task)) != STACK_END_MAGIC)
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 0c240fd5beba..5dd0a516626d 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -269,11 +269,40 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
+ 	}
+ }
+ 
+-void free_task(struct task_struct *tsk)
++static void release_task_stack(struct task_struct *tsk)
+ {
+ 	account_kernel_stack(tsk, -1);
+ 	arch_release_thread_stack(tsk->stack);
+ 	free_thread_stack(tsk);
++	tsk->stack = NULL;
++#ifdef CONFIG_VMAP_STACK
++	tsk->stack_vm_area = NULL;
++#endif
++}
++
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++void put_task_stack(struct task_struct *tsk)
++{
++	if (atomic_dec_and_test(&tsk->stack_refcount))
++		release_task_stack(tsk);
++}
++#endif
++
++void free_task(struct task_struct *tsk)
++{
++#ifndef CONFIG_THREAD_INFO_IN_TASK
++	/*
++	 * The task is finally done with both the stack and thread_info,
++	 * so free both.
++	 */
++	release_task_stack(tsk);
++#else
++	/*
++	 * If the task had a separate stack allocation, it should be gone
++	 * by now.
++	 */
++	WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
++#endif
+ 	rt_mutex_debug_task_free(tsk);
+ 	ftrace_graph_exit_task(tsk);
+ 	put_seccomp_filter(tsk);
+@@ -411,6 +440,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ #ifdef CONFIG_VMAP_STACK
+ 	tsk->stack_vm_area = stack_vm_area;
+ #endif
++#ifdef CONFIG_THREAD_INFO_IN_TASK
++	atomic_set(&tsk->stack_refcount, 1);
++#endif
+ 
+ 	if (err)
+ 		goto free_stack;
+@@ -1771,6 +1803,7 @@ bad_fork_cleanup_count:
+ 	atomic_dec(&p->cred->user->processes);
+ 	exit_creds(p);
+ bad_fork_free:
++	put_task_stack(p);
+ 	free_task(p);
+ fork_out:
+ 	return ERR_PTR(retval);
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 0b6238f18da2..23c6037e2d89 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2772,6 +2772,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
+ 		 * task and put them back on the free list.
+ 		 */
+ 		kprobe_flush_task(prev);
++
++		/* Task is done with its stack. */
++		put_task_stack(prev);
++
+ 		put_task_struct(prev);
+ 	}
+ 
+-- 
+2.15.0
+
diff --git a/queue/sched-core-x86-Make-struct-thread_info-arch-specific.patch b/queue/sched-core-x86-Make-struct-thread_info-arch-specific.patch
new file mode 100644
index 0000000..140dced
--- /dev/null
+++ b/queue/sched-core-x86-Make-struct-thread_info-arch-specific.patch
@@ -0,0 +1,91 @@
+From c8061485a0d7569a865a3cc3c63347b0f42b3765 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+Date: Wed, 19 Oct 2016 19:28:11 +0100
+Subject: [PATCH] sched/core, x86: Make struct thread_info arch specific again
+
+commit c8061485a0d7569a865a3cc3c63347b0f42b3765 upstream.
+
+The following commit:
+
+  c65eacbe290b ("sched/core: Allow putting thread_info into task_struct")
+
+... made 'struct thread_info' a generic struct with only a
+single ::flags member, if CONFIG_THREAD_INFO_IN_TASK_STRUCT=y is
+selected.
+
+This change however seems to be quite x86 centric, since at least the
+generic preemption code (asm-generic/preempt.h) assumes that struct
+thread_info also has a preempt_count member, which apparently was not
+true for x86.
+
+We could add a bit more #ifdefs to solve this problem too, but it seems
+to be much simpler to make struct thread_info arch specific
+again. This also makes the conversion to THREAD_INFO_IN_TASK_STRUCT a
+bit easier for architectures that have a couple of arch specific stuff
+in their thread_info definition.
+
+The arch specific stuff _could_ be moved to thread_struct. However
+keeping them in thread_info makes it easier: accessing thread_info
+members is simple, since it is at the beginning of the task_struct,
+while the thread_struct is at the end. At least on s390 the offsets
+needed to access members of the thread_struct (with task_struct as
+base) are too large for various asm instructions.  This is not a
+problem when keeping these members within thread_info.
+
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: keescook@chromium.org
+Cc: linux-arch@vger.kernel.org
+Link: http://lkml.kernel.org/r/1476901693-8492-2-git-send-email-mark.rutland@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index 2aaca53c0974..ad6f5eb07a95 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -52,6 +52,15 @@ struct task_struct;
+ #include <asm/cpufeature.h>
+ #include <linux/atomic.h>
+ 
++struct thread_info {
++	unsigned long		flags;		/* low level flags */
++};
++
++#define INIT_THREAD_INFO(tsk)			\
++{						\
++	.flags		= 0,			\
++}
++
+ #define init_stack		(init_thread_union.stack)
+ 
+ #else /* !__ASSEMBLY__ */
+diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
+index 45f004e9cc59..2873baf5372a 100644
+--- a/include/linux/thread_info.h
++++ b/include/linux/thread_info.h
+@@ -13,17 +13,6 @@
+ struct timespec;
+ struct compat_timespec;
+ 
+-#ifdef CONFIG_THREAD_INFO_IN_TASK
+-struct thread_info {
+-	unsigned long		flags;		/* low level flags */
+-};
+-
+-#define INIT_THREAD_INFO(tsk)			\
+-{						\
+-	.flags		= 0,			\
+-}
+-#endif
+-
+ #ifdef CONFIG_THREAD_INFO_IN_TASK
+ #define current_thread_info() ((struct thread_info *)current)
+ #endif
+-- 
+2.15.0
+
diff --git a/queue/sched-x86-32-kgdb-Don-t-use-thread.ip-in-sleeping_th.patch b/queue/sched-x86-32-kgdb-Don-t-use-thread.ip-in-sleeping_th.patch
new file mode 100644
index 0000000..ff851c0
--- /dev/null
+++ b/queue/sched-x86-32-kgdb-Don-t-use-thread.ip-in-sleeping_th.patch
@@ -0,0 +1,56 @@
+From 4e047aa7f267c3449b6d323510d35864829aca70 Mon Sep 17 00:00:00 2001
+From: Brian Gerst <brgerst@gmail.com>
+Date: Sat, 13 Aug 2016 12:38:16 -0400
+Subject: [PATCH] sched/x86/32, kgdb: Don't use thread.ip in
+ sleeping_thread_to_gdb_regs()
+
+commit 4e047aa7f267c3449b6d323510d35864829aca70 upstream.
+
+Match 64-bit and set gdb_regs[GDB_PC] to zero.  thread.ip is always the
+same point in the scheduler (except for newly forked processes), and will
+be removed in a future patch.
+
+Signed-off-by: Brian Gerst <brgerst@gmail.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jason Wessel <jason.wessel@windriver.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1471106302-10159-2-git-send-email-brgerst@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
+index 04cde527d728..fe649a5f509f 100644
+--- a/arch/x86/kernel/kgdb.c
++++ b/arch/x86/kernel/kgdb.c
+@@ -172,7 +172,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+ 	gdb_regs[GDB_ES]	= __KERNEL_DS;
+ 	gdb_regs[GDB_PS]	= 0;
+ 	gdb_regs[GDB_CS]	= __KERNEL_CS;
+-	gdb_regs[GDB_PC]	= p->thread.ip;
+ 	gdb_regs[GDB_SS]	= __KERNEL_DS;
+ 	gdb_regs[GDB_FS]	= 0xFFFF;
+ 	gdb_regs[GDB_GS]	= 0xFFFF;
+@@ -180,7 +179,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+ 	gdb_regs32[GDB_PS]	= *(unsigned long *)(p->thread.sp + 8);
+ 	gdb_regs32[GDB_CS]	= __KERNEL_CS;
+ 	gdb_regs32[GDB_SS]	= __KERNEL_DS;
+-	gdb_regs[GDB_PC]	= 0;
+ 	gdb_regs[GDB_R8]	= 0;
+ 	gdb_regs[GDB_R9]	= 0;
+ 	gdb_regs[GDB_R10]	= 0;
+@@ -190,6 +188,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+ 	gdb_regs[GDB_R14]	= 0;
+ 	gdb_regs[GDB_R15]	= 0;
+ #endif
++	gdb_regs[GDB_PC]	= 0;
+ 	gdb_regs[GDB_SP]	= p->thread.sp;
+ }
+ 
+-- 
+2.15.0
+
diff --git a/queue/sched-x86-Add-struct-inactive_task_frame-to-better-d.patch b/queue/sched-x86-Add-struct-inactive_task_frame-to-better-d.patch
new file mode 100644
index 0000000..2291b89
--- /dev/null
+++ b/queue/sched-x86-Add-struct-inactive_task_frame-to-better-d.patch
@@ -0,0 +1,107 @@
+From 7b32aeadbc95d4a41402c1c0da6aa3ab51af4c10 Mon Sep 17 00:00:00 2001
+From: Brian Gerst <brgerst@gmail.com>
+Date: Sat, 13 Aug 2016 12:38:18 -0400
+Subject: [PATCH] sched/x86: Add 'struct inactive_task_frame' to better
+ document the sleeping task stack frame
+
+commit 7b32aeadbc95d4a41402c1c0da6aa3ab51af4c10 upstream.
+
+Add 'struct inactive_task_frame', which defines the layout of the stack for
+a sleeping process.  For now, the only defined field is the BP register
+(frame pointer).
+
+Signed-off-by: Brian Gerst <brgerst@gmail.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1471106302-10159-4-git-send-email-brgerst@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
+index 0944218af9e2..7646fb2772f8 100644
+--- a/arch/x86/include/asm/stacktrace.h
++++ b/arch/x86/include/asm/stacktrace.h
+@@ -8,6 +8,7 @@
+ 
+ #include <linux/uaccess.h>
+ #include <linux/ptrace.h>
++#include <asm/switch_to.h>
+ 
+ extern int kstack_depth_to_print;
+ 
+@@ -70,8 +71,7 @@ stack_frame(struct task_struct *task, struct pt_regs *regs)
+ 		return bp;
+ 	}
+ 
+-	/* bp is the last reg pushed by switch_to */
+-	return *(unsigned long *)task->thread.sp;
++	return ((struct inactive_task_frame *)task->thread.sp)->bp;
+ }
+ #else
+ static inline unsigned long
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index 14e4b20f0aaf..ec689c62c01f 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -30,6 +30,11 @@ static inline void prepare_switch_to(struct task_struct *prev,
+ #endif
+ }
+ 
++/* data that is pointed to by thread.sp */
++struct inactive_task_frame {
++	unsigned long bp;
++};
++
+ #ifdef CONFIG_X86_32
+ 
+ #ifdef CONFIG_CC_STACKPROTECTOR
+diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
+index 5e3f294ce264..8e36f249646e 100644
+--- a/arch/x86/kernel/kgdb.c
++++ b/arch/x86/kernel/kgdb.c
+@@ -50,6 +50,7 @@
+ #include <asm/apicdef.h>
+ #include <asm/apic.h>
+ #include <asm/nmi.h>
++#include <asm/switch_to.h>
+ 
+ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
+ {
+@@ -166,7 +167,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+ 	gdb_regs[GDB_DX]	= 0;
+ 	gdb_regs[GDB_SI]	= 0;
+ 	gdb_regs[GDB_DI]	= 0;
+-	gdb_regs[GDB_BP]	= *(unsigned long *)p->thread.sp;
++	gdb_regs[GDB_BP]	= ((struct inactive_task_frame *)p->thread.sp)->bp;
+ #ifdef CONFIG_X86_32
+ 	gdb_regs[GDB_DS]	= __KERNEL_DS;
+ 	gdb_regs[GDB_ES]	= __KERNEL_DS;
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index 62c0b0ea2ce4..0115a4a4db96 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -32,6 +32,7 @@
+ #include <asm/tlbflush.h>
+ #include <asm/mce.h>
+ #include <asm/vm86.h>
++#include <asm/switch_to.h>
+ 
+ /*
+  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+@@ -556,7 +557,7 @@ unsigned long get_wchan(struct task_struct *p)
+ 	if (sp < bottom || sp > top)
+ 		return 0;
+ 
+-	fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
++	fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
+ 	do {
+ 		if (fp < bottom || fp > top)
+ 			return 0;
+-- 
+2.15.0
+
diff --git a/queue/sched-x86-Pass-kernel-thread-parameters-in-struct-fo.patch b/queue/sched-x86-Pass-kernel-thread-parameters-in-struct-fo.patch
new file mode 100644
index 0000000..4b4cff7
--- /dev/null
+++ b/queue/sched-x86-Pass-kernel-thread-parameters-in-struct-fo.patch
@@ -0,0 +1,237 @@
+From 616d24835eeafa8ef3466479db028abfdfc77531 Mon Sep 17 00:00:00 2001
+From: Brian Gerst <brgerst@gmail.com>
+Date: Sat, 13 Aug 2016 12:38:20 -0400
+Subject: [PATCH] sched/x86: Pass kernel thread parameters in 'struct
+ fork_frame'
+
+commit 616d24835eeafa8ef3466479db028abfdfc77531 upstream.
+
+Instead of setting up a fake pt_regs context, put the kernel thread
+function pointer and arg into the unused callee-restored registers
+of 'struct fork_frame'.
+
+Signed-off-by: Brian Gerst <brgerst@gmail.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1471106302-10159-6-git-send-email-brgerst@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index bf8f221f9c94..b75a8bcd2d23 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -240,35 +240,34 @@ END(__switch_to_asm)
+  * A newly forked process directly context switches into this address.
+  *
+  * eax: prev task we switched from
++ * ebx: kernel thread func (NULL for user thread)
++ * edi: kernel thread arg
+  */
+ ENTRY(ret_from_fork)
+ 	pushl	%eax
+ 	call	schedule_tail
+ 	popl	%eax
+ 
++	testl	%ebx, %ebx
++	jnz	1f		/* kernel threads are uncommon */
++
++2:
+ 	/* When we fork, we trace the syscall return in the child, too. */
+ 	movl    %esp, %eax
+ 	call    syscall_return_slowpath
+ 	jmp     restore_all
+-END(ret_from_fork)
+-
+-ENTRY(ret_from_kernel_thread)
+-	pushl	%eax
+-	call	schedule_tail
+-	popl	%eax
+-	movl	PT_EBP(%esp), %eax
+-	call	*PT_EBX(%esp)
+-	movl	$0, PT_EAX(%esp)
+ 
++	/* kernel thread */
++1:	movl	%edi, %eax
++	call	*%ebx
+ 	/*
+-	 * Kernel threads return to userspace as if returning from a syscall.
+-	 * We should check whether anything actually uses this path and, if so,
+-	 * consider switching it over to ret_from_fork.
++	 * A kernel thread is allowed to return here after successfully
++	 * calling do_execve().  Exit to userspace to complete the execve()
++	 * syscall.
+ 	 */
+-	movl    %esp, %eax
+-	call    syscall_return_slowpath
+-	jmp     restore_all
+-ENDPROC(ret_from_kernel_thread)
++	movl	$0, PT_EAX(%esp)
++	jmp	2b
++END(ret_from_fork)
+ 
+ /*
+  * Return to user mode is not as complex as all this looks,
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index c1af8acd366b..c0373d667674 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -407,37 +407,34 @@ END(__switch_to_asm)
+  * A newly forked process directly context switches into this address.
+  *
+  * rax: prev task we switched from
++ * rbx: kernel thread func (NULL for user thread)
++ * r12: kernel thread arg
+  */
+ ENTRY(ret_from_fork)
+ 	movq	%rax, %rdi
+ 	call	schedule_tail			/* rdi: 'prev' task parameter */
+ 
+-	testb	$3, CS(%rsp)			/* from kernel_thread? */
+-	jnz	1f
+-
+-	/*
+-	 * We came from kernel_thread.  This code path is quite twisted, and
+-	 * someone should clean it up.
+-	 *
+-	 * copy_thread_tls stashes the function pointer in RBX and the
+-	 * parameter to be passed in RBP.  The called function is permitted
+-	 * to call do_execve and thereby jump to user mode.
+-	 */
+-	movq	RBP(%rsp), %rdi
+-	call	*RBX(%rsp)
+-	movl	$0, RAX(%rsp)
+-
+-	/*
+-	 * Fall through as though we're exiting a syscall.  This makes a
+-	 * twisted sort of sense if we just called do_execve.
+-	 */
++	testq	%rbx, %rbx			/* from kernel_thread? */
++	jnz	1f				/* kernel threads are uncommon */
+ 
+-1:
++2:
+ 	movq	%rsp, %rdi
+ 	call	syscall_return_slowpath	/* returns with IRQs disabled */
+ 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
+ 	SWAPGS
+ 	jmp	restore_regs_and_iret
++
++1:
++	/* kernel thread */
++	movq	%r12, %rdi
++	call	*%rbx
++	/*
++	 * A kernel thread is allowed to return here after successfully
++	 * calling do_execve().  Exit to userspace to complete the execve()
++	 * syscall.
++	 */
++	movq	$0, RAX(%rsp)
++	jmp	2b
+ END(ret_from_fork)
+ 
+ /*
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index 886d5ea09dba..5cb436acd463 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -34,6 +34,8 @@ static inline void prepare_switch_to(struct task_struct *prev,
+ #endif
+ }
+ 
++asmlinkage void ret_from_fork(void);
++
+ /* data that is pointed to by thread.sp */
+ struct inactive_task_frame {
+ #ifdef CONFIG_X86_64
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 4bedbc08e53c..18714a191b2d 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -55,9 +55,6 @@
+ #include <asm/switch_to.h>
+ #include <asm/vm86.h>
+ 
+-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+-asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
+-
+ /*
+  * Return saved PC of a blocked thread.
+  */
+@@ -139,6 +136,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+ 	int err;
+ 
+ 	frame->bp = 0;
++	frame->ret_addr = (unsigned long) ret_from_fork;
+ 	p->thread.sp = (unsigned long) fork_frame;
+ 	p->thread.sp0 = (unsigned long) (childregs+1);
+ 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+@@ -146,25 +144,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+ 	if (unlikely(p->flags & PF_KTHREAD)) {
+ 		/* kernel thread */
+ 		memset(childregs, 0, sizeof(struct pt_regs));
+-		frame->ret_addr = (unsigned long) ret_from_kernel_thread;
+-		task_user_gs(p) = __KERNEL_STACK_CANARY;
+-		childregs->ds = __USER_DS;
+-		childregs->es = __USER_DS;
+-		childregs->fs = __KERNEL_PERCPU;
+-		childregs->bx = sp;	/* function */
+-		childregs->bp = arg;
+-		childregs->orig_ax = -1;
+-		childregs->cs = __KERNEL_CS | get_kernel_rpl();
+-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
++		frame->bx = sp;		/* function */
++		frame->di = arg;
+ 		p->thread.io_bitmap_ptr = NULL;
+ 		return 0;
+ 	}
++	frame->bx = 0;
+ 	*childregs = *current_pt_regs();
+ 	childregs->ax = 0;
+ 	if (sp)
+ 		childregs->sp = sp;
+ 
+-	frame->ret_addr = (unsigned long) ret_from_fork;
+ 	task_user_gs(p) = get_user_gs(current_pt_regs());
+ 
+ 	p->thread.io_bitmap_ptr = NULL;
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 827eeed03e16..b812cd0d7889 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -50,8 +50,6 @@
+ #include <asm/switch_to.h>
+ #include <asm/xen/hypervisor.h>
+ 
+-asmlinkage extern void ret_from_fork(void);
+-
+ __visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
+ 
+ /* Prints also some state that isn't saved in the pt_regs */
+@@ -165,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+ 	if (unlikely(p->flags & PF_KTHREAD)) {
+ 		/* kernel thread */
+ 		memset(childregs, 0, sizeof(struct pt_regs));
+-		childregs->sp = (unsigned long)childregs;
+-		childregs->ss = __KERNEL_DS;
+-		childregs->bx = sp; /* function */
+-		childregs->bp = arg;
+-		childregs->orig_ax = -1;
+-		childregs->cs = __KERNEL_CS | get_kernel_rpl();
+-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
++		frame->bx = sp;		/* function */
++		frame->r12 = arg;
+ 		return 0;
+ 	}
++	frame->bx = 0;
+ 	*childregs = *current_pt_regs();
+ 
+ 	childregs->ax = 0;
+-- 
+2.15.0
+
diff --git a/queue/sched-x86-Rewrite-the-switch_to-code.patch b/queue/sched-x86-Rewrite-the-switch_to-code.patch
new file mode 100644
index 0000000..19c054a
--- /dev/null
+++ b/queue/sched-x86-Rewrite-the-switch_to-code.patch
@@ -0,0 +1,455 @@
+From 0100301bfdf56a2a370c7157b5ab0fbf9313e1cd Mon Sep 17 00:00:00 2001
+From: Brian Gerst <brgerst@gmail.com>
+Date: Sat, 13 Aug 2016 12:38:19 -0400
+Subject: [PATCH] sched/x86: Rewrite the switch_to() code
+
+commit 0100301bfdf56a2a370c7157b5ab0fbf9313e1cd upstream.
+
+Move the low-level context switch code to an out-of-line asm stub instead of
+using complex inline asm.  This allows constructing a new stack frame for the
+child process to make it seamlessly flow to ret_from_fork without an extra
+test and branch in __switch_to().  It also improves code generation for
+__schedule() by using the C calling convention instead of clobbering all
+registers.
+
+Signed-off-by: Brian Gerst <brgerst@gmail.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1471106302-10159-5-git-send-email-brgerst@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index 0b56666e6039..bf8f221f9c94 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -204,6 +204,43 @@
+ 	POP_GS_EX
+ .endm
+ 
++/*
++ * %eax: prev task
++ * %edx: next task
++ */
++ENTRY(__switch_to_asm)
++	/*
++	 * Save callee-saved registers
++	 * This must match the order in struct inactive_task_frame
++	 */
++	pushl	%ebp
++	pushl	%ebx
++	pushl	%edi
++	pushl	%esi
++
++	/* switch stack */
++	movl	%esp, TASK_threadsp(%eax)
++	movl	TASK_threadsp(%edx), %esp
++
++#ifdef CONFIG_CC_STACKPROTECTOR
++	movl	TASK_stack_canary(%edx), %ebx
++	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
++#endif
++
++	/* restore callee-saved registers */
++	popl	%esi
++	popl	%edi
++	popl	%ebx
++	popl	%ebp
++
++	jmp	__switch_to
++END(__switch_to_asm)
++
++/*
++ * A newly forked process directly context switches into this address.
++ *
++ * eax: prev task we switched from
++ */
+ ENTRY(ret_from_fork)
+ 	pushl	%eax
+ 	call	schedule_tail
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index f6b40e5c88f1..c1af8acd366b 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -367,14 +367,49 @@ END(ptregs_\func)
+ #define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+ #include <asm/syscalls_64.h>
+ 
++/*
++ * %rdi: prev task
++ * %rsi: next task
++ */
++ENTRY(__switch_to_asm)
++	/*
++	 * Save callee-saved registers
++	 * This must match the order in inactive_task_frame
++	 */
++	pushq	%rbp
++	pushq	%rbx
++	pushq	%r12
++	pushq	%r13
++	pushq	%r14
++	pushq	%r15
++
++	/* switch stack */
++	movq	%rsp, TASK_threadsp(%rdi)
++	movq	TASK_threadsp(%rsi), %rsp
++
++#ifdef CONFIG_CC_STACKPROTECTOR
++	movq	TASK_stack_canary(%rsi), %rbx
++	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
++#endif
++
++	/* restore callee-saved registers */
++	popq	%r15
++	popq	%r14
++	popq	%r13
++	popq	%r12
++	popq	%rbx
++	popq	%rbp
++
++	jmp	__switch_to
++END(__switch_to_asm)
++
+ /*
+  * A newly forked process directly context switches into this address.
+  *
+- * rdi: prev task we switched from
++ * rax: prev task we switched from
+  */
+ ENTRY(ret_from_fork)
+-	LOCK ; btr $TIF_FORK, TI_flags(%r8)
+-
++	movq	%rax, %rdi
+ 	call	schedule_tail			/* rdi: 'prev' task parameter */
+ 
+ 	testb	$3, CS(%rsp)			/* from kernel_thread? */
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 63def9537a2d..6fee8635340b 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -389,9 +389,6 @@ struct thread_struct {
+ 	unsigned short		fsindex;
+ 	unsigned short		gsindex;
+ #endif
+-#ifdef CONFIG_X86_32
+-	unsigned long		ip;
+-#endif
+ #ifdef CONFIG_X86_64
+ 	unsigned long		fsbase;
+ 	unsigned long		gsbase;
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index ec689c62c01f..886d5ea09dba 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -2,8 +2,12 @@
+ #define _ASM_X86_SWITCH_TO_H
+ 
+ struct task_struct; /* one of the stranger aspects of C forward declarations */
++
++struct task_struct *__switch_to_asm(struct task_struct *prev,
++				    struct task_struct *next);
++
+ __visible struct task_struct *__switch_to(struct task_struct *prev,
+-					   struct task_struct *next);
++					  struct task_struct *next);
+ struct tss_struct;
+ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ 		      struct tss_struct *tss);
+@@ -32,131 +36,30 @@ static inline void prepare_switch_to(struct task_struct *prev,
+ 
+ /* data that is pointed to by thread.sp */
+ struct inactive_task_frame {
++#ifdef CONFIG_X86_64
++	unsigned long r15;
++	unsigned long r14;
++	unsigned long r13;
++	unsigned long r12;
++#else
++	unsigned long si;
++	unsigned long di;
++#endif
++	unsigned long bx;
+ 	unsigned long bp;
++	unsigned long ret_addr;
+ };
+ 
+-#ifdef CONFIG_X86_32
+-
+-#ifdef CONFIG_CC_STACKPROTECTOR
+-#define __switch_canary							\
+-	"movl %P[task_canary](%[next]), %%ebx\n\t"			\
+-	"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
+-#define __switch_canary_oparam						\
+-	, [stack_canary] "=m" (stack_canary.canary)
+-#define __switch_canary_iparam						\
+-	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
+-#else	/* CC_STACKPROTECTOR */
+-#define __switch_canary
+-#define __switch_canary_oparam
+-#define __switch_canary_iparam
+-#endif	/* CC_STACKPROTECTOR */
++struct fork_frame {
++	struct inactive_task_frame frame;
++	struct pt_regs regs;
++};
+ 
+-/*
+- * Saving eflags is important. It switches not only IOPL between tasks,
+- * it also protects other tasks from NT leaking through sysenter etc.
+- */
+ #define switch_to(prev, next, last)					\
+ do {									\
+-	/*								\
+-	 * Context-switching clobbers all registers, so we clobber	\
+-	 * them explicitly, via unused output variables.		\
+-	 * (EAX and EBP is not listed because EBP is saved/restored	\
+-	 * explicitly for wchan access and EAX is the return value of	\
+-	 * __switch_to())						\
+-	 */								\
+-	unsigned long ebx, ecx, edx, esi, edi;				\
+-									\
+ 	prepare_switch_to(prev, next);					\
+ 									\
+-	asm volatile("pushl %%ebp\n\t"		/* save    EBP   */	\
+-		     "movl %%esp,%[prev_sp]\n\t"	/* save    ESP   */ \
+-		     "movl %[next_sp],%%esp\n\t"	/* restore ESP   */ \
+-		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
+-		     "pushl %[next_ip]\n\t"	/* restore EIP   */	\
+-		     __switch_canary					\
+-		     "jmp __switch_to\n"	/* regparm call  */	\
+-		     "1:\t"						\
+-		     "popl %%ebp\n\t"		/* restore EBP   */	\
+-									\
+-		     /* output parameters */				\
+-		     : [prev_sp] "=m" (prev->thread.sp),		\
+-		       [prev_ip] "=m" (prev->thread.ip),		\
+-		       "=a" (last),					\
+-									\
+-		       /* clobbered output registers: */		\
+-		       "=b" (ebx), "=c" (ecx), "=d" (edx),		\
+-		       "=S" (esi), "=D" (edi)				\
+-		       							\
+-		       __switch_canary_oparam				\
+-									\
+-		       /* input parameters: */				\
+-		     : [next_sp]  "m" (next->thread.sp),		\
+-		       [next_ip]  "m" (next->thread.ip),		\
+-		       							\
+-		       /* regparm parameters for __switch_to(): */	\
+-		       [prev]     "a" (prev),				\
+-		       [next]     "d" (next)				\
+-									\
+-		       __switch_canary_iparam				\
+-									\
+-		     : /* reloaded segment registers */			\
+-			"memory");					\
++	((last) = __switch_to_asm((prev), (next)));			\
+ } while (0)
+ 
+-#else /* CONFIG_X86_32 */
+-
+-/* frame pointer must be last for get_wchan */
+-#define SAVE_CONTEXT    "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
+-
+-#define __EXTRA_CLOBBER  \
+-	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
+-	  "r12", "r13", "r14", "r15", "flags"
+-
+-#ifdef CONFIG_CC_STACKPROTECTOR
+-#define __switch_canary							  \
+-	"movq %P[task_canary](%%rsi),%%r8\n\t"				  \
+-	"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
+-#define __switch_canary_oparam						  \
+-	, [gs_canary] "=m" (irq_stack_union.stack_canary)
+-#define __switch_canary_iparam						  \
+-	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
+-#else	/* CC_STACKPROTECTOR */
+-#define __switch_canary
+-#define __switch_canary_oparam
+-#define __switch_canary_iparam
+-#endif	/* CC_STACKPROTECTOR */
+-
+-/*
+- * There is no need to save or restore flags, because flags are always
+- * clean in kernel mode, with the possible exception of IOPL.  Kernel IOPL
+- * has no effect.
+- */
+-#define switch_to(prev, next, last)					  \
+-	prepare_switch_to(prev, next);					  \
+-									  \
+-	asm volatile(SAVE_CONTEXT					  \
+-	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
+-	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
+-	     "call __switch_to\n\t"					  \
+-	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
+-	     __switch_canary						  \
+-	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
+-	     "movq %%rax,%%rdi\n\t" 					  \
+-	     "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"		  \
+-	     "jnz   ret_from_fork\n\t"					  \
+-	     RESTORE_CONTEXT						  \
+-	     : "=a" (last)					  	  \
+-	       __switch_canary_oparam					  \
+-	     : [next] "S" (next), [prev] "D" (prev),			  \
+-	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
+-	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
+-	       [_tif_fork] "i" (_TIF_FORK),			  	  \
+-	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
+-	       [current_task] "m" (current_task)			  \
+-	       __switch_canary_iparam					  \
+-	     : "memory", "cc" __EXTRA_CLOBBER)
+-
+-#endif /* CONFIG_X86_32 */
+-
+ #endif /* _ASM_X86_SWITCH_TO_H */
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index 8b7c8d8e0852..494c4b5ada34 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -95,7 +95,6 @@ struct thread_info {
+ #define TIF_UPROBE		12	/* breakpointed or singlestepping */
+ #define TIF_NOTSC		16	/* TSC is not accessible in userland */
+ #define TIF_IA32		17	/* IA32 compatibility process */
+-#define TIF_FORK		18	/* ret_from_fork */
+ #define TIF_NOHZ		19	/* in adaptive nohz mode */
+ #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
+ #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
+@@ -119,7 +118,6 @@ struct thread_info {
+ #define _TIF_UPROBE		(1 << TIF_UPROBE)
+ #define _TIF_NOTSC		(1 << TIF_NOTSC)
+ #define _TIF_IA32		(1 << TIF_IA32)
+-#define _TIF_FORK		(1 << TIF_FORK)
+ #define _TIF_NOHZ		(1 << TIF_NOHZ)
+ #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
+ #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
+diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
+index 2bd5c6ff7ee7..db3a0af9b9ec 100644
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -28,6 +28,12 @@
+ #endif
+ 
+ void common(void) {
++	BLANK();
++	OFFSET(TASK_threadsp, task_struct, thread.sp);
++#ifdef CONFIG_CC_STACKPROTECTOR
++	OFFSET(TASK_stack_canary, task_struct, stack_canary);
++#endif
++
+ 	BLANK();
+ 	OFFSET(TI_flags, thread_info, flags);
+ 	OFFSET(TI_status, thread_info, status);
+diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
+index ecdc1d217dc0..880aa093268d 100644
+--- a/arch/x86/kernel/asm-offsets_32.c
++++ b/arch/x86/kernel/asm-offsets_32.c
+@@ -57,6 +57,11 @@ void foo(void)
+ 	/* Size of SYSENTER_stack */
+ 	DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+ 
++#ifdef CONFIG_CC_STACKPROTECTOR
++	BLANK();
++	OFFSET(stack_canary_offset, stack_canary, canary);
++#endif
++
+ #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
+ 	BLANK();
+ 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
+diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
+index d875f97d4e0b..210927ee2e74 100644
+--- a/arch/x86/kernel/asm-offsets_64.c
++++ b/arch/x86/kernel/asm-offsets_64.c
+@@ -56,6 +56,11 @@ int main(void)
+ 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+ 	BLANK();
+ 
++#ifdef CONFIG_CC_STACKPROTECTOR
++	DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
++	BLANK();
++#endif
++
+ 	DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
+ 	DEFINE(NR_syscalls, sizeof(syscalls_64));
+ 
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index d86be29c38c7..4bedbc08e53c 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -133,17 +133,20 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+ 	unsigned long arg, struct task_struct *p, unsigned long tls)
+ {
+ 	struct pt_regs *childregs = task_pt_regs(p);
++	struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
++	struct inactive_task_frame *frame = &fork_frame->frame;
+ 	struct task_struct *tsk;
+ 	int err;
+ 
+-	p->thread.sp = (unsigned long) childregs;
++	frame->bp = 0;
++	p->thread.sp = (unsigned long) fork_frame;
+ 	p->thread.sp0 = (unsigned long) (childregs+1);
+ 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+ 
+ 	if (unlikely(p->flags & PF_KTHREAD)) {
+ 		/* kernel thread */
+ 		memset(childregs, 0, sizeof(struct pt_regs));
+-		p->thread.ip = (unsigned long) ret_from_kernel_thread;
++		frame->ret_addr = (unsigned long) ret_from_kernel_thread;
+ 		task_user_gs(p) = __KERNEL_STACK_CANARY;
+ 		childregs->ds = __USER_DS;
+ 		childregs->es = __USER_DS;
+@@ -161,7 +164,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+ 	if (sp)
+ 		childregs->sp = sp;
+ 
+-	p->thread.ip = (unsigned long) ret_from_fork;
++	frame->ret_addr = (unsigned long) ret_from_fork;
+ 	task_user_gs(p) = get_user_gs(current_pt_regs());
+ 
+ 	p->thread.io_bitmap_ptr = NULL;
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 63236d8f84bf..827eeed03e16 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -141,12 +141,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+ {
+ 	int err;
+ 	struct pt_regs *childregs;
++	struct fork_frame *fork_frame;
++	struct inactive_task_frame *frame;
+ 	struct task_struct *me = current;
+ 
+ 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+ 	childregs = task_pt_regs(p);
+-	p->thread.sp = (unsigned long) childregs;
+-	set_tsk_thread_flag(p, TIF_FORK);
++	fork_frame = container_of(childregs, struct fork_frame, regs);
++	frame = &fork_frame->frame;
++	frame->bp = 0;
++	frame->ret_addr = (unsigned long) ret_from_fork;
++	p->thread.sp = (unsigned long) fork_frame;
+ 	p->thread.io_bitmap_ptr = NULL;
+ 
+ 	savesegment(gs, p->thread.gsindex);
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index c85d2c636092..7e52f83d3a4b 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -942,7 +942,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
+ 	per_cpu(cpu_current_top_of_stack, cpu) =
+ 		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
+ #else
+-	clear_tsk_thread_flag(idle, TIF_FORK);
+ 	initial_gs = per_cpu_offset(cpu);
+ #endif
+ }
+-- 
+2.15.0
+
diff --git a/queue/series b/queue/series
new file mode 100644
index 0000000..7c0433e
--- /dev/null
+++ b/queue/series
@@ -0,0 +1,62 @@
+# v4.9-rc1~163^2~21
+locking-static_keys-Provide-DECLARE-and-well-as-DEFI.patch
+# v4.9-rc1~163^2~20
+x86-mce-Add-PCI-quirks-to-identify-Xeons-with-machin.patch
+# v4.9-rc1~163^2~19
+x86-mce-Improve-memcpy_mcsafe.patch
+# v4.9-rc1~163^2~18
+x86-mce-Drop-X86_FEATURE_MCE_RECOVERY-and-the-relate.patch
+# v4.9-rc1~160^2~73
+x86-entry-Remove-duplicated-comment.patch
+# v4.9-rc1~160^2~61
+fork-Add-generic-vmalloced-stack-support.patch
+# v4.9-rc1~160^2~59
+x86-mm-64-Enable-vmapped-stacks-CONFIG_HAVE_ARCH_VMA.patch
+# v4.9-rc1~160^2~50
+sched-x86-32-kgdb-Don-t-use-thread.ip-in-sleeping_th.patch
+# v4.9-rc1~160^2~48
+sched-x86-Add-struct-inactive_task_frame-to-better-d.patch
+# v4.9-rc1~160^2~47
+sched-x86-Rewrite-the-switch_to-code.patch
+# v4.9-rc1~160^2~46
+sched-x86-Pass-kernel-thread-parameters-in-struct-fo.patch
+# v4.9-rc1~160^2~33
+x86-entry-64-Clean-up-and-document-espfix64-stack-se.patch
+# v4.9-rc1~160^2~27
+# Merge branch 'linus' into x86/asm, to pick up recent fixes
+# v4.9-rc1~160^2~26
+x86-asm-Move-the-thread_info-status-field-to-thread_.patch
+# v4.9-rc1~160^2~25
+x86-entry-Get-rid-of-pt_regs_to_thread_info.patch
+# v4.9-rc1~160^2~24
+um-Stop-conflating-task_struct-stack-with-thread_inf.patch
+# v4.9-rc1~160^2~23
+sched-core-Allow-putting-thread_info-into-task_struc.patch
+# v4.9-rc1~160^2~22
+x86-Move-thread_info-into-task_struct.patch
+# v4.9-rc1~160^2~20
+x86-entry-64-Fix-a-minor-comment-rebase-error.patch
+# v4.9-rc1~160^2~19
+sched-core-Add-try_get_task_stack-and-put_task_stack.patch
+# v4.9-rc1~160^2~14
+sched-core-Free-the-stack-early-if-CONFIG_THREAD_INF.patch
+# v4.9-rc1~160^2~13
+fork-Optimize-task-creation-by-caching-two-thread-st.patch
+# v4.9-rc1~160^2~2
+thread_info-Use-unsigned-long-for-flags.patch
+# v4.9-rc1~160^2
+x86-asm-Get-rid-of-__read_cr4_safe.patch
+# v4.9-rc1~89^2~3
+x86-entry-spell-EBX-register-correctly-in-documentat.patch
+# v4.9-rc1~11^2~20
+EXPORT_SYMBOL-for-asm.patch
+# v4.9-rc1~11^2~19
+x86-move-exports-to-actual-definitions.patch
+
+# v4.9-rc2~7^2~4
+x86-cpufeature-Add-AVX512_4VNNIW-and-AVX512_4FMAPS-f.patch
+# v4.9-rc2~7^2~1
+sched-core-x86-Make-struct-thread_info-arch-specific.patch
+
+# v4.10-rc1~64^2
+x86-kbuild-enable-modversions-for-symbols-exported-f.patch
diff --git a/queue/thread_info-Use-unsigned-long-for-flags.patch b/queue/thread_info-Use-unsigned-long-for-flags.patch
new file mode 100644
index 0000000..600b9ee
--- /dev/null
+++ b/queue/thread_info-Use-unsigned-long-for-flags.patch
@@ -0,0 +1,62 @@
+From 907241dccb4ce5d9413cf3c030b32b0cfc184914 Mon Sep 17 00:00:00 2001
+From: Mark Rutland <mark.rutland@arm.com>
+Date: Fri, 23 Sep 2016 18:24:07 +0100
+Subject: [PATCH] thread_info: Use unsigned long for flags
+
+commit 907241dccb4ce5d9413cf3c030b32b0cfc184914 upstream.
+
+The generic THREAD_INFO_IN_TASK definition of thread_info::flags is a
+u32, matching x86 prior to the introduction of THREAD_INFO_IN_TASK.
+
+However, common helpers like test_ti_thread_flag() implicitly assume
+that thread_info::flags has at least the size and alignment of unsigned
+long, and relying on padding and alignment provided by other elements of
+task_struct is somewhat fragile. Additionally, some architectures use
+more that 32 bits for thread_info::flags, and others may need to in
+future.
+
+With THREAD_INFO_IN_TASK, task struct follows thread_info with a long
+field, and thus we no longer save any space as we did back in commit:
+
+  affa219b60a11b32 ("x86: change thread_info's flag field back to 32 bits")
+
+Given all this, it makes more sense for the generic thread_info::flags
+to be an unsigned long.
+
+In fact given <linux/thread_info.h> contains/uses the helpers mentioned
+above, BE arches *must* use unsigned long (or something of the same size)
+today, or they wouldn't work.
+
+Make it so.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1474651447-30447-1-git-send-email-mark.rutland@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
+index e2d0fd81b1ba..45f004e9cc59 100644
+--- a/include/linux/thread_info.h
++++ b/include/linux/thread_info.h
+@@ -15,7 +15,7 @@ struct compat_timespec;
+ 
+ #ifdef CONFIG_THREAD_INFO_IN_TASK
+ struct thread_info {
+-	u32			flags;		/* low level flags */
++	unsigned long		flags;		/* low level flags */
+ };
+ 
+ #define INIT_THREAD_INFO(tsk)			\
+-- 
+2.15.0
+
diff --git a/queue/um-Stop-conflating-task_struct-stack-with-thread_inf.patch b/queue/um-Stop-conflating-task_struct-stack-with-thread_inf.patch
new file mode 100644
index 0000000..7ed6c5b
--- /dev/null
+++ b/queue/um-Stop-conflating-task_struct-stack-with-thread_inf.patch
@@ -0,0 +1,68 @@
+From d896fa20a70c9e596438728561e058a74ed3196b Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 13 Sep 2016 14:29:23 -0700
+Subject: [PATCH] um/Stop conflating task_struct::stack with thread_info
+
+commit d896fa20a70c9e596438728561e058a74ed3196b upstream.
+
+thread_info may move in the future, so use the accessors.
+
+[ Andy Lutomirski wrote this changelog message and changed
+  "task_thread_info(child)->cpu" to "task_cpu(child)". ]
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jann Horn <jann@thejh.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/3439705d9838940cc82733a7335fa8c654c37db8.1473801993.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
+index a7ef7b131e25..5766ead6fdb9 100644
+--- a/arch/x86/um/ptrace_32.c
++++ b/arch/x86/um/ptrace_32.c
+@@ -194,7 +194,7 @@ int peek_user(struct task_struct *child, long addr, long data)
+ 
+ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+ {
+-	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
++	int err, n, cpu = task_cpu(child);
+ 	struct user_i387_struct fpregs;
+ 
+ 	err = save_i387_registers(userspace_pid[cpu],
+@@ -211,7 +211,7 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
+ 
+ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+ {
+-	int n, cpu = ((struct thread_info *) child->stack)->cpu;
++	int n, cpu = task_cpu(child);
+ 	struct user_i387_struct fpregs;
+ 
+ 	n = copy_from_user(&fpregs, buf, sizeof(fpregs));
+@@ -224,7 +224,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
+ 
+ static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
+ {
+-	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
++	int err, n, cpu = task_cpu(child);
+ 	struct user_fxsr_struct fpregs;
+ 
+ 	err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
+@@ -240,7 +240,7 @@ static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *
+ 
+ static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
+ {
+-	int n, cpu = ((struct thread_info *) child->stack)->cpu;
++	int n, cpu = task_cpu(child);
+ 	struct user_fxsr_struct fpregs;
+ 
+ 	n = copy_from_user(&fpregs, buf, sizeof(fpregs));
+-- 
+2.15.0
+
diff --git a/queue/x86-Move-thread_info-into-task_struct.patch b/queue/x86-Move-thread_info-into-task_struct.patch
new file mode 100644
index 0000000..a00864d
--- /dev/null
+++ b/queue/x86-Move-thread_info-into-task_struct.patch
@@ -0,0 +1,197 @@
+From 15f4eae70d365bba26854c90b6002aaabb18c8aa Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 13 Sep 2016 14:29:25 -0700
+Subject: [PATCH] x86: Move thread_info into task_struct
+
+commit 15f4eae70d365bba26854c90b6002aaabb18c8aa upstream.
+
+Now that most of the thread_info users have been cleaned up,
+this is straightforward.
+
+Most of this code was written by Linus.
+
+Originally-from: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jann Horn <jann@thejh.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/a50eab40abeaec9cb9a9e3cbdeafd32190206654.1473801993.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 4c3972847c2a..2a83bc8b24c6 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -157,6 +157,7 @@ config X86
+ 	select SPARSE_IRQ
+ 	select SRCU
+ 	select SYSCTL_EXCEPTION_TRACE
++	select THREAD_INFO_IN_TASK
+ 	select USER_STACKTRACE_SUPPORT
+ 	select VIRT_TO_BUS
+ 	select X86_DEV_DMA_OPS			if X86_64
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index e7fba58f4d9c..2b46384b4a4f 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
+ 	 * If we need to do entry work or if we guess we'll need to do
+ 	 * exit work, go straight to the slow path.
+ 	 */
+-	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
++	movq	PER_CPU_VAR(current_task), %r11
++	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
+ 	jnz	entry_SYSCALL64_slow_path
+ 
+ entry_SYSCALL_64_fastpath:
+@@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath:
+ 	 */
+ 	DISABLE_INTERRUPTS(CLBR_NONE)
+ 	TRACE_IRQS_OFF
+-	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
++	movq	PER_CPU_VAR(current_task), %r11
++	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
+ 	jnz	1f
+ 
+ 	LOCKDEP_SYS_EXIT
+@@ -370,6 +372,7 @@ END(ptregs_\func)
+ /*
+  * %rdi: prev task
+  * %rsi: next task
++ * rsi: task we're switching to
+  */
+ ENTRY(__switch_to_asm)
+ 	/*
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index c9dcfe7c7e4b..2aaca53c0974 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -52,20 +52,6 @@ struct task_struct;
+ #include <asm/cpufeature.h>
+ #include <linux/atomic.h>
+ 
+-struct thread_info {
+-	struct task_struct	*task;		/* main task structure */
+-	__u32			flags;		/* low level flags */
+-	__u32			cpu;		/* current CPU */
+-};
+-
+-#define INIT_THREAD_INFO(tsk)			\
+-{						\
+-	.task		= &tsk,			\
+-	.flags		= 0,			\
+-	.cpu		= 0,			\
+-}
+-
+-#define init_thread_info	(init_thread_union.thread_info)
+ #define init_stack		(init_thread_union.stack)
+ 
+ #else /* !__ASSEMBLY__ */
+@@ -157,11 +143,6 @@ struct thread_info {
+  */
+ #ifndef __ASSEMBLY__
+ 
+-static inline struct thread_info *current_thread_info(void)
+-{
+-	return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
+-}
+-
+ static inline unsigned long current_stack_pointer(void)
+ {
+ 	unsigned long sp;
+@@ -223,33 +204,6 @@ static inline int arch_within_stack_frames(const void * const stack,
+ # define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+ #endif
+ 
+-/*
+- * ASM operand which evaluates to a 'thread_info' address of
+- * the current task, if it is known that "reg" is exactly "off"
+- * bytes below the top of the stack currently.
+- *
+- * ( The kernel stack's size is known at build time, it is usually
+- *   2 or 4 pages, and the bottom  of the kernel stack contains
+- *   the thread_info structure. So to access the thread_info very
+- *   quickly from assembly code we can calculate down from the
+- *   top of the kernel stack to the bottom, using constant,
+- *   build-time calculations only. )
+- *
+- * For example, to fetch the current thread_info->flags value into %eax
+- * on x86-64 defconfig kernels, in syscall entry code where RSP is
+- * currently at exactly SIZEOF_PTREGS bytes away from the top of the
+- * stack:
+- *
+- *      mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
+- *
+- * will translate to:
+- *
+- *      8b 84 24 b8 c0 ff ff      mov    -0x3f48(%rsp), %eax
+- *
+- * which is below the current RSP by almost 16K.
+- */
+-#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
+-
+ #endif
+ 
+ #ifdef CONFIG_COMPAT
+diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
+index add5f90b93d4..c62e015b126c 100644
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -35,9 +35,7 @@ void common(void) {
+ #endif
+ 
+ 	BLANK();
+-	OFFSET(TI_flags, thread_info, flags);
+-
+-	BLANK();
++	OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
+ 	OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
+ 
+ 	BLANK();
+diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
+index 4a7903714065..9ebd0b0e73d9 100644
+--- a/arch/x86/kernel/irq_64.c
++++ b/arch/x86/kernel/irq_64.c
+@@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
+ 	if (user_mode(regs))
+ 		return;
+ 
+-	if (regs->sp >= curbase + sizeof(struct thread_info) +
+-				  sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
++	if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
+ 	    regs->sp <= curbase + THREAD_SIZE)
+ 		return;
+ 
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index c1fa790c81cd..0b9ed8ec5226 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -549,9 +549,7 @@ unsigned long get_wchan(struct task_struct *p)
+ 	 * PADDING
+ 	 * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
+ 	 * stack
+-	 * ----------- bottom = start + sizeof(thread_info)
+-	 * thread_info
+-	 * ----------- start
++	 * ----------- bottom = start
+ 	 *
+ 	 * The tasks stack pointer points at the location where the
+ 	 * framepointer is stored. The data on the stack is:
+@@ -562,7 +560,7 @@ unsigned long get_wchan(struct task_struct *p)
+ 	 */
+ 	top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
+ 	top -= 2 * sizeof(unsigned long);
+-	bottom = start + sizeof(struct thread_info);
++	bottom = start;
+ 
+ 	sp = READ_ONCE(p->thread.sp);
+ 	if (sp < bottom || sp > top)
+-- 
+2.15.0
+
diff --git a/queue/x86-asm-Get-rid-of-__read_cr4_safe.patch b/queue/x86-asm-Get-rid-of-__read_cr4_safe.patch
new file mode 100644
index 0000000..24c3526
--- /dev/null
+++ b/queue/x86-asm-Get-rid-of-__read_cr4_safe.patch
@@ -0,0 +1,174 @@
+From 1ef55be16ed69538f89e0a6508be5e62fdc9851c Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Sep 2016 12:48:12 -0700
+Subject: [PATCH] x86/asm: Get rid of __read_cr4_safe()
+
+commit 1ef55be16ed69538f89e0a6508be5e62fdc9851c upstream.
+
+We use __read_cr4() vs __read_cr4_safe() inconsistently.  On
+CR4-less CPUs, all CR4 bits are effectively clear, so we can make
+the code simpler and more robust by making __read_cr4() always fix
+up faults on 32-bit kernels.
+
+This may fix some bugs on old 486-like CPUs, but I don't have any
+easy way to test that.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: david@saggiorato.net
+Link: http://lkml.kernel.org/r/ea647033d357d9ce2ad2bbde5a631045f5052fb6.1475178370.git.luto@kernel.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
+index 2970d22d7766..91b6f4eed3fd 100644
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -80,10 +80,6 @@ static inline unsigned long __read_cr4(void)
+ {
+ 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
+ }
+-static inline unsigned long __read_cr4_safe(void)
+-{
+-	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
+-}
+ 
+ static inline void __write_cr4(unsigned long x)
+ {
+diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
+index 7fa9e7740ba3..fcf243f077ac 100644
+--- a/arch/x86/include/asm/paravirt_types.h
++++ b/arch/x86/include/asm/paravirt_types.h
+@@ -108,7 +108,6 @@ struct pv_cpu_ops {
+ 	unsigned long (*read_cr0)(void);
+ 	void (*write_cr0)(unsigned long);
+ 
+-	unsigned long (*read_cr4_safe)(void);
+ 	unsigned long (*read_cr4)(void);
+ 	void (*write_cr4)(unsigned long);
+ 
+diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
+index 587d7914ea4b..19a2224f9e16 100644
+--- a/arch/x86/include/asm/special_insns.h
++++ b/arch/x86/include/asm/special_insns.h
+@@ -59,22 +59,19 @@ static inline void native_write_cr3(unsigned long val)
+ static inline unsigned long native_read_cr4(void)
+ {
+ 	unsigned long val;
+-	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
+-	return val;
+-}
+-
+-static inline unsigned long native_read_cr4_safe(void)
+-{
+-	unsigned long val;
+-	/* This could fault if %cr4 does not exist. In x86_64, a cr4 always
+-	 * exists, so it will never fail. */
+ #ifdef CONFIG_X86_32
++	/*
++	 * This could fault if CR4 does not exist.  Non-existent CR4
++	 * is functionally equivalent to CR4 == 0.  Keep it simple and pretend
++	 * that CR4 == 0 on CPUs that don't have CR4.
++	 */
+ 	asm volatile("1: mov %%cr4, %0\n"
+ 		     "2:\n"
+ 		     _ASM_EXTABLE(1b, 2b)
+ 		     : "=r" (val), "=m" (__force_order) : "0" (0));
+ #else
+-	val = native_read_cr4();
++	/* CR4 always exists on x86_64. */
++	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
+ #endif
+ 	return val;
+ }
+@@ -182,11 +179,6 @@ static inline unsigned long __read_cr4(void)
+ 	return native_read_cr4();
+ }
+ 
+-static inline unsigned long __read_cr4_safe(void)
+-{
+-	return native_read_cr4_safe();
+-}
+-
+ static inline void __write_cr4(unsigned long x)
+ {
+ 	native_write_cr4(x);
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index dee8a70382ba..6fa85944af83 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+ /* Initialize cr4 shadow for this CPU. */
+ static inline void cr4_init_shadow(void)
+ {
+-	this_cpu_write(cpu_tlbstate.cr4, __read_cr4_safe());
++	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
+ }
+ 
+ /* Set in this cpu's CR4. */
+diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
+index bef340082d20..bbf3d5933eaa 100644
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
+ 	.read_cr0 = native_read_cr0,
+ 	.write_cr0 = native_write_cr0,
+ 	.read_cr4 = native_read_cr4,
+-	.read_cr4_safe = native_read_cr4_safe,
+ 	.write_cr4 = native_write_cr4,
+ #ifdef CONFIG_X86_64
+ 	.read_cr8 = native_read_cr8,
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 404efdfa083b..bd7be8efdc4c 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -90,7 +90,7 @@ void __show_regs(struct pt_regs *regs, int all)
+ 	cr0 = read_cr0();
+ 	cr2 = read_cr2();
+ 	cr3 = read_cr3();
+-	cr4 = __read_cr4_safe();
++	cr4 = __read_cr4();
+ 	printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+ 			cr0, cr2, cr3, cr4);
+ 
+diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
+index 87f2330cc805..3aabfdcbcb52 100644
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -1137,7 +1137,7 @@ void __init setup_arch(char **cmdline_p)
+ 	 * auditing all the early-boot CR4 manipulation would be needed to
+ 	 * rule it out.
+ 	 */
+-	mmu_cr4_features = __read_cr4_safe();
++	mmu_cr4_features = __read_cr4();
+ 
+ 	memblock_set_current_limit(get_max_mapped());
+ 
+diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
+index b12c26e2e309..53cace2ec0e2 100644
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -130,7 +130,7 @@ static void __save_processor_state(struct saved_context *ctxt)
+ 	ctxt->cr0 = read_cr0();
+ 	ctxt->cr2 = read_cr2();
+ 	ctxt->cr3 = read_cr3();
+-	ctxt->cr4 = __read_cr4_safe();
++	ctxt->cr4 = __read_cr4();
+ #ifdef CONFIG_X86_64
+ 	ctxt->cr8 = read_cr8();
+ #endif
+diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
+index b86ebb1a9a7f..e2cf8fcea6bb 100644
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -1237,7 +1237,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
+ 	.write_cr0 = xen_write_cr0,
+ 
+ 	.read_cr4 = native_read_cr4,
+-	.read_cr4_safe = native_read_cr4_safe,
+ 	.write_cr4 = xen_write_cr4,
+ 
+ #ifdef CONFIG_X86_64
+-- 
+2.15.0
+
diff --git a/queue/x86-asm-Move-the-thread_info-status-field-to-thread_.patch b/queue/x86-asm-Move-the-thread_info-status-field-to-thread_.patch
new file mode 100644
index 0000000..54c4e3e
--- /dev/null
+++ b/queue/x86-asm-Move-the-thread_info-status-field-to-thread_.patch
@@ -0,0 +1,252 @@
+From b9d989c7218ac922185d82ad46f3e58b27a4bea9 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 13 Sep 2016 14:29:21 -0700
+Subject: [PATCH] x86/asm: Move the thread_info::status field to thread_struct
+
+commit b9d989c7218ac922185d82ad46f3e58b27a4bea9 upstream.
+
+Because sched.h and thread_info.h are a tangled mess, I turned
+in_compat_syscall() into a macro.  If we had current_thread_struct()
+or similar and we could use it from thread_info.h, then this would
+be a bit cleaner.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jann Horn <jann@thejh.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/ccc8a1b2f41f9c264a41f771bb4a6539a642ad72.1473801993.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
+index 1433f6b4607d..871bbf975d4c 100644
+--- a/arch/x86/entry/common.c
++++ b/arch/x86/entry/common.c
+@@ -209,7 +209,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
+ 	 * special case only applies after poking regs and before the
+ 	 * very next return to user mode.
+ 	 */
+-	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
++	current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+ #endif
+ 
+ 	user_enter_irqoff();
+@@ -307,7 +307,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+ 	unsigned int nr = (unsigned int)regs->orig_ax;
+ 
+ #ifdef CONFIG_IA32_EMULATION
+-	ti->status |= TS_COMPAT;
++	current->thread.status |= TS_COMPAT;
+ #endif
+ 
+ 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index b22fb5a4ff3c..984a7bf17f6a 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -389,6 +389,9 @@ struct thread_struct {
+ 	unsigned short		fsindex;
+ 	unsigned short		gsindex;
+ #endif
++
++	u32			status;		/* thread synchronous flags */
++
+ #ifdef CONFIG_X86_64
+ 	unsigned long		fsbase;
+ 	unsigned long		gsbase;
+@@ -434,6 +437,15 @@ struct thread_struct {
+ 	 */
+ };
+ 
++/*
++ * Thread-synchronous status.
++ *
++ * This is different from the flags in that nobody else
++ * ever touches our thread-synchronous status, so we don't
++ * have to worry about atomic accesses.
++ */
++#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
++
+ /*
+  * Set IOPL bits in EFLAGS from given mask
+  */
+diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
+index 4e23dd15c661..e3c95e8e61c5 100644
+--- a/arch/x86/include/asm/syscall.h
++++ b/arch/x86/include/asm/syscall.h
+@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
+ 	 * TS_COMPAT is set for 32-bit syscall entries and then
+ 	 * remains set until we return to user mode.
+ 	 */
+-	if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
++	if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ 		/*
+ 		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+ 		 * and will match correctly in comparisons.
+@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
+ 					 unsigned long *args)
+ {
+ # ifdef CONFIG_IA32_EMULATION
+-	if (task_thread_info(task)->status & TS_COMPAT)
++	if (task->thread.status & TS_COMPAT)
+ 		switch (i) {
+ 		case 0:
+ 			if (!n--) break;
+@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
+ 					 const unsigned long *args)
+ {
+ # ifdef CONFIG_IA32_EMULATION
+-	if (task_thread_info(task)->status & TS_COMPAT)
++	if (task->thread.status & TS_COMPAT)
+ 		switch (i) {
+ 		case 0:
+ 			if (!n--) break;
+@@ -234,18 +234,8 @@ static inline void syscall_set_arguments(struct task_struct *task,
+ 
+ static inline int syscall_get_arch(void)
+ {
+-#ifdef CONFIG_IA32_EMULATION
+-	/*
+-	 * TS_COMPAT is set for 32-bit syscall entry and then
+-	 * remains set until we return to user mode.
+-	 *
+-	 * x32 tasks should be considered AUDIT_ARCH_X86_64.
+-	 */
+-	if (task_thread_info(current)->status & TS_COMPAT)
+-		return AUDIT_ARCH_I386;
+-#endif
+-	/* Both x32 and x86_64 are considered "64-bit". */
+-	return AUDIT_ARCH_X86_64;
++	/* x32 tasks should be considered AUDIT_ARCH_X86_64. */
++	return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+ }
+ #endif	/* CONFIG_X86_32 */
+ 
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index 494c4b5ada34..c9dcfe7c7e4b 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -55,7 +55,6 @@ struct task_struct;
+ struct thread_info {
+ 	struct task_struct	*task;		/* main task structure */
+ 	__u32			flags;		/* low level flags */
+-	__u32			status;		/* thread synchronous flags */
+ 	__u32			cpu;		/* current CPU */
+ };
+ 
+@@ -253,31 +252,17 @@ static inline int arch_within_stack_frames(const void * const stack,
+ 
+ #endif
+ 
+-/*
+- * Thread-synchronous status.
+- *
+- * This is different from the flags in that nobody else
+- * ever touches our thread-synchronous status, so we don't
+- * have to worry about atomic accesses.
+- */
+-#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
+ #ifdef CONFIG_COMPAT
+ #define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
+ #endif
+-
+ #ifndef __ASSEMBLY__
+ 
+-static inline bool in_ia32_syscall(void)
+-{
+ #ifdef CONFIG_X86_32
+-	return true;
+-#endif
+-#ifdef CONFIG_IA32_EMULATION
+-	if (current_thread_info()->status & TS_COMPAT)
+-		return true;
++#define in_ia32_syscall() true
++#else
++#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
++			   current->thread.status & TS_COMPAT)
+ #endif
+-	return false;
+-}
+ 
+ /*
+  * Force syscall return via IRET by making it look as if there was
+diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
+index db3a0af9b9ec..add5f90b93d4 100644
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -36,7 +36,6 @@ void common(void) {
+ 
+ 	BLANK();
+ 	OFFSET(TI_flags, thread_info, flags);
+-	OFFSET(TI_status, thread_info, status);
+ 
+ 	BLANK();
+ 	OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
+diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
+index 93982aebb398..2f2b8c7ccb85 100644
+--- a/arch/x86/kernel/fpu/init.c
++++ b/arch/x86/kernel/fpu/init.c
+@@ -317,7 +317,6 @@ static void __init fpu__init_system_ctx_switch(void)
+ 	on_boot_cpu = 0;
+ 
+ 	WARN_ON_FPU(current->thread.fpu.fpstate_active);
+-	current_thread_info()->status = 0;
+ 
+ 	if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
+ 		eagerfpu = ENABLE;
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index b812cd0d7889..de9acaf2d371 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -510,7 +510,7 @@ void set_personality_ia32(bool x32)
+ 		current->personality &= ~READ_IMPLIES_EXEC;
+ 		/* in_compat_syscall() uses the presence of the x32
+ 		   syscall bit flag to determine compat status */
+-		current_thread_info()->status &= ~TS_COMPAT;
++		current->thread.status &= ~TS_COMPAT;
+ 	} else {
+ 		set_thread_flag(TIF_IA32);
+ 		clear_thread_flag(TIF_X32);
+@@ -518,7 +518,7 @@ void set_personality_ia32(bool x32)
+ 			current->mm->context.ia32_compat = TIF_IA32;
+ 		current->personality |= force_personality32;
+ 		/* Prepare the first "return" to user space */
+-		current_thread_info()->status |= TS_COMPAT;
++		current->thread.status |= TS_COMPAT;
+ 	}
+ }
+ EXPORT_SYMBOL_GPL(set_personality_ia32);
+diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
+index 5b88a1b26fc7..ce94c38cf4d6 100644
+--- a/arch/x86/kernel/ptrace.c
++++ b/arch/x86/kernel/ptrace.c
+@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
+ 		 */
+ 		regs->orig_ax = value;
+ 		if (syscall_get_nr(child, regs) >= 0)
+-			task_thread_info(child)->status |= TS_I386_REGS_POKED;
++			child->thread.status |= TS_I386_REGS_POKED;
+ 		break;
+ 
+ 	case offsetof(struct user32, regs.eflags):
+diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
+index 04cb3212db2d..da20ecb5397a 100644
+--- a/arch/x86/kernel/signal.c
++++ b/arch/x86/kernel/signal.c
+@@ -783,7 +783,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
+ 	 * than the tracee.
+ 	 */
+ #ifdef CONFIG_IA32_EMULATION
+-	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
++	if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ 		return __NR_ia32_restart_syscall;
+ #endif
+ #ifdef CONFIG_X86_X32_ABI
+-- 
+2.15.0
+
diff --git a/queue/x86-cpufeature-Add-AVX512_4VNNIW-and-AVX512_4FMAPS-f.patch b/queue/x86-cpufeature-Add-AVX512_4VNNIW-and-AVX512_4FMAPS-f.patch
new file mode 100644
index 0000000..78ec401
--- /dev/null
+++ b/queue/x86-cpufeature-Add-AVX512_4VNNIW-and-AVX512_4FMAPS-f.patch
@@ -0,0 +1,90 @@
+From 8214899342981dbd49ae24aadbbd19e9e7830684 Mon Sep 17 00:00:00 2001
+From: Piotr Luc <piotr.luc@intel.com>
+Date: Tue, 18 Oct 2016 17:01:11 +0200
+Subject: [PATCH] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features
+
+commit 8214899342981dbd49ae24aadbbd19e9e7830684 upstream.
+
+AVX512_4VNNIW  - Vector instructions for deep learning enhanced word
+variable precision.
+AVX512_4FMAPS - Vector instructions for deep learning floating-point
+single precision.
+
+These new instructions are to be used in future Intel Xeon & Xeon Phi
+processors. The bits 2&3 of CPUID[level:0x07, EDX] inform that new
+instructions are supported by a processor.
+
+The spec can be found in the Intel Software Developer Manual (SDM) or in
+the Instruction Set Extensions Programming Reference (ISE).
+
+Define new feature flags to enumerate the new instructions in /proc/cpuinfo
+accordingly to CPUID bits and add the required xsave extensions which are
+required for proper operation.
+
+Signed-off-by: Piotr Luc <piotr.luc@intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/r/20161018150111.29926-1-piotr.luc@intel.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 1188bc849ee3..a39629206864 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -194,6 +194,8 @@
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ 
+ #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
++#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ 
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
+index 8cb57df9398d..1db8dc490b66 100644
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
+ 
+ 	static const struct cpuid_bit cpuid_bits[] = {
+ 		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
++		{ X86_FEATURE_AVX512_4VNNIW,	CR_EDX, 2, 0x00000007, 0 },
++		{ X86_FEATURE_AVX512_4FMAPS,	CR_EDX, 3, 0x00000007, 0 },
+ 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
+ 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
+ 		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
+diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
+index 124aa5c593f8..095ef7ddd6ae 100644
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
+ 	setup_clear_cpu_cap(X86_FEATURE_MPX);
+ 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
+ 	setup_clear_cpu_cap(X86_FEATURE_PKU);
++	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
++	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
+ }
+ 
+ /*
+diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
+index 1188bc849ee3..a39629206864 100644
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -194,6 +194,8 @@
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ 
+ #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
++#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ 
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+-- 
+2.15.0
+
diff --git a/queue/x86-entry-64-Clean-up-and-document-espfix64-stack-se.patch b/queue/x86-entry-64-Clean-up-and-document-espfix64-stack-se.patch
new file mode 100644
index 0000000..fe131ea
--- /dev/null
+++ b/queue/x86-entry-64-Clean-up-and-document-espfix64-stack-se.patch
@@ -0,0 +1,113 @@
+From 85063fac1f72419eec4349621fe829b07f9acb1e Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 12 Sep 2016 15:05:51 -0700
+Subject: [PATCH] x86/entry/64: Clean up and document espfix64 stack setup
+
+commit 85063fac1f72419eec4349621fe829b07f9acb1e upstream.
+
+The espfix64 setup code was a bit inscrutible and contained an
+unnecessary push of RAX.  Remove that push, update all the stack
+offsets to match, and document the whole mess.
+
+Reported-By: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/e5459eb10cf1175c8b36b840bc425f210d045f35.1473717910.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index c0373d667674..e7fba58f4d9c 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -586,27 +586,69 @@ native_irq_return_iret:
+ 
+ #ifdef CONFIG_X86_ESPFIX64
+ native_irq_return_ldt:
+-	pushq	%rax
+-	pushq	%rdi
++	/*
++	 * We are running with user GSBASE.  All GPRs contain their user
++	 * values.  We have a percpu ESPFIX stack that is eight slots
++	 * long (see ESPFIX_STACK_SIZE).  espfix_waddr points to the bottom
++	 * of the ESPFIX stack.
++	 *
++	 * We clobber RAX and RDI in this code.  We stash RDI on the
++	 * normal stack and RAX on the ESPFIX stack.
++	 *
++	 * The ESPFIX stack layout we set up looks like this:
++	 *
++	 * --- top of ESPFIX stack ---
++	 * SS
++	 * RSP
++	 * RFLAGS
++	 * CS
++	 * RIP  <-- RSP points here when we're done
++	 * RAX  <-- espfix_waddr points here
++	 * --- bottom of ESPFIX stack ---
++	 */
++
++	pushq	%rdi				/* Stash user RDI */
+ 	SWAPGS
+ 	movq	PER_CPU_VAR(espfix_waddr), %rdi
+-	movq	%rax, (0*8)(%rdi)		/* RAX */
+-	movq	(2*8)(%rsp), %rax		/* RIP */
++	movq	%rax, (0*8)(%rdi)		/* user RAX */
++	movq	(1*8)(%rsp), %rax		/* user RIP */
+ 	movq	%rax, (1*8)(%rdi)
+-	movq	(3*8)(%rsp), %rax		/* CS */
++	movq	(2*8)(%rsp), %rax		/* user CS */
+ 	movq	%rax, (2*8)(%rdi)
+-	movq	(4*8)(%rsp), %rax		/* RFLAGS */
++	movq	(3*8)(%rsp), %rax		/* user RFLAGS */
+ 	movq	%rax, (3*8)(%rdi)
+-	movq	(6*8)(%rsp), %rax		/* SS */
++	movq	(5*8)(%rsp), %rax		/* user SS */
+ 	movq	%rax, (5*8)(%rdi)
+-	movq	(5*8)(%rsp), %rax		/* RSP */
++	movq	(4*8)(%rsp), %rax		/* user RSP */
+ 	movq	%rax, (4*8)(%rdi)
+-	andl	$0xffff0000, %eax
+-	popq	%rdi
++	/* Now RAX == RSP. */
++
++	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */
++	popq	%rdi				/* Restore user RDI */
++
++	/*
++	 * espfix_stack[31:16] == 0.  The page tables are set up such that
++	 * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
++	 * espfix_waddr for any X.  That is, there are 65536 RO aliases of
++	 * the same page.  Set up RSP so that RSP[31:16] contains the
++	 * respective 16 bits of the /userspace/ RSP and RSP nonetheless
++	 * still points to an RO alias of the ESPFIX stack.
++	 */
+ 	orq	PER_CPU_VAR(espfix_stack), %rax
+ 	SWAPGS
+ 	movq	%rax, %rsp
+-	popq	%rax
++
++	/*
++	 * At this point, we cannot write to the stack any more, but we can
++	 * still read.
++	 */
++	popq	%rax				/* Restore user RAX */
++
++	/*
++	 * RSP now points to an ordinary IRET frame, except that the page
++	 * is read-only and RSP[31:16] are preloaded with the userspace
++	 * values.  We can now IRET back to userspace.
++	 */
+ 	jmp	native_irq_return_iret
+ #endif
+ END(common_interrupt)
+-- 
+2.15.0
+
diff --git a/queue/x86-entry-64-Fix-a-minor-comment-rebase-error.patch b/queue/x86-entry-64-Fix-a-minor-comment-rebase-error.patch
new file mode 100644
index 0000000..2ce03f1
--- /dev/null
+++ b/queue/x86-entry-64-Fix-a-minor-comment-rebase-error.patch
@@ -0,0 +1,40 @@
+From ff0071c03684485495e06f3936399eb9c93141a6 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 15 Sep 2016 22:45:42 -0700
+Subject: [PATCH] x86/entry/64: Fix a minor comment rebase error
+
+commit ff0071c03684485495e06f3936399eb9c93141a6 upstream.
+
+When I rebased my thread_info changes onto Brian's switch_to()
+changes, I carefully checked that I fixed up all the code correctly,
+but I missed a comment :(
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jann Horn <jann@thejh.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: 15f4eae70d36 ("x86: Move thread_info into task_struct")
+Link: http://lkml.kernel.org/r/089fe1e1cbe8b258b064fccbb1a5a5fd23861031.1474003868.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 2b46384b4a4f..80ab68a42621 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -372,7 +372,6 @@ END(ptregs_\func)
+ /*
+  * %rdi: prev task
+  * %rsi: next task
+- * rsi: task we're switching to
+  */
+ ENTRY(__switch_to_asm)
+ 	/*
+-- 
+2.15.0
+
diff --git a/queue/x86-entry-Get-rid-of-pt_regs_to_thread_info.patch b/queue/x86-entry-Get-rid-of-pt_regs_to_thread_info.patch
new file mode 100644
index 0000000..5b70330
--- /dev/null
+++ b/queue/x86-entry-Get-rid-of-pt_regs_to_thread_info.patch
@@ -0,0 +1,108 @@
+From 97245d00585d82540f4538cf72d92a1e853c7b0e Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 13 Sep 2016 14:29:22 -0700
+Subject: [PATCH] x86/entry: Get rid of pt_regs_to_thread_info()
+
+commit 97245d00585d82540f4538cf72d92a1e853c7b0e upstream.
+
+It was a nice optimization while it lasted, but thread_info is moving
+and this optimization will no longer work.
+
+Quoting Linus:
+
+    Oh Gods, Andy. That pt_regs_to_thread_info() thing made me want
+    to do unspeakable acts on a poor innocent wax figure that looked
+    _exactly_ like you.
+
+[ Changelog written by Andy. ]
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jann Horn <jann@thejh.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/6376aa81c68798cc81631673f52bd91a3e078944.1473801993.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
+index 871bbf975d4c..bdd9cc59d20f 100644
+--- a/arch/x86/entry/common.c
++++ b/arch/x86/entry/common.c
+@@ -31,13 +31,6 @@
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/syscalls.h>
+ 
+-static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
+-{
+-	unsigned long top_of_stack =
+-		(unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
+-	return (struct thread_info *)(top_of_stack - THREAD_SIZE);
+-}
+-
+ #ifdef CONFIG_CONTEXT_TRACKING
+ /* Called on entry from user mode with IRQs off. */
+ __visible inline void enter_from_user_mode(void)
+@@ -71,7 +64,7 @@ static long syscall_trace_enter(struct pt_regs *regs)
+ {
+ 	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+ 
+-	struct thread_info *ti = pt_regs_to_thread_info(regs);
++	struct thread_info *ti = current_thread_info();
+ 	unsigned long ret = 0;
+ 	bool emulated = false;
+ 	u32 work;
+@@ -173,18 +166,17 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
+ 		/* Disable IRQs and retry */
+ 		local_irq_disable();
+ 
+-		cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags);
++		cached_flags = READ_ONCE(current_thread_info()->flags);
+ 
+ 		if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
+ 			break;
+-
+ 	}
+ }
+ 
+ /* Called with IRQs disabled. */
+ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
+ {
+-	struct thread_info *ti = pt_regs_to_thread_info(regs);
++	struct thread_info *ti = current_thread_info();
+ 	u32 cached_flags;
+ 
+ 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
+@@ -247,7 +239,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
+  */
+ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
+ {
+-	struct thread_info *ti = pt_regs_to_thread_info(regs);
++	struct thread_info *ti = current_thread_info();
+ 	u32 cached_flags = READ_ONCE(ti->flags);
+ 
+ 	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
+@@ -270,7 +262,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
+ #ifdef CONFIG_X86_64
+ __visible void do_syscall_64(struct pt_regs *regs)
+ {
+-	struct thread_info *ti = pt_regs_to_thread_info(regs);
++	struct thread_info *ti = current_thread_info();
+ 	unsigned long nr = regs->orig_ax;
+ 
+ 	enter_from_user_mode();
+@@ -303,7 +295,7 @@ __visible void do_syscall_64(struct pt_regs *regs)
+  */
+ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+ {
+-	struct thread_info *ti = pt_regs_to_thread_info(regs);
++	struct thread_info *ti = current_thread_info();
+ 	unsigned int nr = (unsigned int)regs->orig_ax;
+ 
+ #ifdef CONFIG_IA32_EMULATION
+-- 
+2.15.0
+
diff --git a/queue/x86-entry-Remove-duplicated-comment.patch b/queue/x86-entry-Remove-duplicated-comment.patch
new file mode 100644
index 0000000..c6d5388
--- /dev/null
+++ b/queue/x86-entry-Remove-duplicated-comment.patch
@@ -0,0 +1,40 @@
+From b3830e8d478cd9fe33e820425ce431c8ef280967 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 1 Aug 2016 12:05:02 +0200
+Subject: [PATCH] x86/entry: Remove duplicated comment
+
+commit b3830e8d478cd9fe33e820425ce431c8ef280967 upstream.
+
+Ok, ok, we see it is called from C :-)
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20160801100502.29796-1-bp@alien8.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index b846875aeea6..8956eae04c25 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -347,8 +347,7 @@ ENTRY(stub_ptregs_64)
+ 	jmp	entry_SYSCALL64_slow_path
+ 
+ 1:
+-	/* Called from C */
+-	jmp	*%rax				/* called from C */
++	jmp	*%rax				/* Called from C */
+ END(stub_ptregs_64)
+ 
+ .macro ptregs_stub func
+-- 
+2.15.0
+
diff --git a/queue/x86-entry-spell-EBX-register-correctly-in-documentat.patch b/queue/x86-entry-spell-EBX-register-correctly-in-documentat.patch
new file mode 100644
index 0000000..a08a569
--- /dev/null
+++ b/queue/x86-entry-spell-EBX-register-correctly-in-documentat.patch
@@ -0,0 +1,30 @@
+From 75ca5b22260ef7b5ce39c6d521eee8b4cba44703 Mon Sep 17 00:00:00 2001
+From: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
+Date: Fri, 29 Jul 2016 13:39:51 +0200
+Subject: [PATCH] x86/entry: spell EBX register correctly in documentation
+
+commit 75ca5b22260ef7b5ce39c6d521eee8b4cba44703 upstream.
+
+As EBS does not mean anything reasonable in the context it is used, it
+seems like a misspelling for EBX.
+
+Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 9ee0da1807ed..c8804827d436 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1058,7 +1058,7 @@ END(error_entry)
+ 
+ 
+ /*
+- * On entry, EBS is a "return to kernel mode" flag:
++ * On entry, EBX is a "return to kernel mode" flag:
+  *   1: already in kernel mode, don't need SWAPGS
+  *   0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
+  */
+-- 
+2.15.0
+
diff --git a/queue/x86-kbuild-enable-modversions-for-symbols-exported-f.patch b/queue/x86-kbuild-enable-modversions-for-symbols-exported-f.patch
new file mode 100644
index 0000000..b6b9b6f
--- /dev/null
+++ b/queue/x86-kbuild-enable-modversions-for-symbols-exported-f.patch
@@ -0,0 +1,63 @@
+From 334bb773876403eae3457d81be0b8ea70f8e4ccc Mon Sep 17 00:00:00 2001
+From: Adam Borowski <kilobyte@angband.pl>
+Date: Sun, 11 Dec 2016 02:09:18 +0100
+Subject: [PATCH] x86/kbuild: enable modversions for symbols exported from asm
+
+commit 334bb773876403eae3457d81be0b8ea70f8e4ccc upstream.
+
+Commit 4efca4ed ("kbuild: modversions for EXPORT_SYMBOL() for asm") adds
+modversion support for symbols exported from asm files. Architectures
+must include C-style declarations for those symbols in asm/asm-prototypes.h
+in order for them to be versioned.
+
+Add these declarations for x86, and an architecture-independent file that
+can be used for common symbols.
+
+With f27c2f6 reverting 8ab2ae6 ("default exported asm symbols to zero") we
+produce a scary warning on x86, this commit fixes that.
+
+Signed-off-by: Adam Borowski <kilobyte@angband.pl>
+Tested-by: Kalle Valo <kvalo@codeaurora.org>
+Acked-by: Nicholas Piggin <npiggin@gmail.com>
+Tested-by: Peter Wu <peter@lekensteyn.nl>
+Tested-by: Oliver Hartkopp <socketcan@hartkopp.net>
+Signed-off-by: Michal Marek <mmarek@suse.com>
+
+diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
+new file mode 100644
+index 000000000000..44b8762fa0c7
+--- /dev/null
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -0,0 +1,16 @@
++#include <asm/ftrace.h>
++#include <asm/uaccess.h>
++#include <asm/string.h>
++#include <asm/page.h>
++#include <asm/checksum.h>
++
++#include <asm-generic/asm-prototypes.h>
++
++#include <asm/page.h>
++#include <asm/pgtable.h>
++#include <asm/special_insns.h>
++#include <asm/preempt.h>
++
++#ifndef CONFIG_X86_CMPXCHG64
++extern void cmpxchg8b_emu(void);
++#endif
+diff --git a/include/asm-generic/asm-prototypes.h b/include/asm-generic/asm-prototypes.h
+new file mode 100644
+index 000000000000..df13637e4017
+--- /dev/null
++++ b/include/asm-generic/asm-prototypes.h
+@@ -0,0 +1,7 @@
++#include <linux/bitops.h>
++extern void *__memset(void *, int, __kernel_size_t);
++extern void *__memcpy(void *, const void *, __kernel_size_t);
++extern void *__memmove(void *, const void *, __kernel_size_t);
++extern void *memset(void *, int, __kernel_size_t);
++extern void *memcpy(void *, const void *, __kernel_size_t);
++extern void *memmove(void *, const void *, __kernel_size_t);
+-- 
+2.15.0
+
diff --git a/queue/x86-mce-Add-PCI-quirks-to-identify-Xeons-with-machin.patch b/queue/x86-mce-Add-PCI-quirks-to-identify-Xeons-with-machin.patch
new file mode 100644
index 0000000..7261c05
--- /dev/null
+++ b/queue/x86-mce-Add-PCI-quirks-to-identify-Xeons-with-machin.patch
@@ -0,0 +1,121 @@
+From 3637efb00864f465baebd49464e58319fd295b65 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Thu, 1 Sep 2016 11:39:33 -0700
+Subject: [PATCH] x86/mce: Add PCI quirks to identify Xeons with machine check
+ recovery
+
+commit 3637efb00864f465baebd49464e58319fd295b65 upstream.
+
+Each Xeon includes a number of capability registers in PCI space that
+describe some features not enumerated by CPUID.
+
+Use these to determine that we are running on a model that can recover from
+machine checks. Hooks for Ivybridge ... Skylake provided.
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Boris Petkov <bp@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/r/abf331dc4a3e2a2d17444129bc51127437bcf4ba.1472754711.git.tony.luck@intel.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
+index 90dbbd9666d4..877a1dfbf770 100644
+--- a/arch/x86/include/asm/string_64.h
++++ b/arch/x86/include/asm/string_64.h
+@@ -2,6 +2,7 @@
+ #define _ASM_X86_STRING_64_H
+ 
+ #ifdef __KERNEL__
++#include <linux/jump_label.h>
+ 
+ /* Written 2002 by Andi Kleen */
+ 
+@@ -78,6 +79,8 @@ int strcmp(const char *cs, const char *ct);
+ #define memset(s, c, n) __memset(s, c, n)
+ #endif
+ 
++DECLARE_STATIC_KEY_FALSE(mcsafe_key);
++
+ /**
+  * memcpy_mcsafe - copy memory with indication if a machine check happened
+  *
+diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
+index 79d8ec849468..acccebcc836d 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -41,6 +41,7 @@
+ #include <linux/debugfs.h>
+ #include <linux/irq_work.h>
+ #include <linux/export.h>
++#include <linux/jump_label.h>
+ 
+ #include <asm/processor.h>
+ #include <asm/traps.h>
+@@ -2080,6 +2081,7 @@ void mce_disable_bank(int bank)
+  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+  * mce=nobootlog Don't log MCEs from before booting.
+  * mce=bios_cmci_threshold Don't program the CMCI threshold
++ * mce=recovery force enable memcpy_mcsafe()
+  */
+ static int __init mcheck_enable(char *str)
+ {
+@@ -2676,8 +2678,14 @@ static int __init mcheck_debugfs_init(void)
+ static int __init mcheck_debugfs_init(void) { return -EINVAL; }
+ #endif
+ 
++DEFINE_STATIC_KEY_FALSE(mcsafe_key);
++EXPORT_SYMBOL_GPL(mcsafe_key);
++
+ static int __init mcheck_late_init(void)
+ {
++	if (mca_cfg.recovery)
++		static_branch_inc(&mcsafe_key);
++
+ 	mcheck_debugfs_init();
+ 
+ 	/*
+diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
+index cc457ff818ad..51402a7e4ca6 100644
+--- a/arch/x86/kernel/quirks.c
++++ b/arch/x86/kernel/quirks.c
+@@ -626,3 +626,34 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
+ 			amd_disable_seq_and_redirect_scrub);
+ 
+ #endif
++
++#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
++#include <linux/jump_label.h>
++#include <asm/string_64.h>
++
++/* Ivy Bridge, Haswell, Broadwell */
++static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
++{
++	u32 capid0;
++
++	pci_read_config_dword(pdev, 0x84, &capid0);
++
++	if (capid0 & 0x10)
++		static_branch_inc(&mcsafe_key);
++}
++
++/* Skylake */
++static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
++{
++	u32 capid0;
++
++	pci_read_config_dword(pdev, 0x84, &capid0);
++
++	if ((capid0 & 0xc0) == 0xc0)
++		static_branch_inc(&mcsafe_key);
++}
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
++#endif
+-- 
+2.15.0
+
diff --git a/queue/x86-mce-Drop-X86_FEATURE_MCE_RECOVERY-and-the-relate.patch b/queue/x86-mce-Drop-X86_FEATURE_MCE_RECOVERY-and-the-relate.patch
new file mode 100644
index 0000000..caca953
--- /dev/null
+++ b/queue/x86-mce-Drop-X86_FEATURE_MCE_RECOVERY-and-the-relate.patch
@@ -0,0 +1,57 @@
+From ffb173e657fa8123bffa2a169e124b4bca0b5bc4 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Thu, 1 Sep 2016 11:39:33 -0700
+Subject: [PATCH] x86/mce: Drop X86_FEATURE_MCE_RECOVERY and the related model
+ string test
+
+commit ffb173e657fa8123bffa2a169e124b4bca0b5bc4 upstream.
+
+We now have a better way to determine if we are running on a cpu that
+supports machine check recovery. Free up this feature bit.
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Boris Petkov <bp@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/r/d5db39e08d46cf1012d94d3902275d08ba931926.1472754712.git.tony.luck@intel.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 92a8308b96f6..1188bc849ee3 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -106,7 +106,6 @@
+ #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
+ #define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+-#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
+ 
+ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+ #define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
+diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
+index acccebcc836d..7f3f0e147242 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -1634,17 +1634,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
+ 
+ 		if (c->x86 == 6 && c->x86_model == 45)
+ 			quirk_no_way_out = quirk_sandybridge_ifu;
+-		/*
+-		 * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
+-		 * whether this processor will actually generate recoverable
+-		 * machine checks. Check to see if this is an E7 model Xeon.
+-		 * We can't do a model number check because E5 and E7 use the
+-		 * same model number. E5 doesn't support recovery, E7 does.
+-		 */
+-		if (mca_cfg.recovery || (mca_cfg.ser &&
+-			!strncmp(c->x86_model_id,
+-				 "Intel(R) Xeon(R) CPU E7-", 24)))
+-			set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
+ 	}
+ 	if (cfg->monarch_timeout < 0)
+ 		cfg->monarch_timeout = 0;
+-- 
+2.15.0
+
diff --git a/queue/x86-mce-Improve-memcpy_mcsafe.patch b/queue/x86-mce-Improve-memcpy_mcsafe.patch
new file mode 100644
index 0000000..2d865ec
--- /dev/null
+++ b/queue/x86-mce-Improve-memcpy_mcsafe.patch
@@ -0,0 +1,120 @@
+From 9a6fb28a355d2609ace4dab4e6425442c647894d Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Thu, 1 Sep 2016 11:39:33 -0700
+Subject: [PATCH] x86/mce: Improve memcpy_mcsafe()
+
+commit 9a6fb28a355d2609ace4dab4e6425442c647894d upstream.
+
+Use the mcsafe_key defined in the previous patch to make decisions on which
+copy function to use. We can't use the FEATURE bit any more because PCI
+quirks run too late to affect the patching of code. So we use a static key.
+
+Turn memcpy_mcsafe() into an inline function to make life easier for
+callers. The assembly code that actually does the copy is now named
+memcpy_mcsafe_unrolled()
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Boris Petkov <bp@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/r/bfde2fc774e94f53d91b70a4321c85a0d33e7118.1472754712.git.tony.luck@intel.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
+index 643eba42d620..2c1ebeb4d737 100644
+--- a/arch/x86/include/asm/pmem.h
++++ b/arch/x86/include/asm/pmem.h
+@@ -46,10 +46,7 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
+ 
+ static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
+ {
+-	if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
+-		return memcpy_mcsafe(dst, src, n);
+-	memcpy(dst, src, n);
+-	return 0;
++	return memcpy_mcsafe(dst, src, n);
+ }
+ 
+ /**
+diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
+index 877a1dfbf770..a164862d77e3 100644
+--- a/arch/x86/include/asm/string_64.h
++++ b/arch/x86/include/asm/string_64.h
+@@ -79,6 +79,7 @@ int strcmp(const char *cs, const char *ct);
+ #define memset(s, c, n) __memset(s, c, n)
+ #endif
+ 
++__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
+ DECLARE_STATIC_KEY_FALSE(mcsafe_key);
+ 
+ /**
+@@ -89,10 +90,23 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key);
+  * @cnt:	number of bytes to copy
+  *
+  * Low level memory copy function that catches machine checks
++ * We only call into the "safe" function on systems that can
++ * actually do machine check recovery. Everyone else can just
++ * use memcpy().
+  *
+  * Return 0 for success, -EFAULT for fail
+  */
+-int memcpy_mcsafe(void *dst, const void *src, size_t cnt);
++static __always_inline __must_check int
++memcpy_mcsafe(void *dst, const void *src, size_t cnt)
++{
++#ifdef CONFIG_X86_MCE
++	if (static_branch_unlikely(&mcsafe_key))
++		return memcpy_mcsafe_unrolled(dst, src, cnt);
++	else
++#endif
++		memcpy(dst, src, cnt);
++	return 0;
++}
+ 
+ #endif /* __KERNEL__ */
+ 
+diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
+index 95e49f6e4fc3..b2cee3d19477 100644
+--- a/arch/x86/kernel/x8664_ksyms_64.c
++++ b/arch/x86/kernel/x8664_ksyms_64.c
+@@ -38,7 +38,7 @@ EXPORT_SYMBOL(__copy_user_nocache);
+ EXPORT_SYMBOL(_copy_from_user);
+ EXPORT_SYMBOL(_copy_to_user);
+ 
+-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
++EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled);
+ 
+ EXPORT_SYMBOL(copy_page);
+ EXPORT_SYMBOL(clear_page);
+diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
+index 2ec0b0abbfaa..49e6ebac7e73 100644
+--- a/arch/x86/lib/memcpy_64.S
++++ b/arch/x86/lib/memcpy_64.S
+@@ -181,11 +181,11 @@ ENDPROC(memcpy_orig)
+ 
+ #ifndef CONFIG_UML
+ /*
+- * memcpy_mcsafe - memory copy with machine check exception handling
++ * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
+  * Note that we only catch machine checks when reading the source addresses.
+  * Writes to target are posted and don't generate machine checks.
+  */
+-ENTRY(memcpy_mcsafe)
++ENTRY(memcpy_mcsafe_unrolled)
+ 	cmpl $8, %edx
+ 	/* Less than 8 bytes? Go to byte copy loop */
+ 	jb .L_no_whole_words
+@@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe)
+ .L_done_memcpy_trap:
+ 	xorq %rax, %rax
+ 	ret
+-ENDPROC(memcpy_mcsafe)
++ENDPROC(memcpy_mcsafe_unrolled)
+ 
+ 	.section .fixup, "ax"
+ 	/* Return -EFAULT for any failure */
+-- 
+2.15.0
+
diff --git a/queue/x86-mm-64-Enable-vmapped-stacks-CONFIG_HAVE_ARCH_VMA.patch b/queue/x86-mm-64-Enable-vmapped-stacks-CONFIG_HAVE_ARCH_VMA.patch
new file mode 100644
index 0000000..457ae3b
--- /dev/null
+++ b/queue/x86-mm-64-Enable-vmapped-stacks-CONFIG_HAVE_ARCH_VMA.patch
@@ -0,0 +1,234 @@
+From e37e43a497d5a8b7c0cc1736d56986f432c394c9 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 11 Aug 2016 02:35:23 -0700
+Subject: [PATCH] x86/mm/64: Enable vmapped stacks
+ (CONFIG_HAVE_ARCH_VMAP_STACK=y)
+
+commit e37e43a497d5a8b7c0cc1736d56986f432c394c9 upstream.
+
+This allows x86_64 kernels to enable vmapped stacks by setting
+HAVE_ARCH_VMAP_STACK=y - which enables the CONFIG_VMAP_STACK=y
+high level Kconfig option.
+
+There are a couple of interesting bits:
+
+First, x86 lazily faults in top-level paging entries for the vmalloc
+area.  This won't work if we get a page fault while trying to access
+the stack: the CPU will promote it to a double-fault and we'll die.
+To avoid this problem, probe the new stack when switching stacks and
+forcibly populate the pgd entry for the stack when switching mms.
+
+Second, once we have guard pages around the stack, we'll want to
+detect and handle stack overflow.
+
+I didn't enable it on x86_32.  We'd need to rework the double-fault
+code a bit and I'm concerned about running out of vmalloc virtual
+addresses under some workloads.
+
+This patch, by itself, will behave somewhat erratically when the
+stack overflows while RSP is still more than a few tens of bytes
+above the bottom of the stack.  Specifically, we'll get #PF and make
+it to no_context and them oops without reliably triggering a
+double-fault, and no_context doesn't know about stack overflows.
+The next patch will improve that case.
+
+Thank you to Nadav and Brian for helping me pay enough attention to
+the SDM to hopefully get this right.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/c88f3e2920b18e6cc621d772a04a62c06869037e.1470907718.git.luto@kernel.org
+[ Minor edits. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index c580d8c33562..21a6d0ec5983 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -94,6 +94,7 @@ config X86
+ 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ 	select HAVE_ARCH_WITHIN_STACK_FRAMES
+ 	select HAVE_EBPF_JIT			if X86_64
++	select HAVE_ARCH_VMAP_STACK		if X86_64
+ 	select HAVE_CC_STACKPROTECTOR
+ 	select HAVE_CMPXCHG_DOUBLE
+ 	select HAVE_CMPXCHG_LOCAL
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index 8f321a1b03a1..14e4b20f0aaf 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -8,6 +8,28 @@ struct tss_struct;
+ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ 		      struct tss_struct *tss);
+ 
++/* This runs runs on the previous thread's stack. */
++static inline void prepare_switch_to(struct task_struct *prev,
++				     struct task_struct *next)
++{
++#ifdef CONFIG_VMAP_STACK
++	/*
++	 * If we switch to a stack that has a top-level paging entry
++	 * that is not present in the current mm, the resulting #PF will
++	 * will be promoted to a double-fault and we'll panic.  Probe
++	 * the new stack now so that vmalloc_fault can fix up the page
++	 * tables if needed.  This can only happen if we use a stack
++	 * in vmap space.
++	 *
++	 * We assume that the stack is aligned so that it never spans
++	 * more than one top-level paging entry.
++	 *
++	 * To minimize cache pollution, just follow the stack pointer.
++	 */
++	READ_ONCE(*(unsigned char *)next->thread.sp);
++#endif
++}
++
+ #ifdef CONFIG_X86_32
+ 
+ #ifdef CONFIG_CC_STACKPROTECTOR
+@@ -39,6 +61,8 @@ do {									\
+ 	 */								\
+ 	unsigned long ebx, ecx, edx, esi, edi;				\
+ 									\
++	prepare_switch_to(prev, next);					\
++									\
+ 	asm volatile("pushl %%ebp\n\t"		/* save    EBP   */	\
+ 		     "movl %%esp,%[prev_sp]\n\t"	/* save    ESP   */ \
+ 		     "movl %[next_sp],%%esp\n\t"	/* restore ESP   */ \
+@@ -103,7 +127,9 @@ do {									\
+  * clean in kernel mode, with the possible exception of IOPL.  Kernel IOPL
+  * has no effect.
+  */
+-#define switch_to(prev, next, last) \
++#define switch_to(prev, next, last)					  \
++	prepare_switch_to(prev, next);					  \
++									  \
+ 	asm volatile(SAVE_CONTEXT					  \
+ 	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
+ 	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index b70ca12dd389..907b4e4aeb5e 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present",	segment_not_present)
+ DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
+ DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
+ 
++#ifdef CONFIG_VMAP_STACK
++static void __noreturn handle_stack_overflow(const char *message,
++					     struct pt_regs *regs,
++					     unsigned long fault_address)
++{
++	printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
++		 (void *)fault_address, current->stack,
++		 (char *)current->stack + THREAD_SIZE - 1);
++	die(message, regs, 0);
++
++	/* Be absolutely certain we don't return. */
++	panic(message);
++}
++#endif
++
+ #ifdef CONFIG_X86_64
+ /* Runs on IST stack */
+ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+ {
+ 	static const char str[] = "double fault";
+ 	struct task_struct *tsk = current;
++#ifdef CONFIG_VMAP_STACK
++	unsigned long cr2;
++#endif
+ 
+ #ifdef CONFIG_X86_ESPFIX64
+ 	extern unsigned char native_irq_return_iret[];
+@@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+ 	tsk->thread.error_code = error_code;
+ 	tsk->thread.trap_nr = X86_TRAP_DF;
+ 
++#ifdef CONFIG_VMAP_STACK
++	/*
++	 * If we overflow the stack into a guard page, the CPU will fail
++	 * to deliver #PF and will send #DF instead.  Similarly, if we
++	 * take any non-IST exception while too close to the bottom of
++	 * the stack, the processor will get a page fault while
++	 * delivering the exception and will generate a double fault.
++	 *
++	 * According to the SDM (footnote in 6.15 under "Interrupt 14 -
++	 * Page-Fault Exception (#PF):
++	 *
++	 *   Processors update CR2 whenever a page fault is detected. If a
++	 *   second page fault occurs while an earlier page fault is being
++	 *   deliv- ered, the faulting linear address of the second fault will
++	 *   overwrite the contents of CR2 (replacing the previous
++	 *   address). These updates to CR2 occur even if the page fault
++	 *   results in a double fault or occurs during the delivery of a
++	 *   double fault.
++	 *
++	 * The logic below has a small possibility of incorrectly diagnosing
++	 * some errors as stack overflows.  For example, if the IDT or GDT
++	 * gets corrupted such that #GP delivery fails due to a bad descriptor
++	 * causing #GP and we hit this condition while CR2 coincidentally
++	 * points to the stack guard page, we'll think we overflowed the
++	 * stack.  Given that we're going to panic one way or another
++	 * if this happens, this isn't necessarily worth fixing.
++	 *
++	 * If necessary, we could improve the test by only diagnosing
++	 * a stack overflow if the saved RSP points within 47 bytes of
++	 * the bottom of the stack: if RSP == tsk_stack + 48 and we
++	 * take an exception, the stack is already aligned and there
++	 * will be enough room SS, RSP, RFLAGS, CS, RIP, and a
++	 * possible error code, so a stack overflow would *not* double
++	 * fault.  With any less space left, exception delivery could
++	 * fail, and, as a practical matter, we've overflowed the
++	 * stack even if the actual trigger for the double fault was
++	 * something else.
++	 */
++	cr2 = read_cr2();
++	if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
++		handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
++#endif
++
+ #ifdef CONFIG_DOUBLEFAULT
+ 	df_debug(regs, error_code);
+ #endif
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 4dbe65622810..a7655f6caf7d 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ 	unsigned cpu = smp_processor_id();
+ 
+ 	if (likely(prev != next)) {
++		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
++			/*
++			 * If our current stack is in vmalloc space and isn't
++			 * mapped in the new pgd, we'll double-fault.  Forcibly
++			 * map it.
++			 */
++			unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
++
++			pgd_t *pgd = next->pgd + stack_pgd_index;
++
++			if (unlikely(pgd_none(*pgd)))
++				set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
++		}
++
+ #ifdef CONFIG_SMP
+ 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ 		this_cpu_write(cpu_tlbstate.active_mm, next);
+ #endif
++
+ 		cpumask_set_cpu(cpu, mm_cpumask(next));
+ 
+ 		/*
+-- 
+2.15.0
+
diff --git a/queue/x86-move-exports-to-actual-definitions.patch b/queue/x86-move-exports-to-actual-definitions.patch
new file mode 100644
index 0000000..d822535
--- /dev/null
+++ b/queue/x86-move-exports-to-actual-definitions.patch
@@ -0,0 +1,723 @@
+From b0b9d354f5f52a5bc96c4a8715b69be17729d3b5 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 11 Jan 2016 11:04:34 -0500
+Subject: [PATCH] x86: move exports to actual definitions
+
+commit 784d5699eddc55878627da20d3fe0c8542e2f1a2 upstream.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index 7b52e50863ff..edba8606b99a 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -44,6 +44,7 @@
+ #include <asm/alternative-asm.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/export.h>
+ 
+ 	.section .entry.text, "ax"
+ 
+@@ -991,6 +992,7 @@ trace:
+ 	jmp	ftrace_stub
+ END(mcount)
+ #endif /* CONFIG_DYNAMIC_FTRACE */
++EXPORT_SYMBOL(mcount)
+ #endif /* CONFIG_FUNCTION_TRACER */
+ 
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index c98ec2efd750..ef766a358b37 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -35,6 +35,7 @@
+ #include <asm/asm.h>
+ #include <asm/smap.h>
+ #include <asm/pgtable_types.h>
++#include <asm/export.h>
+ #include <linux/err.h>
+ 
+ /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
+@@ -875,6 +876,7 @@ ENTRY(native_load_gs_index)
+ 	popfq
+ 	ret
+ END(native_load_gs_index)
++EXPORT_SYMBOL(native_load_gs_index)
+ 
+ 	_ASM_EXTABLE(.Lgs_change, bad_gs)
+ 	.section .fixup, "ax"
+diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
+index e5a17114a8c4..fee6bc79b987 100644
+--- a/arch/x86/entry/thunk_32.S
++++ b/arch/x86/entry/thunk_32.S
+@@ -6,6 +6,7 @@
+  */
+ 	#include <linux/linkage.h>
+ 	#include <asm/asm.h>
++	#include <asm/export.h>
+ 
+ 	/* put return address in eax (arg1) */
+ 	.macro THUNK name, func, put_ret_addr_in_eax=0
+@@ -36,5 +37,7 @@
+ #ifdef CONFIG_PREEMPT
+ 	THUNK ___preempt_schedule, preempt_schedule
+ 	THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
++	EXPORT_SYMBOL(___preempt_schedule)
++	EXPORT_SYMBOL(___preempt_schedule_notrace)
+ #endif
+ 
+diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
+index 627ecbcb2e62..be36bf4e0957 100644
+--- a/arch/x86/entry/thunk_64.S
++++ b/arch/x86/entry/thunk_64.S
+@@ -8,6 +8,7 @@
+ #include <linux/linkage.h>
+ #include "calling.h"
+ #include <asm/asm.h>
++#include <asm/export.h>
+ 
+ 	/* rdi:	arg1 ... normal C conventions. rax is saved/restored. */
+ 	.macro THUNK name, func, put_ret_addr_in_rdi=0
+@@ -49,6 +50,8 @@
+ #ifdef CONFIG_PREEMPT
+ 	THUNK ___preempt_schedule, preempt_schedule
+ 	THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
++	EXPORT_SYMBOL(___preempt_schedule)
++	EXPORT_SYMBOL(___preempt_schedule_notrace)
+ #endif
+ 
+ #if defined(CONFIG_TRACE_IRQFLAGS) \
+diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
+new file mode 100644
+index 000000000000..138de56b13eb
+--- /dev/null
++++ b/arch/x86/include/asm/export.h
+@@ -0,0 +1,4 @@
++#ifdef CONFIG_64BIT
++#define KSYM_ALIGN 16
++#endif
++#include <asm-generic/export.h>
+diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
+index 0503f5bfb18d..d3f49c3d5aca 100644
+--- a/arch/x86/kernel/Makefile
++++ b/arch/x86/kernel/Makefile
+@@ -46,9 +46,7 @@ obj-$(CONFIG_MODIFY_LDT_SYSCALL)	+= ldt.o
+ obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
+ obj-$(CONFIG_IRQ_WORK)  += irq_work.o
+ obj-y			+= probe_roms.o
+-obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
+-obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
+-obj-$(CONFIG_X86_64)	+= mcount_64.o
++obj-$(CONFIG_X86_64)	+= sys_x86_64.o mcount_64.o
+ obj-$(CONFIG_X86_ESPFIX64)	+= espfix_64.o
+ obj-$(CONFIG_SYSFS)	+= ksysfs.o
+ obj-y			+= bootflag.o e820.o
+diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
+index 6f8902b0d151..4707baf94203 100644
+--- a/arch/x86/kernel/head_32.S
++++ b/arch/x86/kernel/head_32.S
+@@ -23,6 +23,7 @@
+ #include <asm/percpu.h>
+ #include <asm/nops.h>
+ #include <asm/bootparam.h>
++#include <asm/export.h>
+ 
+ /* Physical address */
+ #define pa(X) ((X) - __PAGE_OFFSET)
+@@ -673,6 +674,7 @@ ENTRY(empty_zero_page)
+ 	.fill 4096,1,0
+ ENTRY(swapper_pg_dir)
+ 	.fill 1024,4,0
++EXPORT_SYMBOL(empty_zero_page)
+ 
+ /*
+  * This starts the data section.
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index 9f8efc9f0075..537d913f45ec 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -21,6 +21,7 @@
+ #include <asm/percpu.h>
+ #include <asm/nops.h>
+ #include "../entry/calling.h"
++#include <asm/export.h>
+ 
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/asm-offsets.h>
+@@ -488,10 +489,12 @@ early_gdt_descr_base:
+ ENTRY(phys_base)
+ 	/* This must match the first entry in level2_kernel_pgt */
+ 	.quad   0x0000000000000000
++EXPORT_SYMBOL(phys_base)
+ 
+ #include "../../x86/xen/xen-head.S"
+ 	
+ 	__PAGE_ALIGNED_BSS
+ NEXT_PAGE(empty_zero_page)
+ 	.skip PAGE_SIZE
++EXPORT_SYMBOL(empty_zero_page)
+ 
+diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
+deleted file mode 100644
+index 1f9b878ef5ef..000000000000
+--- a/arch/x86/kernel/i386_ksyms_32.c
++++ /dev/null
+@@ -1,47 +0,0 @@
+-#include <linux/export.h>
+-#include <linux/spinlock_types.h>
+-
+-#include <asm/checksum.h>
+-#include <asm/pgtable.h>
+-#include <asm/desc.h>
+-#include <asm/ftrace.h>
+-
+-#ifdef CONFIG_FUNCTION_TRACER
+-/* mcount is defined in assembly */
+-EXPORT_SYMBOL(mcount);
+-#endif
+-
+-/*
+- * Note, this is a prototype to get at the symbol for
+- * the export, but dont use it from C code, it is used
+- * by assembly code and is not using C calling convention!
+- */
+-#ifndef CONFIG_X86_CMPXCHG64
+-extern void cmpxchg8b_emu(void);
+-EXPORT_SYMBOL(cmpxchg8b_emu);
+-#endif
+-
+-/* Networking helper routines. */
+-EXPORT_SYMBOL(csum_partial_copy_generic);
+-
+-EXPORT_SYMBOL(__get_user_1);
+-EXPORT_SYMBOL(__get_user_2);
+-EXPORT_SYMBOL(__get_user_4);
+-EXPORT_SYMBOL(__get_user_8);
+-
+-EXPORT_SYMBOL(__put_user_1);
+-EXPORT_SYMBOL(__put_user_2);
+-EXPORT_SYMBOL(__put_user_4);
+-EXPORT_SYMBOL(__put_user_8);
+-
+-EXPORT_SYMBOL(strstr);
+-
+-EXPORT_SYMBOL(csum_partial);
+-EXPORT_SYMBOL(empty_zero_page);
+-
+-#ifdef CONFIG_PREEMPT
+-EXPORT_SYMBOL(___preempt_schedule);
+-EXPORT_SYMBOL(___preempt_schedule_notrace);
+-#endif
+-
+-EXPORT_SYMBOL(__sw_hweight32);
+diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
+index 61924222a9e1..efe73aacf966 100644
+--- a/arch/x86/kernel/mcount_64.S
++++ b/arch/x86/kernel/mcount_64.S
+@@ -7,6 +7,7 @@
+ #include <linux/linkage.h>
+ #include <asm/ptrace.h>
+ #include <asm/ftrace.h>
++#include <asm/export.h>
+ 
+ 
+ 	.code64
+@@ -294,6 +295,7 @@ trace:
+ 	jmp fgraph_trace
+ END(function_hook)
+ #endif /* CONFIG_DYNAMIC_FTRACE */
++EXPORT_SYMBOL(function_hook)
+ #endif /* CONFIG_FUNCTION_TRACER */
+ 
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
+deleted file mode 100644
+index b2cee3d19477..000000000000
+--- a/arch/x86/kernel/x8664_ksyms_64.c
++++ /dev/null
+@@ -1,85 +0,0 @@
+-/* Exports for assembly files.
+-   All C exports should go in the respective C files. */
+-
+-#include <linux/export.h>
+-#include <linux/spinlock_types.h>
+-#include <linux/smp.h>
+-
+-#include <net/checksum.h>
+-
+-#include <asm/processor.h>
+-#include <asm/pgtable.h>
+-#include <asm/uaccess.h>
+-#include <asm/desc.h>
+-#include <asm/ftrace.h>
+-
+-#ifdef CONFIG_FUNCTION_TRACER
+-/* mcount and __fentry__ are defined in assembly */
+-#ifdef CC_USING_FENTRY
+-EXPORT_SYMBOL(__fentry__);
+-#else
+-EXPORT_SYMBOL(mcount);
+-#endif
+-#endif
+-
+-EXPORT_SYMBOL(__get_user_1);
+-EXPORT_SYMBOL(__get_user_2);
+-EXPORT_SYMBOL(__get_user_4);
+-EXPORT_SYMBOL(__get_user_8);
+-EXPORT_SYMBOL(__put_user_1);
+-EXPORT_SYMBOL(__put_user_2);
+-EXPORT_SYMBOL(__put_user_4);
+-EXPORT_SYMBOL(__put_user_8);
+-
+-EXPORT_SYMBOL(copy_user_generic_string);
+-EXPORT_SYMBOL(copy_user_generic_unrolled);
+-EXPORT_SYMBOL(copy_user_enhanced_fast_string);
+-EXPORT_SYMBOL(__copy_user_nocache);
+-EXPORT_SYMBOL(_copy_from_user);
+-EXPORT_SYMBOL(_copy_to_user);
+-
+-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled);
+-
+-EXPORT_SYMBOL(copy_page);
+-EXPORT_SYMBOL(clear_page);
+-
+-EXPORT_SYMBOL(csum_partial);
+-
+-EXPORT_SYMBOL(__sw_hweight32);
+-EXPORT_SYMBOL(__sw_hweight64);
+-
+-/*
+- * Export string functions. We normally rely on gcc builtin for most of these,
+- * but gcc sometimes decides not to inline them.
+- */
+-#undef memcpy
+-#undef memset
+-#undef memmove
+-
+-extern void *__memset(void *, int, __kernel_size_t);
+-extern void *__memcpy(void *, const void *, __kernel_size_t);
+-extern void *__memmove(void *, const void *, __kernel_size_t);
+-extern void *memset(void *, int, __kernel_size_t);
+-extern void *memcpy(void *, const void *, __kernel_size_t);
+-extern void *memmove(void *, const void *, __kernel_size_t);
+-
+-EXPORT_SYMBOL(__memset);
+-EXPORT_SYMBOL(__memcpy);
+-EXPORT_SYMBOL(__memmove);
+-
+-EXPORT_SYMBOL(memset);
+-EXPORT_SYMBOL(memcpy);
+-EXPORT_SYMBOL(memmove);
+-
+-#ifndef CONFIG_DEBUG_VIRTUAL
+-EXPORT_SYMBOL(phys_base);
+-#endif
+-EXPORT_SYMBOL(empty_zero_page);
+-#ifndef CONFIG_PARAVIRT
+-EXPORT_SYMBOL(native_load_gs_index);
+-#endif
+-
+-#ifdef CONFIG_PREEMPT
+-EXPORT_SYMBOL(___preempt_schedule);
+-EXPORT_SYMBOL(___preempt_schedule_notrace);
+-#endif
+diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
+index c1e623209853..4d34bb548b41 100644
+--- a/arch/x86/lib/checksum_32.S
++++ b/arch/x86/lib/checksum_32.S
+@@ -28,6 +28,7 @@
+ #include <linux/linkage.h>
+ #include <asm/errno.h>
+ #include <asm/asm.h>
++#include <asm/export.h>
+ 				
+ /*
+  * computes a partial checksum, e.g. for TCP/UDP fragments
+@@ -251,6 +252,7 @@ ENTRY(csum_partial)
+ ENDPROC(csum_partial)
+ 				
+ #endif
++EXPORT_SYMBOL(csum_partial)
+ 
+ /*
+ unsigned int csum_partial_copy_generic (const char *src, char *dst,
+@@ -490,3 +492,4 @@ ENDPROC(csum_partial_copy_generic)
+ #undef ROUND1		
+ 		
+ #endif
++EXPORT_SYMBOL(csum_partial_copy_generic)
+diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
+index 65be7cfaf947..5e2af3a88cf5 100644
+--- a/arch/x86/lib/clear_page_64.S
++++ b/arch/x86/lib/clear_page_64.S
+@@ -1,6 +1,7 @@
+ #include <linux/linkage.h>
+ #include <asm/cpufeatures.h>
+ #include <asm/alternative-asm.h>
++#include <asm/export.h>
+ 
+ /*
+  * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
+@@ -23,6 +24,7 @@ ENTRY(clear_page)
+ 	rep stosq
+ 	ret
+ ENDPROC(clear_page)
++EXPORT_SYMBOL(clear_page)
+ 
+ ENTRY(clear_page_orig)
+ 
+diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
+index ad5349778490..03a186fc06ea 100644
+--- a/arch/x86/lib/cmpxchg8b_emu.S
++++ b/arch/x86/lib/cmpxchg8b_emu.S
+@@ -7,6 +7,7 @@
+  */
+ 
+ #include <linux/linkage.h>
++#include <asm/export.h>
+ 
+ .text
+ 
+@@ -48,3 +49,4 @@ ENTRY(cmpxchg8b_emu)
+ 	ret
+ 
+ ENDPROC(cmpxchg8b_emu)
++EXPORT_SYMBOL(cmpxchg8b_emu)
+diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
+index 24ef1c2104d4..e8508156c99d 100644
+--- a/arch/x86/lib/copy_page_64.S
++++ b/arch/x86/lib/copy_page_64.S
+@@ -3,6 +3,7 @@
+ #include <linux/linkage.h>
+ #include <asm/cpufeatures.h>
+ #include <asm/alternative-asm.h>
++#include <asm/export.h>
+ 
+ /*
+  * Some CPUs run faster using the string copy instructions (sane microcode).
+@@ -17,6 +18,7 @@ ENTRY(copy_page)
+ 	rep	movsq
+ 	ret
+ ENDPROC(copy_page)
++EXPORT_SYMBOL(copy_page)
+ 
+ ENTRY(copy_page_regs)
+ 	subq	$2*8,	%rsp
+diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
+index bf603ebbfd8e..d376e4b48f88 100644
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -14,6 +14,7 @@
+ #include <asm/alternative-asm.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/export.h>
+ 
+ /* Standard copy_to_user with segment limit checking */
+ ENTRY(_copy_to_user)
+@@ -29,6 +30,7 @@ ENTRY(_copy_to_user)
+ 		      "jmp copy_user_enhanced_fast_string",	\
+ 		      X86_FEATURE_ERMS
+ ENDPROC(_copy_to_user)
++EXPORT_SYMBOL(_copy_to_user)
+ 
+ /* Standard copy_from_user with segment limit checking */
+ ENTRY(_copy_from_user)
+@@ -44,6 +46,8 @@ ENTRY(_copy_from_user)
+ 		      "jmp copy_user_enhanced_fast_string",	\
+ 		      X86_FEATURE_ERMS
+ ENDPROC(_copy_from_user)
++EXPORT_SYMBOL(_copy_from_user)
++
+ 
+ 	.section .fixup,"ax"
+ 	/* must zero dest */
+@@ -155,6 +159,7 @@ ENTRY(copy_user_generic_unrolled)
+ 	_ASM_EXTABLE(21b,50b)
+ 	_ASM_EXTABLE(22b,50b)
+ ENDPROC(copy_user_generic_unrolled)
++EXPORT_SYMBOL(copy_user_generic_unrolled)
+ 
+ /* Some CPUs run faster using the string copy instructions.
+  * This is also a lot simpler. Use them when possible.
+@@ -200,6 +205,7 @@ ENTRY(copy_user_generic_string)
+ 	_ASM_EXTABLE(1b,11b)
+ 	_ASM_EXTABLE(3b,12b)
+ ENDPROC(copy_user_generic_string)
++EXPORT_SYMBOL(copy_user_generic_string)
+ 
+ /*
+  * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
+@@ -229,6 +235,7 @@ ENTRY(copy_user_enhanced_fast_string)
+ 
+ 	_ASM_EXTABLE(1b,12b)
+ ENDPROC(copy_user_enhanced_fast_string)
++EXPORT_SYMBOL(copy_user_enhanced_fast_string)
+ 
+ /*
+  * copy_user_nocache - Uncached memory copy with exception handling
+@@ -379,3 +386,4 @@ ENTRY(__copy_user_nocache)
+ 	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
+ 	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
+ ENDPROC(__copy_user_nocache)
++EXPORT_SYMBOL(__copy_user_nocache)
+diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
+index 9a7fe6a70491..378e5d5bf9b1 100644
+--- a/arch/x86/lib/csum-partial_64.c
++++ b/arch/x86/lib/csum-partial_64.c
+@@ -135,6 +135,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
+ 	return (__force __wsum)add32_with_carry(do_csum(buff, len),
+ 						(__force u32)sum);
+ }
++EXPORT_SYMBOL(csum_partial);
+ 
+ /*
+  * this routine is used for miscellaneous IP-like checksums, mainly
+diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
+index 0ef5128c2de8..37b62d412148 100644
+--- a/arch/x86/lib/getuser.S
++++ b/arch/x86/lib/getuser.S
+@@ -32,6 +32,7 @@
+ #include <asm/thread_info.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/export.h>
+ 
+ 	.text
+ ENTRY(__get_user_1)
+@@ -44,6 +45,7 @@ ENTRY(__get_user_1)
+ 	ASM_CLAC
+ 	ret
+ ENDPROC(__get_user_1)
++EXPORT_SYMBOL(__get_user_1)
+ 
+ ENTRY(__get_user_2)
+ 	add $1,%_ASM_AX
+@@ -57,6 +59,7 @@ ENTRY(__get_user_2)
+ 	ASM_CLAC
+ 	ret
+ ENDPROC(__get_user_2)
++EXPORT_SYMBOL(__get_user_2)
+ 
+ ENTRY(__get_user_4)
+ 	add $3,%_ASM_AX
+@@ -70,6 +73,7 @@ ENTRY(__get_user_4)
+ 	ASM_CLAC
+ 	ret
+ ENDPROC(__get_user_4)
++EXPORT_SYMBOL(__get_user_4)
+ 
+ ENTRY(__get_user_8)
+ #ifdef CONFIG_X86_64
+@@ -97,6 +101,7 @@ ENTRY(__get_user_8)
+ 	ret
+ #endif
+ ENDPROC(__get_user_8)
++EXPORT_SYMBOL(__get_user_8)
+ 
+ 
+ bad_get_user:
+diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
+index 8a602a1e404a..23d893cbc200 100644
+--- a/arch/x86/lib/hweight.S
++++ b/arch/x86/lib/hweight.S
+@@ -1,4 +1,5 @@
+ #include <linux/linkage.h>
++#include <asm/export.h>
+ 
+ #include <asm/asm.h>
+ 
+@@ -32,6 +33,7 @@ ENTRY(__sw_hweight32)
+ 	__ASM_SIZE(pop,) %__ASM_REG(dx)
+ 	ret
+ ENDPROC(__sw_hweight32)
++EXPORT_SYMBOL(__sw_hweight32)
+ 
+ ENTRY(__sw_hweight64)
+ #ifdef CONFIG_X86_64
+@@ -77,3 +79,4 @@ ENTRY(__sw_hweight64)
+ 	ret
+ #endif
+ ENDPROC(__sw_hweight64)
++EXPORT_SYMBOL(__sw_hweight64)
+diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
+index 98dcc112b363..9a53a06e5a3e 100644
+--- a/arch/x86/lib/memcpy_64.S
++++ b/arch/x86/lib/memcpy_64.S
+@@ -4,6 +4,7 @@
+ #include <asm/errno.h>
+ #include <asm/cpufeatures.h>
+ #include <asm/alternative-asm.h>
++#include <asm/export.h>
+ 
+ /*
+  * We build a jump to memcpy_orig by default which gets NOPped out on
+@@ -40,6 +41,8 @@ ENTRY(memcpy)
+ 	ret
+ ENDPROC(memcpy)
+ ENDPROC(__memcpy)
++EXPORT_SYMBOL(memcpy)
++EXPORT_SYMBOL(__memcpy)
+ 
+ /*
+  * memcpy_erms() - enhanced fast string memcpy. This is faster and
+@@ -274,6 +277,7 @@ ENTRY(memcpy_mcsafe_unrolled)
+ 	xorq %rax, %rax
+ 	ret
+ ENDPROC(memcpy_mcsafe_unrolled)
++EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
+ 
+ 	.section .fixup, "ax"
+ 	/* Return -EFAULT for any failure */
+diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
+index 90ce01bee00c..15de86cd15b0 100644
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -8,6 +8,7 @@
+ #include <linux/linkage.h>
+ #include <asm/cpufeatures.h>
+ #include <asm/alternative-asm.h>
++#include <asm/export.h>
+ 
+ #undef memmove
+ 
+@@ -207,3 +208,5 @@ ENTRY(__memmove)
+ 	retq
+ ENDPROC(__memmove)
+ ENDPROC(memmove)
++EXPORT_SYMBOL(__memmove)
++EXPORT_SYMBOL(memmove)
+diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
+index e1229ecd2a82..55b95db30a61 100644
+--- a/arch/x86/lib/memset_64.S
++++ b/arch/x86/lib/memset_64.S
+@@ -3,6 +3,7 @@
+ #include <linux/linkage.h>
+ #include <asm/cpufeatures.h>
+ #include <asm/alternative-asm.h>
++#include <asm/export.h>
+ 
+ .weak memset
+ 
+@@ -43,6 +44,8 @@ ENTRY(__memset)
+ 	ret
+ ENDPROC(memset)
+ ENDPROC(__memset)
++EXPORT_SYMBOL(memset)
++EXPORT_SYMBOL(__memset)
+ 
+ /*
+  * ISO C memset - set a memory block to a byte value. This function uses
+diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
+index c891ece81e5b..cd5d716d2897 100644
+--- a/arch/x86/lib/putuser.S
++++ b/arch/x86/lib/putuser.S
+@@ -15,6 +15,7 @@
+ #include <asm/errno.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/export.h>
+ 
+ 
+ /*
+@@ -43,6 +44,7 @@ ENTRY(__put_user_1)
+ 	xor %eax,%eax
+ 	EXIT
+ ENDPROC(__put_user_1)
++EXPORT_SYMBOL(__put_user_1)
+ 
+ ENTRY(__put_user_2)
+ 	ENTER
+@@ -55,6 +57,7 @@ ENTRY(__put_user_2)
+ 	xor %eax,%eax
+ 	EXIT
+ ENDPROC(__put_user_2)
++EXPORT_SYMBOL(__put_user_2)
+ 
+ ENTRY(__put_user_4)
+ 	ENTER
+@@ -67,6 +70,7 @@ ENTRY(__put_user_4)
+ 	xor %eax,%eax
+ 	EXIT
+ ENDPROC(__put_user_4)
++EXPORT_SYMBOL(__put_user_4)
+ 
+ ENTRY(__put_user_8)
+ 	ENTER
+@@ -82,6 +86,7 @@ ENTRY(__put_user_8)
+ 	xor %eax,%eax
+ 	EXIT
+ ENDPROC(__put_user_8)
++EXPORT_SYMBOL(__put_user_8)
+ 
+ bad_put_user:
+ 	movl $-EFAULT,%eax
+diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c
+index 8e2d55f754bf..a03b1c750bfe 100644
+--- a/arch/x86/lib/strstr_32.c
++++ b/arch/x86/lib/strstr_32.c
+@@ -1,4 +1,5 @@
+ #include <linux/string.h>
++#include <linux/export.h>
+ 
+ char *strstr(const char *cs, const char *ct)
+ {
+@@ -28,4 +29,4 @@ __asm__ __volatile__(
+ 	: "dx", "di");
+ return __res;
+ }
+-
++EXPORT_SYMBOL(strstr);
+diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
+index 3ee2bb6b440b..e7e7055a8658 100644
+--- a/arch/x86/um/Makefile
++++ b/arch/x86/um/Makefile
+@@ -8,7 +8,7 @@ else
+ 	BITS := 64
+ endif
+ 
+-obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
++obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
+ 	ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
+ 	stub_$(BITS).o stub_segv.o \
+ 	sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
+diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S
+index fa4b8b9841ff..b9933eb9274a 100644
+--- a/arch/x86/um/checksum_32.S
++++ b/arch/x86/um/checksum_32.S
+@@ -27,6 +27,7 @@
+ 
+ #include <asm/errno.h>
+ #include <asm/asm.h>
++#include <asm/export.h>
+ 				
+ /*
+  * computes a partial checksum, e.g. for TCP/UDP fragments
+@@ -214,3 +215,4 @@ csum_partial:
+ 	ret
+ 				
+ #endif
++	EXPORT_SYMBOL(csum_partial)
+diff --git a/arch/x86/um/ksyms.c b/arch/x86/um/ksyms.c
+deleted file mode 100644
+index 2e8f43ec6214..000000000000
+--- a/arch/x86/um/ksyms.c
++++ /dev/null
+@@ -1,13 +0,0 @@
+-#include <linux/module.h>
+-#include <asm/string.h>
+-#include <asm/checksum.h>
+-
+-#ifndef CONFIG_X86_32
+-/*XXX: we need them because they would be exported by x86_64 */
+-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
+-EXPORT_SYMBOL(memcpy);
+-#else
+-EXPORT_SYMBOL(__memcpy);
+-#endif
+-#endif
+-EXPORT_SYMBOL(csum_partial);
+-- 
+2.15.0
+