Backport L1TF mitigation
diff --git a/queue-3.16/drm-drivers-add-support-for-using-the-arch-wc-mapping-api.patch b/queue-3.16/drm-drivers-add-support-for-using-the-arch-wc-mapping-api.patch
new file mode 100644
index 0000000..76c5231
--- /dev/null
+++ b/queue-3.16/drm-drivers-add-support-for-using-the-arch-wc-mapping-api.patch
@@ -0,0 +1,157 @@
+From: Dave Airlie <airlied@redhat.com>
+Date: Mon, 24 Oct 2016 15:37:48 +1000
+Subject: drm/drivers: add support for using the arch wc mapping API.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 7cf321d118a825c1541b43ca45294126fd474efa upstream.
+
+This fixes a regression in all these drivers since the cache
+mode tracking was fixed for mixed mappings. It uses the new
+arch API to add the VRAM range to the PAT mapping tracking
+tables.
+
+Fixes: 87744ab3832 (mm: fix cache mode tracking in vm_insert_mixed())
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+[bwh: Backported to 3.16:
+ - Drop changes in amdgpu
+ - In nouveau, use struct nouveau_device * and nv_device_resource_{start,len}()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+--- a/drivers/gpu/drm/ast/ast_ttm.c
++++ b/drivers/gpu/drm/ast/ast_ttm.c
+@@ -275,6 +275,8 @@ int ast_mm_init(struct ast_private *ast)
+ 		return ret;
+ 	}
+ 
++	arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
++				   pci_resource_len(dev->pdev, 0));
+ 	ast->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
+ 					pci_resource_len(dev->pdev, 0));
+ 
+@@ -283,11 +285,15 @@ int ast_mm_init(struct ast_private *ast)
+ 
+ void ast_mm_fini(struct ast_private *ast)
+ {
++	struct drm_device *dev = ast->dev;
++
+ 	ttm_bo_device_release(&ast->ttm.bdev);
+ 
+ 	ast_ttm_global_release(ast);
+ 
+ 	arch_phys_wc_del(ast->fb_mtrr);
++	arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
++				pci_resource_len(dev->pdev, 0));
+ }
+ 
+ void ast_ttm_placement(struct ast_bo *bo, int domain)
+--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
++++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
+@@ -275,6 +275,9 @@ int cirrus_mm_init(struct cirrus_device
+ 		return ret;
+ 	}
+ 
++	arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
++				   pci_resource_len(dev->pdev, 0));
++
+ 	cirrus->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
+ 					   pci_resource_len(dev->pdev, 0));
+ 
+@@ -284,6 +287,8 @@ int cirrus_mm_init(struct cirrus_device
+ 
+ void cirrus_mm_fini(struct cirrus_device *cirrus)
+ {
++	struct drm_device *dev = cirrus->dev;
++
+ 	if (!cirrus->mm_inited)
+ 		return;
+ 
+@@ -293,6 +298,8 @@ void cirrus_mm_fini(struct cirrus_device
+ 
+ 	arch_phys_wc_del(cirrus->fb_mtrr);
+ 	cirrus->fb_mtrr = 0;
++	arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
++				pci_resource_len(dev->pdev, 0));
+ }
+ 
+ void cirrus_ttm_placement(struct cirrus_bo *bo, int domain)
+--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
++++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
+@@ -274,6 +274,9 @@ int mgag200_mm_init(struct mga_device *m
+ 		return ret;
+ 	}
+ 
++	arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
++				   pci_resource_len(dev->pdev, 0));
++
+ 	mdev->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
+ 					 pci_resource_len(dev->pdev, 0));
+ 
+@@ -282,10 +285,14 @@ int mgag200_mm_init(struct mga_device *m
+ 
+ void mgag200_mm_fini(struct mga_device *mdev)
+ {
++	struct drm_device *dev = mdev->dev;
++
+ 	ttm_bo_device_release(&mdev->ttm.bdev);
+ 
+ 	mgag200_ttm_global_release(mdev);
+ 
++	arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
++				pci_resource_len(dev->pdev, 0));
+ 	arch_phys_wc_del(mdev->fb_mtrr);
+ 	mdev->fb_mtrr = 0;
+ }
+--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
++++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
+@@ -397,6 +397,9 @@ nouveau_ttm_init(struct nouveau_drm *drm
+ 	drm->gem.vram_available  = nouveau_fb(drm->device)->ram->size;
+ 	drm->gem.vram_available -= nouveau_instmem(drm->device)->reserved;
+ 
++	arch_io_reserve_memtype_wc(nv_device_resource_start(device, 1),
++				   nv_device_resource_len(device, 1));
++
+ 	ret = ttm_bo_init_mm(&drm->ttm.bdev, TTM_PL_VRAM,
+ 			      drm->gem.vram_available >> PAGE_SHIFT);
+ 	if (ret) {
+@@ -429,6 +432,8 @@ nouveau_ttm_init(struct nouveau_drm *drm
+ void
+ nouveau_ttm_fini(struct nouveau_drm *drm)
+ {
++	struct nouveau_device *device = nv_device(drm->device);
++
+ 	mutex_lock(&drm->dev->struct_mutex);
+ 	ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_VRAM);
+ 	ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_TT);
+@@ -440,4 +445,7 @@ nouveau_ttm_fini(struct nouveau_drm *drm
+ 
+ 	arch_phys_wc_del(drm->ttm.mtrr);
+ 	drm->ttm.mtrr = 0;
++	arch_io_free_memtype_wc(nv_device_resource_start(device, 1),
++				nv_device_resource_len(device, 1));
++
+ }
+--- a/drivers/gpu/drm/radeon/radeon_object.c
++++ b/drivers/gpu/drm/radeon/radeon_object.c
+@@ -359,6 +359,10 @@ void radeon_bo_force_delete(struct radeo
+ 
+ int radeon_bo_init(struct radeon_device *rdev)
+ {
++	/* reserve PAT memory space to WC for VRAM */
++	arch_io_reserve_memtype_wc(rdev->mc.aper_base,
++				   rdev->mc.aper_size);
++
+ 	/* Add an MTRR for the VRAM */
+ 	if (!rdev->fastfb_working) {
+ 		rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
+@@ -376,6 +380,7 @@ void radeon_bo_fini(struct radeon_device
+ {
+ 	radeon_ttm_fini(rdev);
+ 	arch_phys_wc_del(rdev->mc.vram_mtrr);
++	arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
+ }
+ 
+ /* Returns how many bytes TTM can move per IB.
diff --git a/queue-3.16/mm-add-vm_insert_pfn_prot.patch b/queue-3.16/mm-add-vm_insert_pfn_prot.patch
new file mode 100644
index 0000000..603d67f
--- /dev/null
+++ b/queue-3.16/mm-add-vm_insert_pfn_prot.patch
@@ -0,0 +1,97 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 29 Dec 2015 20:12:20 -0800
+Subject: mm: Add vm_insert_pfn_prot()
+
+commit 1745cbc5d0dee0749a6bc0ea8e872c5db0074061 upstream.
+
+The x86 vvar vma contains pages with differing cacheability
+flags.  x86 currently implements this by manually inserting all
+the ptes using (io_)remap_pfn_range when the vma is set up.
+
+x86 wants to move to using .fault with VM_FAULT_NOPAGE to set up
+the mappings as needed.  The correct API to use to insert a pfn
+in .fault is vm_insert_pfn(), but vm_insert_pfn() can't override the
+vma's cache mode, and the HPET page in particular needs to be
+uncached despite the fact that the rest of the VMA is cached.
+
+Add vm_insert_pfn_prot() to support varying cacheability within
+the same non-COW VMA in a more sane manner.
+
+x86 could alternatively use multiple VMAs, but that's messy,
+would break CRIU, and would create unnecessary VMAs that would
+waste memory.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Acked-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/d2938d1eb37be7a5e4f86182db646551f11e45aa.1451446564.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/linux/mm.h |  2 ++
+ mm/memory.c        | 25 +++++++++++++++++++++++--
+ 2 files changed, 25 insertions(+), 2 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1965,6 +1965,8 @@ int remap_pfn_range(struct vm_area_struc
+ int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
+ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+ 			unsigned long pfn);
++int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
++			unsigned long pfn, pgprot_t pgprot);
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+ 			unsigned long pfn);
+ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1574,8 +1574,29 @@ out:
+ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+ 			unsigned long pfn)
+ {
++	return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
++}
++EXPORT_SYMBOL(vm_insert_pfn);
++
++/**
++ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
++ * @vma: user vma to map to
++ * @addr: target user address of this page
++ * @pfn: source kernel pfn
++ * @pgprot: pgprot flags for the inserted page
++ *
++ * This is exactly like vm_insert_pfn, except that it allows drivers
++ * to override pgprot on a per-page basis.
++ *
++ * This only makes sense for IO mappings, and it makes no sense for
++ * cow mappings.  In general, using multiple vmas is preferable;
++ * vm_insert_pfn_prot should only be used if using multiple VMAs is
++ * impractical.
++ */
++int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
++			unsigned long pfn, pgprot_t pgprot)
++{
+ 	int ret;
+-	pgprot_t pgprot = vma->vm_page_prot;
+ 	/*
+ 	 * Technically, architectures with pte_special can avoid all these
+ 	 * restrictions (same for remap_pfn_range).  However we would like
+@@ -1597,7 +1618,7 @@ int vm_insert_pfn(struct vm_area_struct
+ 
+ 	return ret;
+ }
+-EXPORT_SYMBOL(vm_insert_pfn);
++EXPORT_SYMBOL(vm_insert_pfn_prot);
+ 
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+ 			unsigned long pfn)
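For illustration only (not part of this queue): a minimal sketch of how a
3.16-era .fault handler might use the new helper to map one uncached page
inside an otherwise cached VM_PFNMAP vma, as in the vvar/HPET case described
above.  The example_* names and example_hpet_phys are assumptions.

static int example_vvar_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long addr = (unsigned long)vmf->virtual_address;
	/* Hypothetical physical address of the one page that must stay uncached */
	unsigned long pfn = example_hpet_phys >> PAGE_SHIFT;
	int ret;

	/* Override the vma's cache mode for this single page only */
	ret = vm_insert_pfn_prot(vma, addr, pfn,
				 pgprot_noncached(vma->vm_page_prot));
	if (ret == 0 || ret == -EBUSY)
		return VM_FAULT_NOPAGE;
	return VM_FAULT_SIGBUS;
}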
diff --git a/queue-3.16/mm-fix-cache-mode-tracking-in-vm_insert_mixed.patch b/queue-3.16/mm-fix-cache-mode-tracking-in-vm_insert_mixed.patch
new file mode 100644
index 0000000..f74904b
--- /dev/null
+++ b/queue-3.16/mm-fix-cache-mode-tracking-in-vm_insert_mixed.patch
@@ -0,0 +1,59 @@
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Fri, 7 Oct 2016 17:00:18 -0700
+Subject: mm: fix cache mode tracking in vm_insert_mixed()
+
+commit 9ac0dc7d949db7afd4116d55fa4fcf6a66d820f0 upstream.
+
+commit 87744ab3832b83ba71b931f86f9cfdb000d07da5 upstream
+
+vm_insert_mixed(), unlike vm_insert_pfn_prot() and vmf_insert_pfn_pmd(),
+fails to check the pgprot_t it uses for the mapping against the one
+recorded in the memtype tracking tree.  Add the missing call to
+track_pfn_insert() to preclude cases where incompatible aliased mappings
+are established for a given physical address range.
+
+[groeck: Backport to v4.4.y]
+
+Link: http://lkml.kernel.org/r/147328717909.35069.14256589123570653697.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Cc: David Airlie <airlied@linux.ie>
+Cc: Matthew Wilcox <mawilcox@microsoft.com>
+Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ mm/memory.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1623,10 +1623,14 @@ EXPORT_SYMBOL(vm_insert_pfn_prot);
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+ 			unsigned long pfn)
+ {
++	pgprot_t pgprot = vma->vm_page_prot;
++
+ 	BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
+ 
+ 	if (addr < vma->vm_start || addr >= vma->vm_end)
+ 		return -EFAULT;
++	if (track_pfn_insert(vma, &pgprot, pfn))
++		return -EINVAL;
+ 
+ 	/*
+ 	 * If we don't have pte special, then we have to use the pfn_valid()
+@@ -1639,9 +1643,9 @@ int vm_insert_mixed(struct vm_area_struc
+ 		struct page *page;
+ 
+ 		page = pfn_to_page(pfn);
+-		return insert_page(vma, addr, page, vma->vm_page_prot);
++		return insert_page(vma, addr, page, pgprot);
+ 	}
+-	return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
++	return insert_pfn(vma, addr, pfn, pgprot);
+ }
+ EXPORT_SYMBOL(vm_insert_mixed);
+ 
diff --git a/queue-3.16/mm-pagewalk-remove-pgd_entry-and-pud_entry.patch b/queue-3.16/mm-pagewalk-remove-pgd_entry-and-pud_entry.patch
new file mode 100644
index 0000000..9e9400c
--- /dev/null
+++ b/queue-3.16/mm-pagewalk-remove-pgd_entry-and-pud_entry.patch
@@ -0,0 +1,74 @@
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 11 Feb 2015 15:27:34 -0800
+Subject: mm/pagewalk: remove pgd_entry() and pud_entry()
+
+commit 0b1fbfe50006c41014cc25660c0e735d21c34939 upstream.
+
+Currently no user of the page table walker sets ->pgd_entry() or
+->pud_entry(), so checking for their existence in each loop just wastes
+CPU cycles.  So let's remove them to reduce overhead.
+
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Pavel Emelyanov <xemul@parallels.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[bwh: Backported to 3.16 as dependency of L1TF mitigation]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/linux/mm.h | 6 ------
+ mm/pagewalk.c      | 9 ++-------
+ 2 files changed, 2 insertions(+), 13 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1114,8 +1114,6 @@ void unmap_vmas(struct mmu_gather *tlb,
+ 
+ /**
+  * mm_walk - callbacks for walk_page_range
+- * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
+- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
+  *	       this handler is required to be able to handle
+  *	       pmd_trans_huge() pmds.  They may simply choose to
+@@ -1129,10 +1127,6 @@ void unmap_vmas(struct mmu_gather *tlb,
+  * (see walk_page_range for more details)
+  */
+ struct mm_walk {
+-	int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
+-			 unsigned long next, struct mm_walk *walk);
+-	int (*pud_entry)(pud_t *pud, unsigned long addr,
+-	                 unsigned long next, struct mm_walk *walk);
+ 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
+ 			 unsigned long next, struct mm_walk *walk);
+ 	int (*pte_entry)(pte_t *pte, unsigned long addr,
+--- a/mm/pagewalk.c
++++ b/mm/pagewalk.c
+@@ -86,9 +86,7 @@ static int walk_pud_range(pgd_t *pgd, un
+ 				break;
+ 			continue;
+ 		}
+-		if (walk->pud_entry)
+-			err = walk->pud_entry(pud, addr, next, walk);
+-		if (!err && (walk->pmd_entry || walk->pte_entry))
++		if (walk->pmd_entry || walk->pte_entry)
+ 			err = walk_pmd_range(pud, addr, next, walk);
+ 		if (err)
+ 			break;
+@@ -237,10 +235,7 @@ int walk_page_range(unsigned long addr,
+ 			pgd++;
+ 			continue;
+ 		}
+-		if (walk->pgd_entry)
+-			err = walk->pgd_entry(pgd, addr, next, walk);
+-		if (!err &&
+-		    (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
++		if (walk->pmd_entry || walk->pte_entry)
+ 			err = walk_pud_range(pgd, addr, next, walk);
+ 		if (err)
+ 			break;
diff --git a/queue-3.16/mm-x86-move-_page_swp_soft_dirty-from-bit-7-to-bit-1.patch b/queue-3.16/mm-x86-move-_page_swp_soft_dirty-from-bit-7-to-bit-1.patch
new file mode 100644
index 0000000..704eb1c
--- /dev/null
+++ b/queue-3.16/mm-x86-move-_page_swp_soft_dirty-from-bit-7-to-bit-1.patch
@@ -0,0 +1,97 @@
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Fri, 8 Sep 2017 16:10:46 -0700
+Subject: mm: x86: move _PAGE_SWP_SOFT_DIRTY from bit 7 to bit 1
+
+commit eee4818baac0f2b37848fdf90e4b16430dc536ac upstream.
+
+_PAGE_PSE is used to distinguish between a truly non-present
+(_PAGE_PRESENT=0) PMD, and a PMD which is undergoing a THP split and
+should be treated as present.
+
+But _PAGE_SWP_SOFT_DIRTY currently uses the _PAGE_PSE bit, which would
+cause confusion between one of those PMDs undergoing a THP split, and a
+soft-dirty PMD.  Dropping the _PAGE_PSE check in pmd_present() does not work
+well, because it can hurt the optimization of TLB handling in THP split.
+
+Thus, we need to move the bit.
+
+In the current kernel, bits 1-4 are not used in non-present format since
+commit 00839ee3b299 ("x86/mm: Move swap offset/type up in PTE to work
+around erratum").  So let's move _PAGE_SWP_SOFT_DIRTY to bit 1.  Bit 7
+is used as reserved (always clear), so please don't use it for other
+purposes.
+
+Link: http://lkml.kernel.org/r/20170717193955.20207-3-zi.yan@sent.com
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Signed-off-by: Zi Yan <zi.yan@cs.rutgers.edu>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
+Cc: David Nellans <dnellans@nvidia.com>
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[bwh: Backported to 3.16: Bit 9 may be reserved for PAGE_BIT_NUMA here]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable_64.h    | 12 +++++++++---
+ arch/x86/include/asm/pgtable_types.h | 10 +++++-----
+ 2 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -165,15 +165,21 @@ static inline int pgd_large(pgd_t pgd) {
+ /*
+  * Encode and de-code a swap entry
+  *
+- * |     ...                | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
+- * |     ...                |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
+- * | OFFSET (15->63) | TYPE (10-14) | 0 |0|X|X|X| X| X|X|X|0| <- swp entry
++ * |     ...                | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
++ * |     ...                |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
++ * | OFFSET (15->63) | TYPE (10-14) | 0 |0|0|X|X| X| X|X|SD|0| <- swp entry
+  *
+  * G (8) is aliased and used as a PROT_NONE indicator for
+  * !present ptes.  We need to start storing swap entries above
+  * there.  We also need to avoid using A and D because of an
+  * erratum where they can be incorrectly set by hardware on
+  * non-present PTEs.
++ *
++ * SD (1) in swp entry is used to store soft dirty bit, which helps us
++ * remember soft dirty over page migration
++ *
++ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
++ * but also L and G.
+  */
+ #ifdef CONFIG_NUMA_BALANCING
+ /* Automatic NUMA balancing needs to be distinguishable from swap entries */
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -94,15 +94,15 @@
+ /*
+  * Tracking soft dirty bit when a page goes to a swap is tricky.
+  * We need a bit which can be stored in pte _and_ not conflict
+- * with swap entry format. On x86 bits 6 and 7 are *not* involved
+- * into swap entry computation, but bit 6 is used for nonlinear
+- * file mapping, so we borrow bit 7 for soft dirty tracking.
++ * with swap entry format. On x86 bits 1-4 are *not* involved
++ * into swap entry computation, but bit 7 is used for thp migration,
++ * so we borrow bit 1 for soft dirty tracking.
+  *
+  * Please note that this bit must be treated as swap dirty page
+- * mark if and only if the PTE has present bit clear!
++ * mark if and only if the PTE/PMD has present bit clear!
+  */
+ #ifdef CONFIG_MEM_SOFT_DIRTY
+-#define _PAGE_SWP_SOFT_DIRTY	_PAGE_PSE
++#define _PAGE_SWP_SOFT_DIRTY	_PAGE_RW
+ #else
+ #define _PAGE_SWP_SOFT_DIRTY	(_AT(pteval_t, 0))
+ #endif
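For context, a rough sketch of the swap soft-dirty helpers (approximately as
they already exist in arch/x86/include/asm/pgtable.h) that consume this
definition unchanged; after the move they set and test bit 1 (_PAGE_RW)
instead of bit 7 (_PAGE_PSE), so a swapped-out soft-dirty PMD can no longer
be mistaken for one undergoing a THP split.

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
	return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}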
diff --git a/queue-3.16/pagewalk-improve-vma-handling.patch b/queue-3.16/pagewalk-improve-vma-handling.patch
new file mode 100644
index 0000000..79f7d2f
--- /dev/null
+++ b/queue-3.16/pagewalk-improve-vma-handling.patch
@@ -0,0 +1,341 @@
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 11 Feb 2015 15:27:37 -0800
+Subject: pagewalk: improve vma handling
+
+commit fafaa4264eba49fd10695c193a82760558d093f4 upstream.
+
+The current implementation of the page table walker has a fundamental problem
+in vma handling, which started when we tried to handle vma(VM_HUGETLB).
+Because it is done in the pgd loop, considering vma boundaries makes the code
+complicated and bug-prone.
+
+From the user's viewpoint, a user checks some vma-related condition to
+determine whether it really wants to do the page walk over the vma.
+
+In order to solve these problems, this patch moves the vma check outside the
+pgd loop and introduces a new callback ->test_walk().
+
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Pavel Emelyanov <xemul@parallels.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[bwh: Backported to 3.16 as dependency of L1TF mitigation]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/linux/mm.h |  15 +++-
+ mm/pagewalk.c      | 206 +++++++++++++++++++++++++--------------------
+ 2 files changed, 129 insertions(+), 92 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1121,10 +1121,16 @@ void unmap_vmas(struct mmu_gather *tlb,
+  * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
+  * @pte_hole: if set, called for each hole at all levels
+  * @hugetlb_entry: if set, called for each hugetlb entry
+- *		   *Caution*: The caller must hold mmap_sem() if @hugetlb_entry
+- * 			      is used.
++ * @test_walk: caller specific callback function to determine whether
++ *             we walk over the current vma or not. A positive returned
++ *             value means "do page table walk over the current vma,"
++ *             and a negative one means "abort current page table walk
++ *             right now." 0 means "skip the current vma."
++ * @mm:        mm_struct representing the target process of page table walk
++ * @vma:       vma currently walked (NULL if walking outside vmas)
++ * @private:   private data for callbacks' usage
+  *
+- * (see walk_page_range for more details)
++ * (see the comment on walk_page_range() for more details)
+  */
+ struct mm_walk {
+ 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
+@@ -1136,7 +1142,10 @@ struct mm_walk {
+ 	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
+ 			     unsigned long addr, unsigned long next,
+ 			     struct mm_walk *walk);
++	int (*test_walk)(unsigned long addr, unsigned long next,
++			struct mm_walk *walk);
+ 	struct mm_struct *mm;
++	struct vm_area_struct *vma;
+ 	void *private;
+ };
+ 
+--- a/mm/pagewalk.c
++++ b/mm/pagewalk.c
+@@ -59,7 +59,7 @@ again:
+ 			continue;
+ 
+ 		split_huge_page_pmd_mm(walk->mm, addr, pmd);
+-		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
++		if (pmd_trans_unstable(pmd))
+ 			goto again;
+ 		err = walk_pte_range(pmd, addr, next, walk);
+ 		if (err)
+@@ -95,6 +95,32 @@ static int walk_pud_range(pgd_t *pgd, un
+ 	return err;
+ }
+ 
++static int walk_pgd_range(unsigned long addr, unsigned long end,
++			  struct mm_walk *walk)
++{
++	pgd_t *pgd;
++	unsigned long next;
++	int err = 0;
++
++	pgd = pgd_offset(walk->mm, addr);
++	do {
++		next = pgd_addr_end(addr, end);
++		if (pgd_none_or_clear_bad(pgd)) {
++			if (walk->pte_hole)
++				err = walk->pte_hole(addr, next, walk);
++			if (err)
++				break;
++			continue;
++		}
++		if (walk->pmd_entry || walk->pte_entry)
++			err = walk_pud_range(pgd, addr, next, walk);
++		if (err)
++			break;
++	} while (pgd++, addr = next, addr != end);
++
++	return err;
++}
++
+ #ifdef CONFIG_HUGETLB_PAGE
+ static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
+ 				       unsigned long end)
+@@ -103,10 +129,10 @@ static unsigned long hugetlb_entry_end(s
+ 	return boundary < end ? boundary : end;
+ }
+ 
+-static int walk_hugetlb_range(struct vm_area_struct *vma,
+-			      unsigned long addr, unsigned long end,
++static int walk_hugetlb_range(unsigned long addr, unsigned long end,
+ 			      struct mm_walk *walk)
+ {
++	struct vm_area_struct *vma = walk->vma;
+ 	struct hstate *h = hstate_vma(vma);
+ 	unsigned long next;
+ 	unsigned long hmask = huge_page_mask(h);
+@@ -119,15 +145,14 @@ static int walk_hugetlb_range(struct vm_
+ 		if (pte && walk->hugetlb_entry)
+ 			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
+ 		if (err)
+-			return err;
++			break;
+ 	} while (addr = next, addr != end);
+ 
+-	return 0;
++	return err;
+ }
+ 
+ #else /* CONFIG_HUGETLB_PAGE */
+-static int walk_hugetlb_range(struct vm_area_struct *vma,
+-			      unsigned long addr, unsigned long end,
++static int walk_hugetlb_range(unsigned long addr, unsigned long end,
+ 			      struct mm_walk *walk)
+ {
+ 	return 0;
+@@ -135,112 +160,115 @@ static int walk_hugetlb_range(struct vm_
+ 
+ #endif /* CONFIG_HUGETLB_PAGE */
+ 
++/*
++ * Decide whether we really walk over the current vma on [@start, @end)
++ * or skip it via the returned value. Return 0 if we do walk over the
++ * current vma, and return 1 if we skip the vma. Negative values means
++ * error, where we abort the current walk.
++ *
++ * Default check (only VM_PFNMAP check for now) is used when the caller
++ * doesn't define test_walk() callback.
++ */
++static int walk_page_test(unsigned long start, unsigned long end,
++			struct mm_walk *walk)
++{
++	struct vm_area_struct *vma = walk->vma;
+ 
++	if (walk->test_walk)
++		return walk->test_walk(start, end, walk);
++
++	/*
++	 * Do not walk over vma(VM_PFNMAP), because we have no valid struct
++	 * page backing a VM_PFNMAP range. See also commit a9ff785e4437.
++	 */
++	if (vma->vm_flags & VM_PFNMAP)
++		return 1;
++	return 0;
++}
++
++static int __walk_page_range(unsigned long start, unsigned long end,
++			struct mm_walk *walk)
++{
++	int err = 0;
++	struct vm_area_struct *vma = walk->vma;
++
++	if (vma && is_vm_hugetlb_page(vma)) {
++		if (walk->hugetlb_entry)
++			err = walk_hugetlb_range(start, end, walk);
++	} else
++		err = walk_pgd_range(start, end, walk);
++
++	return err;
++}
+ 
+ /**
+- * walk_page_range - walk a memory map's page tables with a callback
+- * @addr: starting address
+- * @end: ending address
+- * @walk: set of callbacks to invoke for each level of the tree
++ * walk_page_range - walk page table with caller specific callbacks
+  *
+- * Recursively walk the page table for the memory area in a VMA,
+- * calling supplied callbacks. Callbacks are called in-order (first
+- * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
+- * etc.). If lower-level callbacks are omitted, walking depth is reduced.
++ * Recursively walk the page table tree of the process represented by @walk->mm
++ * within the virtual address range [@start, @end). During walking, we can do
++ * some caller-specific works for each entry, by setting up pmd_entry(),
++ * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these
++ * callbacks, the associated entries/pages are just ignored.
++ * The return values of these callbacks are commonly defined like below:
++ *  - 0  : succeeded to handle the current entry, and if you don't reach the
++ *         end address yet, continue to walk.
++ *  - >0 : succeeded to handle the current entry, and return to the caller
++ *         with caller specific value.
++ *  - <0 : failed to handle the current entry, and return to the caller
++ *         with error code.
+  *
+- * Each callback receives an entry pointer and the start and end of the
+- * associated range, and a copy of the original mm_walk for access to
+- * the ->private or ->mm fields.
++ * Before starting to walk page table, some callers want to check whether
++ * they really want to walk over the current vma, typically by checking
++ * its vm_flags. walk_page_test() and @walk->test_walk() are used for this
++ * purpose.
+  *
+- * Usually no locks are taken, but splitting transparent huge page may
+- * take page table lock. And the bottom level iterator will map PTE
+- * directories from highmem if necessary.
++ * struct mm_walk keeps current values of some common data like vma and pmd,
++ * which are useful for the access from callbacks. If you want to pass some
++ * caller-specific data to callbacks, @walk->private should be helpful.
+  *
+- * If any callback returns a non-zero value, the walk is aborted and
+- * the return value is propagated back to the caller. Otherwise 0 is returned.
+- *
+- * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
+- * is !NULL.
++ * Locking:
++ *   Callers of walk_page_range() and walk_page_vma() should hold
++ *   @walk->mm->mmap_sem, because these function traverse vma list and/or
++ *   access to vma's data.
+  */
+-int walk_page_range(unsigned long addr, unsigned long end,
++int walk_page_range(unsigned long start, unsigned long end,
+ 		    struct mm_walk *walk)
+ {
+-	pgd_t *pgd;
+-	unsigned long next;
+ 	int err = 0;
++	unsigned long next;
++	struct vm_area_struct *vma;
+ 
+-	if (addr >= end)
+-		return err;
++	if (start >= end)
++		return -EINVAL;
+ 
+ 	if (!walk->mm)
+ 		return -EINVAL;
+ 
+ 	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+ 
+-	pgd = pgd_offset(walk->mm, addr);
++	vma = find_vma(walk->mm, start);
+ 	do {
+-		struct vm_area_struct *vma = NULL;
+-
+-		next = pgd_addr_end(addr, end);
++		if (!vma) { /* after the last vma */
++			walk->vma = NULL;
++			next = end;
++		} else if (start < vma->vm_start) { /* outside vma */
++			walk->vma = NULL;
++			next = min(end, vma->vm_start);
++		} else { /* inside vma */
++			walk->vma = vma;
++			next = min(end, vma->vm_end);
++			vma = vma->vm_next;
+ 
+-		/*
+-		 * This function was not intended to be vma based.
+-		 * But there are vma special cases to be handled:
+-		 * - hugetlb vma's
+-		 * - VM_PFNMAP vma's
+-		 */
+-		vma = find_vma(walk->mm, addr);
+-		if (vma) {
+-			/*
+-			 * There are no page structures backing a VM_PFNMAP
+-			 * range, so do not allow split_huge_page_pmd().
+-			 */
+-			if ((vma->vm_start <= addr) &&
+-			    (vma->vm_flags & VM_PFNMAP)) {
+-				if (walk->pte_hole)
+-					err = walk->pte_hole(addr, next, walk);
+-				if (err)
+-					break;
+-				pgd = pgd_offset(walk->mm, next);
+-				continue;
+-			}
+-			/*
+-			 * Handle hugetlb vma individually because pagetable
+-			 * walk for the hugetlb page is dependent on the
+-			 * architecture and we can't handled it in the same
+-			 * manner as non-huge pages.
+-			 */
+-			if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
+-			    is_vm_hugetlb_page(vma)) {
+-				if (vma->vm_end < next)
+-					next = vma->vm_end;
+-				/*
+-				 * Hugepage is very tightly coupled with vma,
+-				 * so walk through hugetlb entries within a
+-				 * given vma.
+-				 */
+-				err = walk_hugetlb_range(vma, addr, next, walk);
+-				if (err)
+-					break;
+-				pgd = pgd_offset(walk->mm, next);
++			err = walk_page_test(start, next, walk);
++			if (err > 0)
+ 				continue;
+-			}
+-		}
+-
+-		if (pgd_none_or_clear_bad(pgd)) {
+-			if (walk->pte_hole)
+-				err = walk->pte_hole(addr, next, walk);
+-			if (err)
++			if (err < 0)
+ 				break;
+-			pgd++;
+-			continue;
+ 		}
+-		if (walk->pmd_entry || walk->pte_entry)
+-			err = walk_pud_range(pgd, addr, next, walk);
++		if (walk->vma || walk->pte_hole)
++			err = __walk_page_range(start, next, walk);
+ 		if (err)
+ 			break;
+-		pgd++;
+-	} while (addr = next, addr < end);
+-
++	} while (start = next, start < end);
+ 	return err;
+ }
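A minimal sketch (illustration only, not part of the queue) of a caller using
the reworked walker; names prefixed example_ are assumptions.  Note that
walk_page_test() treats a positive ->test_walk() return as "skip this vma", a
negative one as "abort the walk", and 0 as "walk it".

static int example_test_walk(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	/* Positive return value: skip hugetlb-backed vmas */
	return is_vm_hugetlb_page(walk->vma) ? 1 : 0;
}

static int example_pte_entry(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	if (pte_present(*pte))
		(*(unsigned long *)walk->private)++;
	return 0;		/* keep walking */
}

static unsigned long example_count_present(struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	unsigned long count = 0;
	struct mm_walk walk = {
		.pte_entry = example_pte_entry,
		.test_walk = example_test_walk,
		.mm	   = mm,
		.private   = &count,
	};

	down_read(&mm->mmap_sem);
	walk_page_range(start, end, &walk);
	up_read(&mm->mmap_sem);
	return count;
}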
diff --git a/queue-3.16/series b/queue-3.16/series
index 5de06fd..2bad97d 100644
--- a/queue-3.16/series
+++ b/queue-3.16/series
@@ -88,3 +88,34 @@
 x86-drop-_page_file-and-pte_file-related-helpers.patch
 xtensa-drop-_page_file-and-pte_file-related-helpers.patch
 powerpc-drop-_page_file-and-pte_file-related-helpers.patch
+x86-speculation-l1tf-increase-32bit-pae-__physical_page_shift.patch
+x86-mm-move-swap-offset-type-up-in-pte-to-work-around-erratum.patch
+mm-x86-move-_page_swp_soft_dirty-from-bit-7-to-bit-1.patch
+x86-speculation-l1tf-change-order-of-offset-type-in-swap-entry.patch
+x86-speculation-l1tf-protect-swap-entries-against-l1tf.patch
+x86-mm-add-pud-functions.patch
+x86-speculation-l1tf-protect-prot_none-ptes-against-speculation.patch
+x86-speculation-l1tf-make-sure-the-first-page-is-always-reserved.patch
+x86-speculation-l1tf-add-sysfs-reporting-for-l1tf.patch
+mm-add-vm_insert_pfn_prot.patch
+mm-fix-cache-mode-tracking-in-vm_insert_mixed.patch
+x86-io-add-interface-to-reserve-io-memtype-for-a-resource-range.patch
+drm-drivers-add-support-for-using-the-arch-wc-mapping-api.patch
+mm-pagewalk-remove-pgd_entry-and-pud_entry.patch
+pagewalk-improve-vma-handling.patch
+x86-speculation-l1tf-disallow-non-privileged-high-mmio-prot_none.patch
+x86-speculation-l1tf-limit-swap-file-size-to-max_pa-2.patch
+x86-init-fix-build-with-config_swap-n.patch
+x86-bugs-move-the-l1tf-function-and-define-pr_fmt-properly.patch
+x86-speculation-l1tf-extend-64bit-swap-file-size-limit.patch
+x86-speculation-l1tf-protect-pae-swap-entries-against-l1tf.patch
+x86-speculation-l1tf-fix-overflow-in-l1tf_pfn_limit-on-32bit.patch
+x86-speculation-l1tf-fix-off-by-one-error-when-warning-that-system.patch
+x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch
+x86-speculation-l1tf-unbreak-__have_arch_pfn_modify_allowed.patch
+x86-speculation-l1tf-invert-all-not-present-mappings.patch
+x86-speculation-l1tf-exempt-zeroed-ptes-from-inversion.patch
+x86-speculation-l1tf-make-pmd-pud_mknotpresent-invert.patch
+x86-mm-pat-make-set_memory_np-l1tf-safe.patch
+x86-mm-kmmio-make-the-tracer-robust-against-l1tf.patch
+x86-speculation-l1tf-suggest-what-to-do-on-systems-with-too-much-ram.patch
diff --git a/queue-3.16/x86-bugs-move-the-l1tf-function-and-define-pr_fmt-properly.patch b/queue-3.16/x86-bugs-move-the-l1tf-function-and-define-pr_fmt-properly.patch
new file mode 100644
index 0000000..e8858c7
--- /dev/null
+++ b/queue-3.16/x86-bugs-move-the-l1tf-function-and-define-pr_fmt-properly.patch
@@ -0,0 +1,93 @@
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Wed, 20 Jun 2018 16:42:57 -0400
+Subject: x86/bugs: Move the l1tf function and define pr_fmt properly
+
+commit 56563f53d3066afa9e63d6c997bf67e76a8b05c0 upstream.
+
+The pr_warn in l1tf_select_mitigation would have used the prior pr_fmt
+which was defined as "Spectre V2 : ".
+
+Move the function to be past SSBD and also define the pr_fmt.
+
+Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/kernel/cpu/bugs.c | 55 ++++++++++++++++++++------------------
+ 1 file changed, 29 insertions(+), 26 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -270,32 +270,6 @@ static void x86_amd_ssb_disable(void)
+ 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
+ }
+ 
+-static void __init l1tf_select_mitigation(void)
+-{
+-	u64 half_pa;
+-
+-	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+-		return;
+-
+-#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
+-	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
+-	return;
+-#endif
+-
+-	/*
+-	 * This is extremely unlikely to happen because almost all
+-	 * systems have far more MAX_PA/2 than RAM can be fit into
+-	 * DIMM slots.
+-	 */
+-	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+-	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
+-		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
+-		return;
+-	}
+-
+-	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
+-}
+-
+ #ifdef RETPOLINE
+ static bool spectre_v2_bad_module;
+ 
+@@ -721,6 +695,35 @@ void x86_spec_ctrl_setup_ap(void)
+ 		x86_amd_ssb_disable();
+ }
+ 
++#undef pr_fmt
++#define pr_fmt(fmt)	"L1TF: " fmt
++static void __init l1tf_select_mitigation(void)
++{
++	u64 half_pa;
++
++	if (!boot_cpu_has_bug(X86_BUG_L1TF))
++		return;
++
++#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
++	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
++	return;
++#endif
++
++	/*
++	 * This is extremely unlikely to happen because almost all
++	 * systems have far more MAX_PA/2 than RAM can be fit into
++	 * DIMM slots.
++	 */
++	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
++	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
++		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
++		return;
++	}
++
++	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
++}
++#undef pr_fmt
++
+ #ifdef CONFIG_SYSFS
+ 
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
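To make the effect of the move concrete, the newly added pr_fmt and the
warning it now prefixes are shown together below (illustration only; both
lines come from the hunk above).

#undef pr_fmt
#define pr_fmt(fmt)	"L1TF: " fmt

/*
 * pr_fmt() is expanded at each call site, so with l1tf_select_mitigation()
 * now sitting below this definition the warning prints as
 *	"L1TF: System has more than MAX_PA/2 memory. ..."
 * instead of inheriting the earlier "Spectre V2 : " prefix.
 */
pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");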
diff --git a/queue-3.16/x86-init-fix-build-with-config_swap-n.patch b/queue-3.16/x86-init-fix-build-with-config_swap-n.patch
new file mode 100644
index 0000000..8e88655
--- /dev/null
+++ b/queue-3.16/x86-init-fix-build-with-config_swap-n.patch
@@ -0,0 +1,35 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Tue, 14 Aug 2018 20:50:47 +0200
+Subject: x86/init: fix build with CONFIG_SWAP=n
+
+commit 792adb90fa724ce07c0171cbc96b9215af4b1045 upstream.
+
+The introduction of generic_max_swapfile_size and arch-specific versions has
+broken linking on x86 with CONFIG_SWAP=n due to undefined reference to
+'generic_max_swapfile_size'. Fix it by compiling the x86-specific
+max_swapfile_size() only with CONFIG_SWAP=y.
+
+Reported-by: Tomas Pruzina <pruzinat@gmail.com>
+Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/mm/init.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -701,6 +701,7 @@ void __init zone_sizes_init(void)
+ 	free_area_init_nodes(max_zone_pfns);
+ }
+ 
++#ifdef CONFIG_SWAP
+ unsigned long max_swapfile_size(void)
+ {
+ 	unsigned long pages;
+@@ -713,3 +714,4 @@ unsigned long max_swapfile_size(void)
+ 	}
+ 	return pages;
+ }
++#endif
diff --git a/queue-3.16/x86-io-add-interface-to-reserve-io-memtype-for-a-resource-range.patch b/queue-3.16/x86-io-add-interface-to-reserve-io-memtype-for-a-resource-range.patch
new file mode 100644
index 0000000..5010945
--- /dev/null
+++ b/queue-3.16/x86-io-add-interface-to-reserve-io-memtype-for-a-resource-range.patch
@@ -0,0 +1,118 @@
+From: Dave Airlie <airlied@redhat.com>
+Date: Mon, 24 Oct 2016 15:27:59 +1000
+Subject: x86/io: add interface to reserve io memtype for a resource range.
+ (v1.1)
+
+commit 8ef4227615e158faa4ee85a1d6466782f7e22f2f upstream.
+
+A recent change to the mm code in:
+87744ab3832b mm: fix cache mode tracking in vm_insert_mixed()
+
+started enforcing checking the memory type against the registered list for
+mixed pfn insertion mappings. It happens that the drm drivers for a number
+of gpus relied on this being broken. Currently the driver only inserted
+VRAM mappings into the tracking table when they came from the kernel,
+and userspace mappings never landed in the table. This led to a regression
+where all the mappings end up as UC instead of WC now.
+
+I've considered a number of solutions but since this needs to be fixed
+in fixes and not next, and some of the solutions were going to introduce
+overhead that hadn't been there before I didn't consider them viable at
+this stage. These mainly concerned hooking into the TTM io reserve APIs,
+but these APIs have a bunch of fast paths I didn't want to unwind to add
+this to.
+
+The solution I've decided on is to add a new API like the arch_phys_wc
+APIs (these would have worked but wc_del didn't take a range), and
+use them from the drivers to add a WC compatible mapping to the table
+for all VRAM on those GPUs. This means we can then create userspace
+mappings that won't get degraded to UC.
+
+v1.1: use CONFIG_X86_PAT + add some comments in io.h
+
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: x86@kernel.org
+Cc: mcgrof@suse.com
+Cc: Dan Williams <dan.j.williams@intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+[bwh: Backported to 3.16: Memory types have type unsigned long, and the
+ constant is named _PAGE_CACHE_WC instead of _PAGE_CACHE_MODE_WC.]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/io.h |  6 ++++++
+ arch/x86/mm/pat.c         | 14 ++++++++++++++
+ include/linux/io.h        | 22 ++++++++++++++++++++++
+ 3 files changed, 42 insertions(+)
+
+--- a/arch/x86/include/asm/io.h
++++ b/arch/x86/include/asm/io.h
+@@ -340,4 +340,10 @@ extern void arch_phys_wc_del(int handle)
+ #define arch_phys_wc_add arch_phys_wc_add
+ #endif
+ 
++#ifdef CONFIG_X86_PAT
++extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size);
++extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size);
++#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
++#endif
++
+ #endif /* _ASM_X86_IO_H */
+--- a/arch/x86/mm/pat.c
++++ b/arch/x86/mm/pat.c
+@@ -481,6 +481,20 @@ void io_free_memtype(resource_size_t sta
+ 	free_memtype(start, end);
+ }
+ 
++int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
++{
++	unsigned long type = _PAGE_CACHE_WC;
++
++	return io_reserve_memtype(start, start + size, &type);
++}
++EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
++
++void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
++{
++	io_free_memtype(start, start + size);
++}
++EXPORT_SYMBOL(arch_io_free_memtype_wc);
++
+ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ 				unsigned long size, pgprot_t vma_prot)
+ {
+--- a/include/linux/io.h
++++ b/include/linux/io.h
+@@ -101,4 +101,26 @@ static inline void arch_phys_wc_del(int
+ #define arch_phys_wc_add arch_phys_wc_add
+ #endif
+ 
++/*
++ * On x86 PAT systems we have memory tracking that keeps track of
++ * the allowed mappings on memory ranges. This tracking works for
++ * all the in-kernel mapping APIs (ioremap*), but where the user
++ * wishes to map a range from a physical device into user memory
++ * the tracking won't be updated. This API is to be used by
++ * drivers which remap physical device pages into userspace,
++ * and wants to make sure they are mapped WC and not UC.
++ */
++#ifndef arch_io_reserve_memtype_wc
++static inline int arch_io_reserve_memtype_wc(resource_size_t base,
++					     resource_size_t size)
++{
++	return 0;
++}
++
++static inline void arch_io_free_memtype_wc(resource_size_t base,
++					   resource_size_t size)
++{
++}
++#endif
++
+ #endif /* _LINUX_IO_H */
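For reference, a minimal sketch of how a driver pairs the new calls around
the existing MTRR helpers (the drm patch earlier in this series applies
exactly this pattern per driver); example_pdev is an assumed struct pci_dev *
whose BAR 0 is the VRAM aperture.

static int example_fb_mtrr;

static void example_vram_map_init(struct pci_dev *example_pdev)
{
	/* Register the whole aperture as WC in the PAT tracking tree ... */
	arch_io_reserve_memtype_wc(pci_resource_start(example_pdev, 0),
				   pci_resource_len(example_pdev, 0));
	/* ... and keep adding the MTRR as before */
	example_fb_mtrr = arch_phys_wc_add(pci_resource_start(example_pdev, 0),
					   pci_resource_len(example_pdev, 0));
}

static void example_vram_map_fini(struct pci_dev *example_pdev)
{
	arch_phys_wc_del(example_fb_mtrr);
	arch_io_free_memtype_wc(pci_resource_start(example_pdev, 0),
				pci_resource_len(example_pdev, 0));
}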
diff --git a/queue-3.16/x86-mm-add-pud-functions.patch b/queue-3.16/x86-mm-add-pud-functions.patch
new file mode 100644
index 0000000..c985d26
--- /dev/null
+++ b/queue-3.16/x86-mm-add-pud-functions.patch
@@ -0,0 +1,51 @@
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Fri, 28 Sep 2018 01:15:29 +0100
+Subject: x86: mm: Add PUD functions
+
+These are extracted from commit a00cc7d9dd93 "mm, x86: add support for
+PUD-sized transparent hugepages" and will be used by later patches.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -303,6 +303,25 @@ static inline pmd_t pmd_mknotpresent(pmd
+ 	return pmd_clear_flags(pmd, _PAGE_PRESENT);
+ }
+ 
++static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
++{
++	pudval_t v = native_pud_val(pud);
++
++	return __pud(v | set);
++}
++
++static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
++{
++	pudval_t v = native_pud_val(pud);
++
++	return __pud(v & ~clear);
++}
++
++static inline pud_t pud_mkhuge(pud_t pud)
++{
++	return pud_set_flags(pud, _PAGE_PSE);
++}
++
+ #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+ static inline int pte_soft_dirty(pte_t pte)
+ {
+@@ -352,6 +371,12 @@ static inline pmd_t pfn_pmd(unsigned lon
+ 		     massage_pgprot(pgprot));
+ }
+ 
++static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
++{
++	return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
++		     massage_pgprot(pgprot));
++}
++
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
+ 	pteval_t val = pte_val(pte);
diff --git a/queue-3.16/x86-mm-kmmio-make-the-tracer-robust-against-l1tf.patch b/queue-3.16/x86-mm-kmmio-make-the-tracer-robust-against-l1tf.patch
new file mode 100644
index 0000000..923a866
--- /dev/null
+++ b/queue-3.16/x86-mm-kmmio-make-the-tracer-robust-against-l1tf.patch
@@ -0,0 +1,66 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:38 -0700
+Subject: x86/mm/kmmio: Make the tracer robust against L1TF
+
+commit 1063711b57393c1999248cccb57bebfaf16739e7 upstream.
+
+The mmio tracer sets io mapping PTEs and PMDs to non present when enabled
+without inverting the address bits, which makes the PTE entry vulnerable
+to L1TF.
+
+Make it use the right low level macros to actually invert the address bits
+to protect against L1TF.
+
+In principle this could be avoided because MMIO tracing is not likely to be
+enabled on production machines, but the fix is straightforward and for
+consistency's sake it's better to get rid of the open coded PTE manipulation.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/mm/kmmio.c | 25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/mm/kmmio.c
++++ b/arch/x86/mm/kmmio.c
+@@ -114,24 +114,29 @@ static struct kmmio_fault_page *get_kmmi
+ 
+ static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
+ {
++	pmd_t new_pmd;
+ 	pmdval_t v = pmd_val(*pmd);
+ 	if (clear) {
+-		*old = v & _PAGE_PRESENT;
+-		v &= ~_PAGE_PRESENT;
+-	} else	/* presume this has been called with clear==true previously */
+-		v |= *old;
+-	set_pmd(pmd, __pmd(v));
++		*old = v;
++		new_pmd = pmd_mknotpresent(*pmd);
++	} else {
++		/* Presume this has been called with clear==true previously */
++		new_pmd = __pmd(*old);
++	}
++	set_pmd(pmd, new_pmd);
+ }
+ 
+ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
+ {
+ 	pteval_t v = pte_val(*pte);
+ 	if (clear) {
+-		*old = v & _PAGE_PRESENT;
+-		v &= ~_PAGE_PRESENT;
+-	} else	/* presume this has been called with clear==true previously */
+-		v |= *old;
+-	set_pte_atomic(pte, __pte(v));
++		*old = v;
++		/* Nothing should care about address */
++		pte_clear(&init_mm, 0, pte);
++	} else {
++		/* Presume this has been called with clear==true previously */
++		set_pte_atomic(pte, __pte(*old));
++	}
+ }
+ 
+ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
diff --git a/queue-3.16/x86-mm-move-swap-offset-type-up-in-pte-to-work-around-erratum.patch b/queue-3.16/x86-mm-move-swap-offset-type-up-in-pte-to-work-around-erratum.patch
new file mode 100644
index 0000000..32cdfe4
--- /dev/null
+++ b/queue-3.16/x86-mm-move-swap-offset-type-up-in-pte-to-work-around-erratum.patch
@@ -0,0 +1,104 @@
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Thu, 7 Jul 2016 17:19:11 -0700
+Subject: x86/mm: Move swap offset/type up in PTE to work around erratum
+
+commit 00839ee3b299303c6a5e26a0a2485427a3afcbbf upstream.
+
+This erratum can result in Accessed/Dirty getting set by the hardware
+when we do not expect them to be (on !Present PTEs).
+
+Instead of trying to fix them up after this happens, we just
+allow the bits to get set and try to ignore them.  We do this by
+shifting the layout of the bits we use for swap offset/type in
+our 64-bit PTEs.
+
+It looks like this:
+
+ bitnrs: |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0|
+ names:  |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P|
+ before: |         OFFSET (9-63)          |0|X|X| TYPE(1-5) |0|
+  after: | OFFSET (14-63)  |  TYPE (9-13) |0|X|X|X| X| X|X|X|0|
+
+Note that D was already a don't care (X) even before.  We just
+move TYPE up and turn its old spot (which could be hit by the
+A bit) into all don't cares.
+
+We take 5 bits away from the offset, but that still leaves us
+with 50 bits which lets us index into a 62-bit swapfile (4 EiB).
+I think that's probably fine for the moment.  We could
+theoretically reclaim 5 of the bits (1, 2, 3, 4, 7) but it
+doesn't gain us anything.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave@sr71.net>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: dave.hansen@intel.com
+Cc: linux-mm@kvack.org
+Cc: mhocko@suse.com
+Link: http://lkml.kernel.org/r/20160708001911.9A3FD2B6@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+[bwh: Backported to 3.16: Bit 9 may be reserved for PAGE_BIT_NUMA, which
+ no longer exists upstream.  Adjust the bit numbers accordingly,
+ incorporating commit ace7fab7a6cd "x86/mm: Fix swap entry comment and
+ macro".]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -162,23 +162,37 @@ static inline int pgd_large(pgd_t pgd) {
+ #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
+ #define pte_unmap(pte) ((void)(pte))/* NOP */
+ 
+-/* Encode and de-code a swap entry */
+-#define SWP_TYPE_BITS 5
++/*
++ * Encode and de-code a swap entry
++ *
++ * |     ...                | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
++ * |     ...                |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
++ * | OFFSET (15->63) | TYPE (10-14) | 0 |0|X|X|X| X| X|X|X|0| <- swp entry
++ *
++ * G (8) is aliased and used as a PROT_NONE indicator for
++ * !present ptes.  We need to start storing swap entries above
++ * there.  We also need to avoid using A and D because of an
++ * erratum where they can be incorrectly set by hardware on
++ * non-present PTEs.
++ */
+ #ifdef CONFIG_NUMA_BALANCING
+ /* Automatic NUMA balancing needs to be distinguishable from swap entries */
+-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)
++#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 2)
+ #else
+-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
++#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
+ #endif
++#define SWP_TYPE_BITS 5
++/* Place the offset above the type: */
++#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
+ 
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+ 
+-#define __swp_type(x)			(((x).val >> (_PAGE_BIT_PRESENT + 1)) \
++#define __swp_type(x)			(((x).val >> (SWP_TYPE_FIRST_BIT)) \
+ 					 & ((1U << SWP_TYPE_BITS) - 1))
+-#define __swp_offset(x)			((x).val >> SWP_OFFSET_SHIFT)
++#define __swp_offset(x)			((x).val >> SWP_OFFSET_FIRST_BIT)
+ #define __swp_entry(type, offset)	((swp_entry_t) { \
+-					 ((type) << (_PAGE_BIT_PRESENT + 1)) \
+-					 | ((offset) << SWP_OFFSET_SHIFT) })
++					 ((type) << (SWP_TYPE_FIRST_BIT)) \
++					 | ((offset) << SWP_OFFSET_FIRST_BIT) })
+ #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
+ #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
+ 
diff --git a/queue-3.16/x86-mm-pat-make-set_memory_np-l1tf-safe.patch b/queue-3.16/x86-mm-pat-make-set_memory_np-l1tf-safe.patch
new file mode 100644
index 0000000..8cedb8d
--- /dev/null
+++ b/queue-3.16/x86-mm-pat-make-set_memory_np-l1tf-safe.patch
@@ -0,0 +1,45 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:39 -0700
+Subject: x86/mm/pat: Make set_memory_np() L1TF safe
+
+commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream
+
+set_memory_np() is used to mark kernel mappings not present, but it has
+its own open coded mechanism which does not have the L1TF protection of
+inverting the address bits.
+
+Replace the open coded PTE manipulation with the L1TF protecting low level
+PTE routines.
+
+Passes the CPA self test.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 3.16:
+ - cpa->pfn is actually a physical address here and needs to be shifted to
+   produce a PFN
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -952,7 +952,8 @@ static int populate_pmd(struct cpa_data
+ 
+ 		pmd = pmd_offset(pud, start);
+ 
+-		set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
++		set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn >> PAGE_SHIFT,
++					canon_pgprot(pgprot))));
+ 
+ 		start	  += PMD_SIZE;
+ 		cpa->pfn  += PMD_SIZE;
+@@ -1022,7 +1023,8 @@ static int populate_pud(struct cpa_data
+ 	 * Map everything starting from the Gb boundary, possibly with 1G pages
+ 	 */
+ 	while (end - start >= PUD_SIZE) {
+-		set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
++		set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn >> PAGE_SHIFT,
++				   canon_pgprot(pgprot))));
+ 
+ 		start	  += PUD_SIZE;
+ 		cpa->pfn  += PUD_SIZE;
diff --git a/queue-3.16/x86-speculation-l1tf-add-sysfs-reporting-for-l1tf.patch b/queue-3.16/x86-speculation-l1tf-add-sysfs-reporting-for-l1tf.patch
new file mode 100644
index 0000000..889f66d
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-add-sysfs-reporting-for-l1tf.patch
@@ -0,0 +1,235 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:26 -0700
+Subject: x86/speculation/l1tf: Add sysfs reporting for l1tf
+
+commit 17dbca119312b4e8173d4e25ff64262119fcef38 upstream
+
+L1TF core kernel workarounds are cheap and normally always enabled.  However,
+they still should be reported in sysfs if the system is vulnerable or
+mitigated. Add the necessary CPU feature/bug bits.
+
+- Extend the existing checks for Meltdowns to determine if the system is
+  vulnerable. All CPUs which are not vulnerable to Meltdown are also not
+  vulnerable to L1TF
+
+- Check for 32bit non PAE and emit a warning as there is no practical way
+  for mitigation due to the limited physical address bits
+
+- If the system has more than MAX_PA/2 physical memory the invert page
+  workarounds don't protect the system against the L1TF attack anymore,
+  because an inverted physical address will also point to valid
+  memory. Print a warning in this case and report that the system is
+  vulnerable.
+
+Add a function which returns the PFN limit for the L1TF mitigation, which
+will be used in follow up patches for sanity and range checks.
+
+[ tglx: Renamed the CPU feature bit to L1TF_PTEINV ]
+[ dwmw2: Backport to 4.9 (cpufeatures.h, E820) ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 3.16:
+ - Assign the next available bits from feature word 7 and bug word 0
+ - CONFIG_PGTABLE_LEVELS is not defined; use other config symbols in the
+   condition
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/cpufeature.h |  3 ++-
+ arch/x86/include/asm/processor.h  |  5 ++++
+ arch/x86/kernel/cpu/bugs.c        | 40 ++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/common.c      | 20 +++++++++++++++
+ drivers/base/cpu.c                |  8 ++++++
+ include/linux/cpu.h               |  2 ++
+ 6 files changed, 77 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -199,6 +199,7 @@
+ #define X86_FEATURE_MSR_SPEC_CTRL (7*32+19) /* "" MSR SPEC_CTRL is implemented */
+ #define X86_FEATURE_SSBD	(7*32+20) /* Speculative Store Bypass Disable */
+ #define X86_FEATURE_ZEN		(7*32+21) /* "" CPU is AMD family 0x17 (Zen) */
++#define X86_FEATURE_L1TF_PTEINV	(7*32+22) /* "" L1TF workaround PTE inversion */
+ 
+ #define X86_FEATURE_RETPOLINE	(7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_AMD (7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+@@ -271,6 +272,7 @@
+ #define X86_BUG_SPECTRE_V1	X86_BUG(6) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+ #define X86_BUG_SPECTRE_V2	X86_BUG(7) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(8) /* CPU is affected by speculative store bypass attack */
++#define X86_BUG_L1TF		X86_BUG(9) /* CPU is affected by L1 Terminal Fault */
+ 
+ #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+ 
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -165,6 +165,11 @@ extern const struct seq_operations cpuin
+ extern void cpu_detect(struct cpuinfo_x86 *c);
+ extern void fpu_detect(struct cpuinfo_x86 *c);
+ 
++static inline unsigned long l1tf_pfn_limit(void)
++{
++	return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
++}
++
+ extern void early_cpu_init(void);
+ extern void identify_boot_cpu(void);
+ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -26,9 +26,11 @@
+ #include <asm/pgtable.h>
+ #include <asm/cacheflush.h>
+ #include <asm/intel-family.h>
++#include <asm/e820.h>
+ 
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
++static void __init l1tf_select_mitigation(void);
+ 
+ /*
+  * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+@@ -138,6 +140,8 @@ void __init check_bugs(void)
+ 	 */
+ 	ssb_select_mitigation();
+ 
++	l1tf_select_mitigation();
++
+ #ifdef CONFIG_X86_32
+ 	/*
+ 	 * Check whether we are able to run this kernel safely on SMP.
+@@ -266,6 +270,32 @@ static void x86_amd_ssb_disable(void)
+ 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
+ }
+ 
++static void __init l1tf_select_mitigation(void)
++{
++	u64 half_pa;
++
++	if (!boot_cpu_has_bug(X86_BUG_L1TF))
++		return;
++
++#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
++	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
++	return;
++#endif
++
++	/*
++	 * This is extremely unlikely to happen because almost all
++	 * systems have far more MAX_PA/2 than RAM can be fit into
++	 * DIMM slots.
++	 */
++	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
++	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
++		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
++		return;
++	}
++
++	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
++}
++
+ #ifdef RETPOLINE
+ static bool spectre_v2_bad_module;
+ 
+@@ -718,6 +748,11 @@ static ssize_t cpu_show_common(struct de
+ 	case X86_BUG_SPEC_STORE_BYPASS:
+ 		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+ 
++	case X86_BUG_L1TF:
++		if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
++			return sprintf(buf, "Mitigation: Page Table Inversion\n");
++		break;
++
+ 	default:
+ 		break;
+ 	}
+@@ -744,4 +779,9 @@ ssize_t cpu_show_spec_store_bypass(struc
+ {
+ 	return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
+ }
++
++ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
++{
++	return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
++}
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -842,6 +842,21 @@ static const __initconst struct x86_cpu_
+ 	{}
+ };
+ 
++static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
++	/* in addition to cpu_no_speculation */
++	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT1	},
++	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT2	},
++	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_AIRMONT		},
++	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MERRIFIELD	},
++	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MOOREFIELD	},
++	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GOLDMONT	},
++	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_DENVERTON	},
++	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GEMINI_LAKE	},
++	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNL		},
++	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNM		},
++	{}
++};
++
+ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ {
+ 	u64 ia32_cap = 0;
+@@ -867,6 +882,11 @@ static void __init cpu_set_bug_bits(stru
+ 		return;
+ 
+ 	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++
++	if (x86_match_cpu(cpu_no_l1tf))
++		return;
++
++	setup_force_cpu_bug(X86_BUG_L1TF);
+ }
+ 
+ /*
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -444,16 +444,24 @@ ssize_t __weak cpu_show_spec_store_bypas
+ 	return sprintf(buf, "Not affected\n");
+ }
+ 
++ssize_t __weak cpu_show_l1tf(struct device *dev,
++			     struct device_attribute *attr, char *buf)
++{
++	return sprintf(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+ static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
++static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
+ 
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ 	&dev_attr_meltdown.attr,
+ 	&dev_attr_spectre_v1.attr,
+ 	&dev_attr_spectre_v2.attr,
+ 	&dev_attr_spec_store_bypass.attr,
++	&dev_attr_l1tf.attr,
+ 	NULL
+ };
+ 
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -47,6 +47,8 @@ extern ssize_t cpu_show_spectre_v2(struc
+ 				   struct device_attribute *attr, char *buf);
+ extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
+ 					  struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_l1tf(struct device *dev,
++			     struct device_attribute *attr, char *buf);
+ 
+ #ifdef CONFIG_HOTPLUG_CPU
+ extern void unregister_cpu(struct cpu *cpu);
diff --git a/queue-3.16/x86-speculation-l1tf-change-order-of-offset-type-in-swap-entry.patch b/queue-3.16/x86-speculation-l1tf-change-order-of-offset-type-in-swap-entry.patch
new file mode 100644
index 0000000..83bf5c3
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-change-order-of-offset-type-in-swap-entry.patch
@@ -0,0 +1,108 @@
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 13 Jun 2018 15:48:22 -0700
+Subject: x86/speculation/l1tf: Change order of offset/type in swap entry
+
+commit bcd11afa7adad8d720e7ba5ef58bdcd9775cf45f upstream.
+
+If pages are swapped out, the swap entry is stored in the corresponding
+PTE, which has the Present bit cleared. CPUs vulnerable to L1TF speculate
+on PTE entries which have the present bit set and would treat the swap
+entry as a physical address (PFN). To mitigate that, the upper bits of the PTE
+must be set so the PTE points to non-existent memory.
+
+The swap entry stores the type and the offset of a swapped out page in the
+PTE. The type is stored in bits 9-13 and the offset in bits 14-63. The hardware
+ignores the bits beyond the physical address space limit, so to make the
+mitigation effective it's required to start 'offset' at the lowest possible
+bit so that even large swap offsets do not reach into the physical address
+space limit bits.
+
+Move the offset to bits 9-58 and the type to bits 59-63, which are the bits that
+hardware generally doesn't care about.
+
+That, in turn, means that if you are on a desktop chip with only 40 bits of
+physical addressing, now that the offset starts at bit 9, there needs to be
+30 bits of offset actually *in use* until bit 39 ends up being set, which
+means when inverted it will again point into existing memory.
+
+So that's 4 terabytes of swap space (because the offset is counted in pages,
+so 30 bits of offset is 42 bits of actual coverage). With bigger physical
+addressing, that obviously grows further, until the limit of the offset is
+hit (at 50 bits of offset - 62 bits of actual swap file coverage).
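As an illustration only (not part of the patch), a self-contained C sketch of the reordered encoding with the offset starting at bit 9, matching the upstream layout; as the backport note below points out, 3.16 may start the offset one bit higher when PAGE_BIT_NUMA is reserved:

    #include <stdio.h>
    #include <stdint.h>

    #define SWP_TYPE_BITS        5
    #define SWP_OFFSET_FIRST_BIT 9   /* upstream layout; may be 10 in the 3.16 backport */
    #define SWP_OFFSET_SHIFT     (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)

    /* shift the offset "too far" up by TYPE bits, then back down */
    static uint64_t swp_entry(uint64_t type, uint64_t offset)
    {
        return (offset << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) |
               (type << (64 - SWP_TYPE_BITS));
    }

    /* type lives in the high bits, offset right above the software bits */
    static uint64_t swp_type(uint64_t val)   { return val >> (64 - SWP_TYPE_BITS); }
    static uint64_t swp_offset(uint64_t val) { return val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT; }

    int main(void)
    {
        uint64_t e = swp_entry(3, 0x123456789ULL);
        printf("type=%llu offset=%#llx\n",
               (unsigned long long)swp_type(e),
               (unsigned long long)swp_offset(e));
        return 0;
    }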
+
+This is a preparatory change for the actual swap entry inversion to protect
+against L1TF.
+
+[ AK: Updated description and minor tweaks. Split into two parts ]
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+[bwh: Backported to 3.16: Bit 9 may be reserved for PAGE_BIT_NUMA here]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable_64.h | 31 ++++++++++++++++++++-----------
+ 1 file changed, 20 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -167,7 +167,7 @@ static inline int pgd_large(pgd_t pgd) {
+  *
+  * |     ...                | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
+  * |     ...                |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
+- * | OFFSET (15->63) | TYPE (10-14) | 0 |0|0|X|X| X| X|X|SD|0| <- swp entry
++ * | TYPE (59-63) |  OFFSET (10-58) | 0 |0|0|X|X| X| X|X|SD|0| <- swp entry
+  *
+  * G (8) is aliased and used as a PROT_NONE indicator for
+  * !present ptes.  We need to start storing swap entries above
+@@ -181,24 +181,33 @@ static inline int pgd_large(pgd_t pgd) {
+  * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
+  * but also L and G.
+  */
++#define SWP_TYPE_BITS		5
++
+ #ifdef CONFIG_NUMA_BALANCING
+ /* Automatic NUMA balancing needs to be distinguishable from swap entries */
+-#define SWP_TYPE_FIRST_SHIFT (_PAGE_BIT_PROTNONE + 2)
++#define SWP_OFFSET_FIRST_BIT	(_PAGE_BIT_PROTNONE + 2)
+ #else
+-#define SWP_TYPE_FIRST_SHIFT (_PAGE_BIT_PROTNONE + 1)
++#define SWP_OFFSET_FIRST_BIT	(_PAGE_BIT_PROTNONE + 1)
+ #endif
+-#define SWP_TYPE_BITS 5
+-/* Place the offset above the type: */
+-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
++
++/* We always extract/encode the offset by shifting it all the way up, and then down again */
++#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS)
+ 
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+ 
+-#define __swp_type(x)			(((x).val >> (SWP_TYPE_FIRST_BIT)) \
+-					 & ((1U << SWP_TYPE_BITS) - 1))
+-#define __swp_offset(x)			((x).val >> SWP_OFFSET_FIRST_BIT)
+-#define __swp_entry(type, offset)	((swp_entry_t) { \
+-					 ((type) << (SWP_TYPE_FIRST_BIT)) \
+-					 | ((offset) << SWP_OFFSET_FIRST_BIT) })
++/* Extract the high bits for type */
++#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
++
++/* Shift up (to get rid of type), then down to get value */
++#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
++
++/*
++ * Shift the offset up "too far" by TYPE bits, then down again
++ */
++#define __swp_entry(type, offset) ((swp_entry_t) { \
++	((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++	| ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
++
+ #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
+ #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
+ 
diff --git a/queue-3.16/x86-speculation-l1tf-disallow-non-privileged-high-mmio-prot_none.patch b/queue-3.16/x86-speculation-l1tf-disallow-non-privileged-high-mmio-prot_none.patch
new file mode 100644
index 0000000..38919fd
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-disallow-non-privileged-high-mmio-prot_none.patch
@@ -0,0 +1,274 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:27 -0700
+Subject: x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE
+ mappings
+
+commit 42e4089c7890725fcd329999252dc489b72f2921 upstream
+
+For L1TF, PROT_NONE mappings are protected by inverting the PFN in the page
+table entry. This sets the high bits in the CPU's address space, thus
+making sure an unmapped entry does not point to valid cached memory.
+
+Some server system BIOSes put the MMIO mappings high up in the physical
+address space. If such a high mapping was mapped to unprivileged users
+they could attack low memory by setting such a mapping to PROT_NONE. This
+could happen through a special device driver which is not access
+protected. Normal /dev/mem is of course access protected.
+
+To avoid this, forbid PROT_NONE mappings or mprotect for high MMIO mappings.
+
+Valid page mappings are allowed because the system is then unsafe anyway.
+
+It's not expected that users commonly use PROT_NONE on MMIO. But to
+minimize any impact this is only enforced if the mapping actually refers to
+a high MMIO address (defined as the MAX_PA-1 bit being set), and also skip
+the check for root.
+
+For mmaps this is straightforward and can be handled in vm_insert_pfn and
+in remap_pfn_range().
+
+For mprotect it's a bit trickier. At the point where the actual PTEs are
+accessed a lot of state has been changed and it would be difficult to undo
+on an error. Since this is an uncommon case, use a separate early page
+table walk pass for MMIO PROT_NONE mappings that checks for this condition
+early. For non MMIO and non PROT_NONE there are no changes.
+
+[dwmw2: Backport to 4.9]
+[groeck: Backport to 4.4]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable.h |  8 ++++++
+ arch/x86/mm/mmap.c             | 21 +++++++++++++++
+ include/asm-generic/pgtable.h  | 12 +++++++++
+ mm/memory.c                    | 29 +++++++++++++++-----
+ mm/mprotect.c                  | 49 ++++++++++++++++++++++++++++++++++
+ 5 files changed, 112 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -940,6 +940,14 @@ static inline pte_t pte_swp_clear_soft_d
+ }
+ #endif
+ 
++#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
++extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);
++
++static inline bool arch_has_pfn_modify_check(void)
++{
++	return boot_cpu_has_bug(X86_BUG_L1TF);
++}
++
+ #include <asm-generic/pgtable.h>
+ #endif	/* __ASSEMBLY__ */
+ 
+--- a/arch/x86/mm/mmap.c
++++ b/arch/x86/mm/mmap.c
+@@ -114,3 +114,24 @@ void arch_pick_mmap_layout(struct mm_str
+ 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ 	}
+ }
++
++/*
++ * Only allow root to set high MMIO mappings to PROT_NONE.
++ * This prevents an unpriv. user to set them to PROT_NONE and invert
++ * them, then pointing to valid memory for L1TF speculation.
++ *
++ * Note: for locked down kernels may want to disable the root override.
++ */
++bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
++{
++	if (!boot_cpu_has_bug(X86_BUG_L1TF))
++		return true;
++	if (!__pte_needs_invert(pgprot_val(prot)))
++		return true;
++	/* If it's real memory always allow */
++	if (pfn_valid(pfn))
++		return true;
++	if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
++		return false;
++	return true;
++}
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -812,4 +812,16 @@ static inline void pmdp_set_numa(struct
+ #define io_remap_pfn_range remap_pfn_range
+ #endif
+ 
++#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
++static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
++{
++	return true;
++}
++
++static inline bool arch_has_pfn_modify_check(void)
++{
++	return false;
++}
++#endif
++
+ #endif /* _ASM_GENERIC_PGTABLE_H */
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1614,6 +1614,9 @@ int vm_insert_pfn_prot(struct vm_area_st
+ 	if (track_pfn_insert(vma, &pgprot, pfn))
+ 		return -EINVAL;
+ 
++	if (!pfn_modify_allowed(pfn, pgprot))
++		return -EACCES;
++
+ 	ret = insert_pfn(vma, addr, pfn, pgprot);
+ 
+ 	return ret;
+@@ -1632,6 +1635,9 @@ int vm_insert_mixed(struct vm_area_struc
+ 	if (track_pfn_insert(vma, &pgprot, pfn))
+ 		return -EINVAL;
+ 
++	if (!pfn_modify_allowed(pfn, pgprot))
++		return -EACCES;
++
+ 	/*
+ 	 * If we don't have pte special, then we have to use the pfn_valid()
+ 	 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
+@@ -1660,6 +1666,7 @@ static int remap_pte_range(struct mm_str
+ {
+ 	pte_t *pte;
+ 	spinlock_t *ptl;
++	int err = 0;
+ 
+ 	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+ 	if (!pte)
+@@ -1667,12 +1674,16 @@ static int remap_pte_range(struct mm_str
+ 	arch_enter_lazy_mmu_mode();
+ 	do {
+ 		BUG_ON(!pte_none(*pte));
++		if (!pfn_modify_allowed(pfn, prot)) {
++			err = -EACCES;
++			break;
++		}
+ 		set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
+ 		pfn++;
+ 	} while (pte++, addr += PAGE_SIZE, addr != end);
+ 	arch_leave_lazy_mmu_mode();
+ 	pte_unmap_unlock(pte - 1, ptl);
+-	return 0;
++	return err;
+ }
+ 
+ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+@@ -1681,6 +1692,7 @@ static inline int remap_pmd_range(struct
+ {
+ 	pmd_t *pmd;
+ 	unsigned long next;
++	int err;
+ 
+ 	pfn -= addr >> PAGE_SHIFT;
+ 	pmd = pmd_alloc(mm, pud, addr);
+@@ -1689,9 +1701,10 @@ static inline int remap_pmd_range(struct
+ 	VM_BUG_ON(pmd_trans_huge(*pmd));
+ 	do {
+ 		next = pmd_addr_end(addr, end);
+-		if (remap_pte_range(mm, pmd, addr, next,
+-				pfn + (addr >> PAGE_SHIFT), prot))
+-			return -ENOMEM;
++		err = remap_pte_range(mm, pmd, addr, next,
++				pfn + (addr >> PAGE_SHIFT), prot);
++		if (err)
++			return err;
+ 	} while (pmd++, addr = next, addr != end);
+ 	return 0;
+ }
+@@ -1702,6 +1715,7 @@ static inline int remap_pud_range(struct
+ {
+ 	pud_t *pud;
+ 	unsigned long next;
++	int err;
+ 
+ 	pfn -= addr >> PAGE_SHIFT;
+ 	pud = pud_alloc(mm, pgd, addr);
+@@ -1709,9 +1723,10 @@ static inline int remap_pud_range(struct
+ 		return -ENOMEM;
+ 	do {
+ 		next = pud_addr_end(addr, end);
+-		if (remap_pmd_range(mm, pud, addr, next,
+-				pfn + (addr >> PAGE_SHIFT), prot))
+-			return -ENOMEM;
++		err = remap_pmd_range(mm, pud, addr, next,
++				pfn + (addr >> PAGE_SHIFT), prot);
++		if (err)
++			return err;
+ 	} while (pud++, addr = next, addr != end);
+ 	return 0;
+ }
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -258,6 +258,42 @@ unsigned long change_protection(struct v
+ 	return pages;
+ }
+ 
++static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
++			       unsigned long next, struct mm_walk *walk)
++{
++	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
++		0 : -EACCES;
++}
++
++static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
++				   unsigned long addr, unsigned long next,
++				   struct mm_walk *walk)
++{
++	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
++		0 : -EACCES;
++}
++
++static int prot_none_test(unsigned long addr, unsigned long next,
++			  struct mm_walk *walk)
++{
++	return 0;
++}
++
++static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
++			   unsigned long end, unsigned long newflags)
++{
++	pgprot_t new_pgprot = vm_get_page_prot(newflags);
++	struct mm_walk prot_none_walk = {
++		.pte_entry = prot_none_pte_entry,
++		.hugetlb_entry = prot_none_hugetlb_entry,
++		.test_walk = prot_none_test,
++		.mm = current->mm,
++		.private = &new_pgprot,
++	};
++
++	return walk_page_range(start, end, &prot_none_walk);
++}
++
+ int
+ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
+ 	unsigned long start, unsigned long end, unsigned long newflags)
+@@ -276,6 +312,19 @@ mprotect_fixup(struct vm_area_struct *vm
+ 	}
+ 
+ 	/*
++	 * Do PROT_NONE PFN permission checks here when we can still
++	 * bail out without undoing a lot of state. This is a rather
++	 * uncommon case, so doesn't need to be very optimized.
++	 */
++	if (arch_has_pfn_modify_check() &&
++	    (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
++	    (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
++		error = prot_none_walk(vma, start, end, newflags);
++		if (error)
++			return error;
++	}
++
++	/*
+ 	 * If we make a private mapping writable we increase our commit;
+ 	 * but (without finer accounting) cannot reduce our commit if we
+ 	 * make it unwritable again. hugetlb mapping were accounted for
diff --git a/queue-3.16/x86-speculation-l1tf-exempt-zeroed-ptes-from-inversion.patch b/queue-3.16/x86-speculation-l1tf-exempt-zeroed-ptes-from-inversion.patch
new file mode 100644
index 0000000..0fdd958
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-exempt-zeroed-ptes-from-inversion.patch
@@ -0,0 +1,69 @@
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+Date: Fri, 17 Aug 2018 10:27:36 -0700
+Subject: x86/speculation/l1tf: Exempt zeroed PTEs from inversion
+
+commit f19f5c49bbc3ffcc9126cc245fc1b24cc29f4a37 upstream.
+
+It turns out that we should *not* invert all not-present mappings,
+because the all zeroes case is obviously special.
+
+clear_page() does not undergo the XOR logic to invert the address bits,
+i.e. PTE, PMD and PUD entries that have not been individually written
+will have val=0 and so will trigger __pte_needs_invert(). As a result,
+{pte,pmd,pud}_pfn() will return the wrong PFN value, i.e. all ones
+(adjusted by the max PFN mask) instead of zero. A zeroed entry is ok
+because the page at physical address 0 is reserved early in boot
+specifically to mitigate L1TF, so explicitly exempt them from the
+inversion when reading the PFN.
+
+Manifested as an unexpected mprotect(..., PROT_NONE) failure when called
+on a VMA that has VM_PFNMAP and was mmap'd as something other than
+PROT_NONE but never used. mprotect() sends the PROT_NONE request down
+prot_none_walk(), which walks the PTEs to check the PFNs.
+prot_none_pte_entry() gets the bogus PFN from pte_pfn() and returns
+-EACCES because it thinks mprotect() is trying to adjust a high MMIO
+address.
+
+[ This is a very modified version of Sean's original patch, but all
+  credit goes to Sean for doing this and also pointing out that
+  sometimes the __pte_needs_invert() function only gets the protection
+  bits, not the full eventual pte.  But zero remains special even in
+  just protection bits, so that's ok.   - Linus ]
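For illustration (not part of the patch), a standalone sketch of why a zeroed entry has to be exempted; the constants are assumed values for the demo, not the kernel's definitions:

    #include <stdio.h>
    #include <stdint.h>

    #define _PAGE_PRESENT 0x001ULL
    #define PTE_PFN_MASK  0x000ffffffffff000ULL   /* assumed 52-bit mask for the demo */
    #define PAGE_SHIFT    12

    /* with the fix: a completely clear entry is not inverted */
    static int pte_needs_invert(uint64_t val)
    {
        return val && !(val & _PAGE_PRESENT);
    }

    static uint64_t pte_pfn(uint64_t pte)
    {
        uint64_t pfn = pte;
        /* without the "val &&" check, a zero entry would be inverted and
         * pte_pfn(0) would report the all-ones PFN instead of zero */
        if (pte_needs_invert(pte))
            pfn = ~pfn;
        return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
    }

    int main(void)
    {
        printf("pfn of empty pte: %#llx\n", (unsigned long long)pte_pfn(0));
        return 0;
    }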
+
+Fixes: f22cc87f6c1f ("x86/speculation/l1tf: Invert all not present mappings")
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Acked-by: Andi Kleen <ak@linux.intel.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable-invert.h | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/pgtable-invert.h
++++ b/arch/x86/include/asm/pgtable-invert.h
+@@ -4,9 +4,18 @@
+ 
+ #ifndef __ASSEMBLY__
+ 
++/*
++ * A clear pte value is special, and doesn't get inverted.
++ *
++ * Note that even users that only pass a pgprot_t (rather
++ * than a full pte) won't trigger the special zero case,
++ * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED
++ * set. So the all zero case really is limited to just the
++ * cleared page table entry case.
++ */
+ static inline bool __pte_needs_invert(u64 val)
+ {
+-	return !(val & _PAGE_PRESENT);
++	return val && !(val & _PAGE_PRESENT);
+ }
+ 
+ /* Get a mask to xor with the page table entry to get the correct pfn. */
diff --git a/queue-3.16/x86-speculation-l1tf-extend-64bit-swap-file-size-limit.patch b/queue-3.16/x86-speculation-l1tf-extend-64bit-swap-file-size-limit.patch
new file mode 100644
index 0000000..96858d1
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-extend-64bit-swap-file-size-limit.patch
@@ -0,0 +1,42 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Thu, 21 Jun 2018 12:36:29 +0200
+Subject: x86/speculation/l1tf: Extend 64bit swap file size limit
+
+commit 1a7ed1ba4bba6c075d5ad61bb75e3fbc870840d6 upstream.
+
+The previous patch has limited swap file size so that large offsets cannot
+clear bits above MAX_PA/2 in the pte and interfere with L1TF mitigation.
+
+It assumed that offsets are encoded starting with bit 12, same as pfn. But
+on x86_64, offsets are encoded starting with bit 9.
+
+Thus the limit can be raised by 3 bits. That means 16TB with 42bit MAX_PA
+and 256TB with 46bit MAX_PA.
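A quick way to check the quoted numbers (illustration only, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT           12
    #define SWP_OFFSET_FIRST_BIT 9    /* x86_64 layout after the reordering patch */

    int main(void)
    {
        int phys_bits[] = { 42, 46 };
        for (int i = 0; i < 2; i++) {
            /* l1tf_pfn_limit() + 1 pages, then the 3 extra offset bits */
            uint64_t pages = 1ULL << (phys_bits[i] - 1 - PAGE_SHIFT);
            pages <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
            printf("MAX_PA=%d bits -> swap limit %llu TiB\n", phys_bits[i],
                   (unsigned long long)((pages << PAGE_SHIFT) >> 40));
        }
        return 0;
    }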
+
+Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/mm/init.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -710,7 +710,15 @@ unsigned long max_swapfile_size(void)
+ 
+ 	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+ 		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+-		pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
++		unsigned long l1tf_limit = l1tf_pfn_limit() + 1;
++		/*
++		 * We encode swap offsets also with 3 bits below those for pfn
++		 * which makes the usable limit higher.
++		 */
++#ifdef CONFIG_X86_64
++		l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
++#endif
++		pages = min_t(unsigned long, l1tf_limit, pages);
+ 	}
+ 	return pages;
+ }
diff --git a/queue-3.16/x86-speculation-l1tf-fix-off-by-one-error-when-warning-that-system.patch b/queue-3.16/x86-speculation-l1tf-fix-off-by-one-error-when-warning-that-system.patch
new file mode 100644
index 0000000..0b76a3f
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-fix-off-by-one-error-when-warning-that-system.patch
@@ -0,0 +1,78 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Thu, 23 Aug 2018 15:44:18 +0200
+Subject: x86/speculation/l1tf: Fix off-by-one error when warning that system
+ has too much RAM
+
+commit b0a182f875689647b014bc01d36b340217792852 upstream.
+
+Two users have reported [1] that they have an "extremely unlikely" system
+with more than MAX_PA/2 memory and L1TF mitigation is not effective. In
+fact it's a CPU with 36bits phys limit (64GB) and 32GB memory, but due to
+holes in the e820 map, the main region is almost 500MB over the 32GB limit:
+
+[    0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000081effffff] usable
+
+Suggestions to use 'mem=32G' to enable the L1TF mitigation while losing the
+500MB revealed, that there's an off-by-one error in the check in
+l1tf_select_mitigation().
+
+l1tf_pfn_limit() returns the last usable pfn (inclusive) and the range
+check in the mitigation path does not take this into account.
+
+Instead of amending the range check, make l1tf_pfn_limit() return the first
+PFN which is over the limit, which is less error-prone. Adjust the other
+users accordingly.
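To make the off-by-one concrete (illustration only, not part of the patch), the 36-bit machine from the report places the old inclusive boundary one page below MAX_PA/2:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        int phys_bits = 36;    /* the 64GB box from the report */
        uint64_t lim_old = (1ULL << (phys_bits - 1 - PAGE_SHIFT)) - 1; /* last pfn, inclusive */
        uint64_t lim_new = 1ULL << (phys_bits - 1 - PAGE_SHIFT);       /* first pfn over the limit */
        printf("old boundary %#llx, new boundary %#llx\n",
               (unsigned long long)(lim_old << PAGE_SHIFT),
               (unsigned long long)(lim_new << PAGE_SHIFT));
        return 0;
    }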
+
+[1] https://bugzilla.suse.com/show_bug.cgi?id=1105536
+
+Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
+Reported-by: George Anchev <studio@anchev.net>
+Reported-by: Christopher Snowhill <kode54@gmail.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: "H . Peter Anvin" <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Link: https://lkml.kernel.org/r/20180823134418.17008-1-vbabka@suse.cz
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/processor.h | 2 +-
+ arch/x86/mm/init.c               | 2 +-
+ arch/x86/mm/mmap.c               | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -167,7 +167,7 @@ extern void fpu_detect(struct cpuinfo_x8
+ 
+ static inline unsigned long long l1tf_pfn_limit(void)
+ {
+-	return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
++	return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
+ }
+ 
+ extern void early_cpu_init(void);
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -710,7 +710,7 @@ unsigned long max_swapfile_size(void)
+ 
+ 	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+ 		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+-		unsigned long long l1tf_limit = l1tf_pfn_limit() + 1;
++		unsigned long long l1tf_limit = l1tf_pfn_limit();
+ 		/*
+ 		 * We encode swap offsets also with 3 bits below those for pfn
+ 		 * which makes the usable limit higher.
+--- a/arch/x86/mm/mmap.c
++++ b/arch/x86/mm/mmap.c
+@@ -131,7 +131,7 @@ bool pfn_modify_allowed(unsigned long pf
+ 	/* If it's real memory always allow */
+ 	if (pfn_valid(pfn))
+ 		return true;
+-	if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
++	if (pfn >= l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
+ 		return false;
+ 	return true;
+ }
diff --git a/queue-3.16/x86-speculation-l1tf-fix-overflow-in-l1tf_pfn_limit-on-32bit.patch b/queue-3.16/x86-speculation-l1tf-fix-overflow-in-l1tf_pfn_limit-on-32bit.patch
new file mode 100644
index 0000000..9fbc2d1
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-fix-overflow-in-l1tf_pfn_limit-on-32bit.patch
@@ -0,0 +1,70 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Mon, 20 Aug 2018 11:58:35 +0200
+Subject: x86/speculation/l1tf: Fix overflow in l1tf_pfn_limit() on 32bit
+
+commit 9df9516940a61d29aedf4d91b483ca6597e7d480 upstream.
+
+On 32bit PAE kernels on 64bit hardware with enough physical bits,
+l1tf_pfn_limit() will overflow unsigned long. This in turn affects
+max_swapfile_size() and can lead to swapon returning -EINVAL. This has been
+observed in a 32bit guest with 42 bits physical address size, where
+max_swapfile_size() overflows exactly to 1 << 32, thus zero, and produces
+the following warning to dmesg:
+
+[    6.396845] Truncating oversized swap area, only using 0k out of 2047996k
+
+Fix this by using unsigned long long instead.
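A standalone illustration of the observed wrap-around in the max_swapfile_size() arithmetic (not part of the patch); fixed-width types stand in for the kernel's 32bit unsigned long and for BIT_ULL():

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT           12
    #define SWP_OFFSET_FIRST_BIT 9

    int main(void)
    {
        int phys_bits = 42;    /* the reported 32bit PAE guest */
        unsigned int lim32 = 1u << (phys_bits - 1 - PAGE_SHIFT);    /* BIT() on 32bit */
        uint64_t     lim64 = 1ULL << (phys_bits - 1 - PAGE_SHIFT);  /* BIT_ULL() */
        lim32 <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;   /* 0x20000000 << 3 wraps to 0 */
        lim64 <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
        printf("32bit result: %u pages, 64bit result: %llu pages\n",
               lim32, (unsigned long long)lim64);
        return 0;
    }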
+
+Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
+Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
+Reported-by: Dominique Leuenberger <dimstar@suse.de>
+Reported-by: Adrian Schroeter <adrian@suse.de>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Andi Kleen <ak@linux.intel.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: "H . Peter Anvin" <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Link: https://lkml.kernel.org/r/20180820095835.5298-1-vbabka@suse.cz
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/processor.h | 4 ++--
+ arch/x86/mm/init.c               | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -165,9 +165,9 @@ extern const struct seq_operations cpuin
+ extern void cpu_detect(struct cpuinfo_x86 *c);
+ extern void fpu_detect(struct cpuinfo_x86 *c);
+ 
+-static inline unsigned long l1tf_pfn_limit(void)
++static inline unsigned long long l1tf_pfn_limit(void)
+ {
+-	return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
++	return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
+ }
+ 
+ extern void early_cpu_init(void);
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -710,7 +710,7 @@ unsigned long max_swapfile_size(void)
+ 
+ 	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+ 		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+-		unsigned long l1tf_limit = l1tf_pfn_limit() + 1;
++		unsigned long long l1tf_limit = l1tf_pfn_limit() + 1;
+ 		/*
+ 		 * We encode swap offsets also with 3 bits below those for pfn
+ 		 * which makes the usable limit higher.
+@@ -718,7 +718,7 @@ unsigned long max_swapfile_size(void)
+ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+ 		l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
+ #endif
+-		pages = min_t(unsigned long, l1tf_limit, pages);
++		pages = min_t(unsigned long long, l1tf_limit, pages);
+ 	}
+ 	return pages;
+ }
diff --git a/queue-3.16/x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch b/queue-3.16/x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch
new file mode 100644
index 0000000..9685834
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch
@@ -0,0 +1,81 @@
+From: Michal Hocko <mhocko@suse.cz>
+Date: Wed, 27 Jun 2018 17:46:50 +0200
+Subject: x86/speculation/l1tf: Fix up pte->pfn conversion for PAE
+
+commit e14d7dfb41f5807a0c1c26a13f2b8ef16af24935 upstream
+
+Jan has noticed that pte_pfn and co. resp. pfn_pte are incorrect for
+CONFIG_PAE because phys_addr_t is wider than unsigned long and so the
+pte_val resp. shift left would get truncated. Fix this up by using proper
+types.
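A userspace illustration of the truncation (not part of the patch); the 32bit type stands in for unsigned long on a PAE kernel:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        uint32_t page_nr = 0x180000;                      /* a pfn above 4GB on a PAE guest */
        uint32_t bad  = page_nr << PAGE_SHIFT;            /* shift done in 32 bits: truncated */
        uint64_t good = (uint64_t)page_nr << PAGE_SHIFT;  /* phys_addr_t-style widening cast */
        printf("truncated %#x vs correct %#llx\n", bad, (unsigned long long)good);
        return 0;
    }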
+
+[dwmw2: Backport to 4.9]
+
+Fixes: 6b28baca9b1f ("x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation")
+Reported-by: Jan Beulich <JBeulich@suse.com>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 3.16: Adjust context.  Also restore the fix to pfn_pud().]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable.h | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -147,21 +147,21 @@ static inline u64 protnone_mask(u64 val)
+ 
+ static inline unsigned long pte_pfn(pte_t pte)
+ {
+-	unsigned long pfn = pte_val(pte);
++	phys_addr_t pfn = pte_val(pte);
+ 	pfn ^= protnone_mask(pfn);
+ 	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+ 
+ static inline unsigned long pmd_pfn(pmd_t pmd)
+ {
+-	unsigned long pfn = pmd_val(pmd);
++	phys_addr_t pfn = pmd_val(pmd);
+ 	pfn ^= protnone_mask(pfn);
+ 	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+ 
+ static inline unsigned long pud_pfn(pud_t pud)
+ {
+-	unsigned long pfn = pud_val(pud);
++	phys_addr_t pfn = pud_val(pud);
+ 	pfn ^= protnone_mask(pfn);
+ 	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+@@ -371,7 +371,7 @@ static inline pgprotval_t massage_pgprot
+ 
+ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+ {
+-	phys_addr_t pfn = page_nr << PAGE_SHIFT;
++	phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ 	pfn ^= protnone_mask(pgprot_val(pgprot));
+ 	pfn &= PTE_PFN_MASK;
+ 	return __pte(pfn | massage_pgprot(pgprot));
+@@ -379,7 +379,7 @@ static inline pte_t pfn_pte(unsigned lon
+ 
+ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+ {
+-	phys_addr_t pfn = page_nr << PAGE_SHIFT;
++	phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ 	pfn ^= protnone_mask(pgprot_val(pgprot));
+ 	pfn &= PTE_PFN_MASK;
+ 	return __pmd(pfn | massage_pgprot(pgprot));
+@@ -387,7 +387,7 @@ static inline pmd_t pfn_pmd(unsigned lon
+ 
+ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
+ {
+-	phys_addr_t pfn = page_nr << PAGE_SHIFT;
++	phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ 	pfn ^= protnone_mask(pgprot_val(pgprot));
+ 	pfn &= PTE_PFN_MASK;
+ 	return __pud(pfn | massage_pgprot(pgprot));
diff --git a/queue-3.16/x86-speculation-l1tf-increase-32bit-pae-__physical_page_shift.patch b/queue-3.16/x86-speculation-l1tf-increase-32bit-pae-__physical_page_shift.patch
new file mode 100644
index 0000000..ef156ed
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-increase-32bit-pae-__physical_page_shift.patch
@@ -0,0 +1,77 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:21 -0700
+Subject: x86/speculation/l1tf: Increase 32bit PAE __PHYSICAL_PAGE_SHIFT
+
+commit 50896e180c6aa3a9c61a26ced99e15d602666a4c upstream.
+
+L1 Terminal Fault (L1TF) is a speculation related vulnerability. The CPU
+speculates on PTE entries which do not have the PRESENT bit set, if the
+content of the resulting physical address is available in the L1D cache.
+
+The OS side mitigation makes sure that a !PRESENT PTE entry points to a
+physical address outside the actually existing and cacheable memory
+space. This is achieved by inverting the upper bits of the PTE. Due to the
+address space limitations this only works for 64bit and 32bit PAE kernels,
+but not for 32bit non PAE.
+
+This mitigation applies to both host and guest kernels, but in case of a
+64bit host (hypervisor) and a 32bit PAE guest, inverting the upper bits of
+the PAE address space (44bit) is not enough if the host has more than 43
+bits of populated memory address space, because the speculation treats the
+PTE content as a physical host address bypassing EPT.
+
+The host (hypervisor) protects itself against the guest by flushing L1D as
+needed, but pages inside the guest are not protected against attacks from
+other processes inside the same guest.
+
+For the guest the inverted PTE mask has to match the host to provide the
+full protection for all pages the host could possibly map into the
+guest. The hosts populated address space is not known to the guest, so the
+mask must cover the possible maximal host address space, i.e. 52 bit.
+
+On 32bit PAE the maximum PTE mask is currently set to 44 bit because that
+is the limit imposed by 32bit unsigned long PFNs in the VMs. This limits
+the mask to be below what the host could possibly use for physical pages.
+
+The L1TF PROT_NONE protection code uses the PTE masks to determine which
+bits to invert to make sure the higher bits are set for unmapped entries to
+prevent L1TF speculation attacks against EPT inside guests.
+
+In order to invert all bits that could be used by the host, increase
+__PHYSICAL_PAGE_SHIFT to 52 to match 64bit.
+
+The real limit for a 32bit PAE kernel is still 44 bits because all Linux
+PTEs are created from unsigned long PFNs, so they cannot be higher than 44
+bits on a 32bit kernel. So these extra PFN bits should be never set. The
+only users of this macro are using it to look at PTEs, so it's safe.
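Illustration only (not taken from the patch): with an example non-present PTE value, inverting under a 44-bit wide mask leaves bits 44-51 clear, while the 52-bit mask sets every bit a 64bit host could use:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        uint64_t pte = 0x1234000ULL;    /* assumed non-present PAE pte for the demo */
        uint64_t mask44 = ((1ULL << 44) - 1) & ~((1ULL << PAGE_SHIFT) - 1);
        uint64_t mask52 = ((1ULL << 52) - 1) & ~((1ULL << PAGE_SHIFT) - 1);
        printf("inverted with 44bit mask: %#llx\n", (unsigned long long)(pte ^ mask44));
        printf("inverted with 52bit mask: %#llx\n", (unsigned long long)(pte ^ mask52));
        return 0;
    }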
+
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/page_32_types.h | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/page_32_types.h
++++ b/arch/x86/include/asm/page_32_types.h
+@@ -27,8 +27,13 @@
+ #define N_EXCEPTION_STACKS 1
+ 
+ #ifdef CONFIG_X86_PAE
+-/* 44=32+12, the limit we can fit into an unsigned long pfn */
+-#define __PHYSICAL_MASK_SHIFT	44
++/*
++ * This is beyond the 44 bit limit imposed by the 32bit long pfns,
++ * but we need the full mask to make sure inverted PROT_NONE
++ * entries have all the host bits set in a guest.
++ * The real limit is still 44 bits.
++ */
++#define __PHYSICAL_MASK_SHIFT	52
+ #define __VIRTUAL_MASK_SHIFT	32
+ 
+ #else  /* !CONFIG_X86_PAE */
diff --git a/queue-3.16/x86-speculation-l1tf-invert-all-not-present-mappings.patch b/queue-3.16/x86-speculation-l1tf-invert-all-not-present-mappings.patch
new file mode 100644
index 0000000..9a5eb67
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-invert-all-not-present-mappings.patch
@@ -0,0 +1,31 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:36 -0700
+Subject: x86/speculation/l1tf: Invert all not present mappings
+
+commit f22cc87f6c1f771b57c407555cfefd811cdd9507 upstream.
+
+For kernel mappings PAGE_PROTNONE is not necessarily set for a non present
+mapping, but the inversion logic explicitly checks for !PRESENT and
+PROT_NONE.
+
+Remove the PROT_NONE check and make the inversion unconditional for all not
+present mappings.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable-invert.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/pgtable-invert.h
++++ b/arch/x86/include/asm/pgtable-invert.h
+@@ -6,7 +6,7 @@
+ 
+ static inline bool __pte_needs_invert(u64 val)
+ {
+-	return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
++	return !(val & _PAGE_PRESENT);
+ }
+ 
+ /* Get a mask to xor with the page table entry to get the correct pfn. */
diff --git a/queue-3.16/x86-speculation-l1tf-limit-swap-file-size-to-max_pa-2.patch b/queue-3.16/x86-speculation-l1tf-limit-swap-file-size-to-max_pa-2.patch
new file mode 100644
index 0000000..3f61b90
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-limit-swap-file-size-to-max_pa-2.patch
@@ -0,0 +1,128 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:28 -0700
+Subject: x86/speculation/l1tf: Limit swap file size to MAX_PA/2
+
+commit 377eeaa8e11fe815b1d07c81c4a0e2843a8c15eb upstream.
+
+For the L1TF workaround it's necessary to limit the swap file size to below
+MAX_PA/2, so that the higher bits of the inverted swap offset never point
+to valid memory.
+
+Add a mechanism for the architecture to override the swap file size check
+in swapfile.c and add an x86-specific max swapfile check function that
+enforces that limit.
+
+The check is only enabled if the CPU is vulnerable to L1TF.
+
+In VMs with 42bit MAX_PA the typical limit is 2TB now, on a native system
+with 46bit PA it is 32TB. The limit is only per individual swap file, so
+it's always possible to exceed these limits with multiple swap files or
+partitions.
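The quoted limits can be reproduced with a few lines of C (illustration only, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        int bits[] = { 42, 46 };
        for (int i = 0; i < 2; i++) {
            /* l1tf_pfn_limit() + 1 pages, i.e. MAX_PA/2 in bytes */
            uint64_t pages = 1ULL << (bits[i] - 1 - PAGE_SHIFT);
            printf("MAX_PA=%d bits -> swap limit %llu TiB\n",
                   bits[i], (unsigned long long)((pages << PAGE_SHIFT) >> 40));
        }
        return 0;
    }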
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -4,6 +4,8 @@
+ #include <linux/swap.h>
+ #include <linux/memblock.h>
+ #include <linux/bootmem.h>	/* for max_low_pfn */
++#include <linux/swapfile.h>
++#include <linux/swapops.h>
+ 
+ #include <asm/cacheflush.h>
+ #include <asm/e820.h>
+@@ -699,3 +701,15 @@ void __init zone_sizes_init(void)
+ 	free_area_init_nodes(max_zone_pfns);
+ }
+ 
++unsigned long max_swapfile_size(void)
++{
++	unsigned long pages;
++
++	pages = generic_max_swapfile_size();
++
++	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
++		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
++		pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
++	}
++	return pages;
++}
+--- a/include/linux/swapfile.h
++++ b/include/linux/swapfile.h
+@@ -9,5 +9,7 @@ extern spinlock_t swap_lock;
+ extern struct plist_head swap_active_head;
+ extern struct swap_info_struct *swap_info[];
+ extern int try_to_unuse(unsigned int, bool, unsigned long);
++extern unsigned long generic_max_swapfile_size(void);
++extern unsigned long max_swapfile_size(void);
+ 
+ #endif /* _LINUX_SWAPFILE_H */
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -2166,6 +2166,35 @@ static int claim_swapfile(struct swap_in
+ 	return 0;
+ }
+ 
++
++/*
++ * Find out how many pages are allowed for a single swap device. There
++ * are two limiting factors:
++ * 1) the number of bits for the swap offset in the swp_entry_t type, and
++ * 2) the number of bits in the swap pte, as defined by the different
++ * architectures.
++ *
++ * In order to find the largest possible bit mask, a swap entry with
++ * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
++ * decoded to a swp_entry_t again, and finally the swap offset is
++ * extracted.
++ *
++ * This will mask all the bits from the initial ~0UL mask that can't
++ * be encoded in either the swp_entry_t or the architecture definition
++ * of a swap pte.
++ */
++unsigned long generic_max_swapfile_size(void)
++{
++	return swp_offset(pte_to_swp_entry(
++			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
++}
++
++/* Can be overridden by an architecture for additional checks. */
++__weak unsigned long max_swapfile_size(void)
++{
++	return generic_max_swapfile_size();
++}
++
+ static unsigned long read_swap_header(struct swap_info_struct *p,
+ 					union swap_header *swap_header,
+ 					struct inode *inode)
+@@ -2201,22 +2230,7 @@ static unsigned long read_swap_header(st
+ 	p->cluster_next = 1;
+ 	p->cluster_nr = 0;
+ 
+-	/*
+-	 * Find out how many pages are allowed for a single swap
+-	 * device. There are two limiting factors: 1) the number
+-	 * of bits for the swap offset in the swp_entry_t type, and
+-	 * 2) the number of bits in the swap pte as defined by the
+-	 * different architectures. In order to find the
+-	 * largest possible bit mask, a swap entry with swap type 0
+-	 * and swap offset ~0UL is created, encoded to a swap pte,
+-	 * decoded to a swp_entry_t again, and finally the swap
+-	 * offset is extracted. This will mask all the bits from
+-	 * the initial ~0UL mask that can't be encoded in either
+-	 * the swp_entry_t or the architecture definition of a
+-	 * swap pte.
+-	 */
+-	maxpages = swp_offset(pte_to_swp_entry(
+-			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
++	maxpages = max_swapfile_size();
+ 	last_page = swap_header->info.last_page;
+ 	if (last_page > maxpages) {
+ 		pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
diff --git a/queue-3.16/x86-speculation-l1tf-make-pmd-pud_mknotpresent-invert.patch b/queue-3.16/x86-speculation-l1tf-make-pmd-pud_mknotpresent-invert.patch
new file mode 100644
index 0000000..744870c
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-make-pmd-pud_mknotpresent-invert.patch
@@ -0,0 +1,55 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:37 -0700
+Subject: x86/speculation/l1tf: Make pmd/pud_mknotpresent() invert
+
+commit 0768f91530ff46683e0b372df14fd79fe8d156e5 upstream.
+
+Some cases in THP like:
+  - MADV_FREE
+  - mprotect
+  - split
+
+mark the PMD non-present temporarily to prevent races. The window for
+an L1TF attack in these contexts is very small, but it wants to be fixed
+for correctness' sake.
+
+Use the proper low level functions for pmd/pud_mknotpresent() to address
+this.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 3.16:
+ - Drop change to pud_mknotpresent()
+ - pmd_mknotpresent() does not touch _PAGE_NONE]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable.h | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -308,11 +308,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pm
+ 	return pmd_set_flags(pmd, _PAGE_RW);
+ }
+ 
+-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+-{
+-	return pmd_clear_flags(pmd, _PAGE_PRESENT);
+-}
+-
+ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+ {
+ 	pudval_t v = native_pud_val(pud);
+@@ -393,6 +388,12 @@ static inline pud_t pfn_pud(unsigned lon
+ 	return __pud(pfn | massage_pgprot(pgprot));
+ }
+ 
++static inline pmd_t pmd_mknotpresent(pmd_t pmd)
++{
++	return pfn_pmd(pmd_pfn(pmd),
++		       __pgprot(pmd_flags(pmd) & ~_PAGE_PRESENT));
++}
++
+ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
+ 
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
diff --git a/queue-3.16/x86-speculation-l1tf-make-sure-the-first-page-is-always-reserved.patch b/queue-3.16/x86-speculation-l1tf-make-sure-the-first-page-is-always-reserved.patch
new file mode 100644
index 0000000..d5f661e
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-make-sure-the-first-page-is-always-reserved.patch
@@ -0,0 +1,39 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:25 -0700
+Subject: x86/speculation/l1tf: Make sure the first page is always reserved
+
+commit 10a70416e1f067f6c4efda6ffd8ea96002ac4223 upstream.
+
+The L1TF workaround doesn't make any attempt to mitigate speculative accesses
+to the first physical page for zeroed PTEs. Normally it only contains some
+data from the early real mode BIOS.
+
+It's not entirely clear that the first page is reserved in all
+configurations, so add an extra reservation call to make sure it is really
+reserved. In most configurations (e.g.  with the standard reservations)
+it's likely a nop.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/kernel/setup.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -860,6 +860,12 @@ void __init setup_arch(char **cmdline_p)
+ 	memblock_reserve(__pa_symbol(_text),
+ 			 (unsigned long)__bss_stop - (unsigned long)_text);
+ 
++	/*
++	 * Make sure page 0 is always reserved because on systems with
++	 * L1TF its contents can be leaked to user processes.
++	 */
++	memblock_reserve(0, PAGE_SIZE);
++
+ 	early_reserve_initrd();
+ 
+ 	/*
diff --git a/queue-3.16/x86-speculation-l1tf-protect-pae-swap-entries-against-l1tf.patch b/queue-3.16/x86-speculation-l1tf-protect-pae-swap-entries-against-l1tf.patch
new file mode 100644
index 0000000..580794a
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-protect-pae-swap-entries-against-l1tf.patch
@@ -0,0 +1,88 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Fri, 22 Jun 2018 17:39:33 +0200
+Subject: x86/speculation/l1tf: Protect PAE swap entries against L1TF
+
+commit 0d0f6249058834ffe1ceaad0bb31464af66f6e7a upstream.
+
+The PAE 3-level paging code currently doesn't mitigate L1TF by flipping the
+offset bits, and uses the high PTE word, thus bits 32-36 for type, 37-63 for
+offset. The lower word is zeroed, thus systems with less than 4GB memory are
+safe. With 4GB to 128GB the swap type selects the memory locations vulnerable
+to L1TF; with even more memory, the swap offset also influences the address.
+This might be a problem with 32bit PAE guests running on large 64bit hosts.
+
+By continuing to keep the whole swap entry in either high or low 32bit word of
+PTE we would limit the swap size too much. Thus this patch uses the whole PAE
+PTE with the same layout as the 64bit version does. The macros just become a
+bit tricky since they assume the arch-dependent swp_entry_t to be 32bit.
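For illustration only (not part of the patch), a self-contained sketch of the PAE helper macros: the 64bit pte is built from a 32bit type/offset pair, with the offset stored inverted so the high bits end up set; bit positions are the assumed upstream values:

    #include <stdio.h>
    #include <stdint.h>

    #define SWP_TYPE_BITS        5
    #define SWP_OFFSET_FIRST_BIT 9        /* _PAGE_BIT_PROTNONE + 1 */
    #define SWP_OFFSET_SHIFT     (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)

    /* __swp_pteval_entry(): offset is complemented before being placed at bit 9 */
    static uint64_t swp_pteval_entry(uint32_t type, uint32_t offset)
    {
        return (~(uint64_t)offset << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) |
               ((uint64_t)type << (64 - SWP_TYPE_BITS));
    }

    /* the decode macros truncate back to the 32bit arch-dependent swp_entry_t */
    static uint32_t pteval_swp_type(uint64_t pte)   { return pte >> (64 - SWP_TYPE_BITS); }
    static uint32_t pteval_swp_offset(uint64_t pte) { return ~pte << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT; }

    int main(void)
    {
        uint64_t pte = swp_pteval_entry(2, 0x9abcd);
        printf("pte=%#llx type=%u offset=%#x\n",
               (unsigned long long)pte, pteval_swp_type(pte), pteval_swp_offset(pte));
        return 0;
    }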
+
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+[bwh: Backported to 3.16: CONFIG_PGTABLE_LEVELS is not defined; use other
+ config symbols in the condition.]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable-3level.h | 35 +++++++++++++++++++++++++--
+ arch/x86/mm/init.c                    |  2 +-
+ 2 files changed, 34 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable-3level.h
++++ b/arch/x86/include/asm/pgtable-3level.h
+@@ -177,12 +177,43 @@ static inline pmd_t native_pmdp_get_and_
+ #endif
+ 
+ /* Encode and de-code a swap entry */
++#define SWP_TYPE_BITS		5
++
++#define SWP_OFFSET_FIRST_BIT	(_PAGE_BIT_PROTNONE + 1)
++
++/* We always extract/encode the offset by shifting it all the way up, and then down again */
++#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
++
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
+ #define __swp_type(x)			(((x).val) & 0x1f)
+ #define __swp_offset(x)			((x).val >> 5)
+ #define __swp_entry(type, offset)	((swp_entry_t){(type) | (offset) << 5})
+-#define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
+-#define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
++
++/*
++ * Normally, __swp_entry() converts from arch-independent swp_entry_t to
++ * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result
++ * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the
++ * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to
++ * __swp_entry_to_pte() through the following helper macro based on 64bit
++ * __swp_entry().
++ */
++#define __swp_pteval_entry(type, offset) ((pteval_t) { \
++	(~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++	| ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) })
++
++#define __swp_entry_to_pte(x)	((pte_t){ .pte = \
++		__swp_pteval_entry(__swp_type(x), __swp_offset(x)) })
++/*
++ * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent
++ * swp_entry_t, but also has to convert it from 64bit to the 32bit
++ * intermediate representation, using the following macros based on 64bit
++ * __swp_type() and __swp_offset().
++ */
++#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS)))
++#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT))
++
++#define __pte_to_swp_entry(pte)	(__swp_entry(__pteval_swp_type(pte), \
++					     __pteval_swp_offset(pte)))
+ 
+ #include <asm/pgtable-invert.h>
+ 
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -715,7 +715,7 @@ unsigned long max_swapfile_size(void)
+ 		 * We encode swap offsets also with 3 bits below those for pfn
+ 		 * which makes the usable limit higher.
+ 		 */
+-#ifdef CONFIG_X86_64
++#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+ 		l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
+ #endif
+ 		pages = min_t(unsigned long, l1tf_limit, pages);
diff --git a/queue-3.16/x86-speculation-l1tf-protect-prot_none-ptes-against-speculation.patch b/queue-3.16/x86-speculation-l1tf-protect-prot_none-ptes-against-speculation.patch
new file mode 100644
index 0000000..23a73c6
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-protect-prot_none-ptes-against-speculation.patch
@@ -0,0 +1,254 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:24 -0700
+Subject: x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation
+
+commit 6b28baca9b1f0d4a42b865da7a05b1c81424bd5c upstream.
+
+When PTEs are set to PROT_NONE the kernel just clears the Present bit and
+preserves the PFN, which creates attack surface for L1TF speculation
+attacks.
+
+This is important inside guests, because L1TF speculation bypasses physical
+page remapping. While the host has its own mitigations preventing leaking
+data from other VMs into the guest, this would still risk leaking the wrong
+page inside the current guest.
+
+This uses the same technique as Linus' swap entry patch: while an entry is
+in PROTNONE state, invert the complete PFN part of it. This ensures
+that the highest bit will point to non-existing memory.
+
+The invert is done by pte/pmd_modify and pfn/pmd/pud_pte for PROTNONE and
+pte/pmd/pud_pfn undo it.
+
+This assumes that no code path touches the PFN part of a PTE directly
+without using these primitives.
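A minimal standalone sketch of the inversion round trip (not part of the patch); the bit values are assumed for the demo rather than taken from the kernel headers:

    #include <stdio.h>
    #include <stdint.h>

    #define _PAGE_PRESENT  0x001ULL
    #define _PAGE_PROTNONE 0x100ULL              /* global bit reused for PROT_NONE */
    #define PTE_PFN_MASK   0x000ffffffffff000ULL
    #define PAGE_SHIFT     12

    /* mask is all ones only for a not-present PROT_NONE entry */
    static uint64_t protnone_mask(uint64_t val)
    {
        return (val & (_PAGE_PRESENT | _PAGE_PROTNONE)) == _PAGE_PROTNONE ? ~0ULL : 0;
    }

    int main(void)
    {
        uint64_t pfn = 0x12345;
        /* pfn_pte() for a PROT_NONE mapping stores the inverted pfn ... */
        uint64_t phys = (pfn << PAGE_SHIFT) ^ protnone_mask(_PAGE_PROTNONE);
        uint64_t pte  = (phys & PTE_PFN_MASK) | _PAGE_PROTNONE;
        /* ... so the raw bits point at non-existent memory ... */
        printf("raw pte %#llx\n", (unsigned long long)pte);
        /* ... and pte_pfn() undoes the inversion */
        uint64_t back = ((pte ^ protnone_mask(pte)) & PTE_PFN_MASK) >> PAGE_SHIFT;
        printf("recovered pfn %#llx\n", (unsigned long long)back);
        return 0;
    }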
+
+This doesn't handle the case that MMIO is at the top of the CPU physical
+memory. If such an MMIO region was exposed by an unprivileged driver for
+mmap it would be possible to attack some real memory.  However this
+situation is all rather unlikely.
+
+For 32bit non PAE the inversion is not done because there are really not
+enough bits to protect anything.
+
+Q: Why does the guest need to be protected when the HyperVisor already has
+   L1TF mitigations?
+
+A: Here's an example:
+
+   Physical pages 1 2 get mapped into a guest as
+   GPA 1 -> PA 2
+   GPA 2 -> PA 1
+   through EPT.
+
+   The L1TF speculation ignores the EPT remapping.
+
+   Now the guest kernel maps GPA 1 to process A and GPA 2 to process B, and
+   they belong to different users and should be isolated.
+
+   A sets the GPA 1 PA 2 PTE to PROT_NONE to bypass the EPT remapping and
+   gets read access to the underlying physical page. Which in this case
+   points to PA 2, so it can read process B's data, if it happened to be in
+   L1, so isolation inside the guest is broken.
+
+   There's nothing the hypervisor can do about this. This mitigation has to
+   be done in the guest itself.
+
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+[bwh: Backported to 3.16:
+ - s/check_pgprot/massage_pgprot/
+ - Keep using PTE_PFN_MASK to extract PFN from pmd_pfn() and pud_pfn(),
+   as we don't need to worry about the PAT bit being set here]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable-2level.h | 17 +++++++++++
+ arch/x86/include/asm/pgtable-3level.h |  2 ++
+ arch/x86/include/asm/pgtable-invert.h | 32 +++++++++++++++++++
+ arch/x86/include/asm/pgtable.h        | 44 +++++++++++++++++++--------
+ arch/x86/include/asm/pgtable_64.h     |  2 ++
+ 5 files changed, 84 insertions(+), 13 deletions(-)
+ create mode 100644 arch/x86/include/asm/pgtable-invert.h
+
+--- a/arch/x86/include/asm/pgtable-2level.h
++++ b/arch/x86/include/asm/pgtable-2level.h
+@@ -77,4 +77,21 @@ static inline unsigned long pte_bitop(un
+ #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
+ #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
+ 
++/* No inverted PFNs on 2 level page tables */
++
++static inline u64 protnone_mask(u64 val)
++{
++	return 0;
++}
++
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
++{
++	return val;
++}
++
++static inline bool __pte_needs_invert(u64 val)
++{
++	return false;
++}
++
+ #endif /* _ASM_X86_PGTABLE_2LEVEL_H */
+--- a/arch/x86/include/asm/pgtable-3level.h
++++ b/arch/x86/include/asm/pgtable-3level.h
+@@ -184,4 +184,6 @@ static inline pmd_t native_pmdp_get_and_
+ #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
+ #define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
+ 
++#include <asm/pgtable-invert.h>
++
+ #endif /* _ASM_X86_PGTABLE_3LEVEL_H */
+--- /dev/null
++++ b/arch/x86/include/asm/pgtable-invert.h
+@@ -0,0 +1,32 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_PGTABLE_INVERT_H
++#define _ASM_PGTABLE_INVERT_H 1
++
++#ifndef __ASSEMBLY__
++
++static inline bool __pte_needs_invert(u64 val)
++{
++	return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
++}
++
++/* Get a mask to xor with the page table entry to get the correct pfn. */
++static inline u64 protnone_mask(u64 val)
++{
++	return __pte_needs_invert(val) ?  ~0ull : 0;
++}
++
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
++{
++	/*
++	 * When a PTE transitions from NONE to !NONE or vice-versa
++	 * invert the PFN part to stop speculation.
++	 * pte_pfn undoes this when needed.
++	 */
++	if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
++		val = (val & ~mask) | (~val & mask);
++	return val;
++}
++
++#endif /* __ASSEMBLY__ */
++
++#endif
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -141,19 +141,29 @@ static inline int pte_special(pte_t pte)
+ 		(pte_flags(pte) & (_PAGE_PRESENT|_PAGE_PROTNONE));
+ }
+ 
++/* Entries that were set to PROT_NONE are inverted */
++
++static inline u64 protnone_mask(u64 val);
++
+ static inline unsigned long pte_pfn(pte_t pte)
+ {
+-	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
++	unsigned long pfn = pte_val(pte);
++	pfn ^= protnone_mask(pfn);
++	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+ 
+ static inline unsigned long pmd_pfn(pmd_t pmd)
+ {
+-	return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
++	unsigned long pfn = pmd_val(pmd);
++	pfn ^= protnone_mask(pfn);
++	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+ 
+ static inline unsigned long pud_pfn(pud_t pud)
+ {
+-	return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
++	unsigned long pfn = pud_val(pud);
++	pfn ^= protnone_mask(pfn);
++	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+ 
+ #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
+@@ -361,25 +371,33 @@ static inline pgprotval_t massage_pgprot
+ 
+ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+ {
+-	return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
+-		     massage_pgprot(pgprot));
++	phys_addr_t pfn = page_nr << PAGE_SHIFT;
++	pfn ^= protnone_mask(pgprot_val(pgprot));
++	pfn &= PTE_PFN_MASK;
++	return __pte(pfn | massage_pgprot(pgprot));
+ }
+ 
+ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+ {
+-	return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
+-		     massage_pgprot(pgprot));
++	phys_addr_t pfn = page_nr << PAGE_SHIFT;
++	pfn ^= protnone_mask(pgprot_val(pgprot));
++	pfn &= PTE_PFN_MASK;
++	return __pmd(pfn | massage_pgprot(pgprot));
+ }
+ 
+ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
+ {
+-	return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
+-		     massage_pgprot(pgprot));
++	phys_addr_t pfn = page_nr << PAGE_SHIFT;
++	pfn ^= protnone_mask(pgprot_val(pgprot));
++	pfn &= PTE_PFN_MASK;
++	return __pud(pfn | massage_pgprot(pgprot));
+ }
+ 
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
++
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
+-	pteval_t val = pte_val(pte);
++	pteval_t val = pte_val(pte), oldval = val;
+ 
+ 	/*
+ 	 * Chop off the NX bit (if present), and add the NX portion of
+@@ -387,17 +405,17 @@ static inline pte_t pte_modify(pte_t pte
+ 	 */
+ 	val &= _PAGE_CHG_MASK;
+ 	val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
+-
++	val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
+ 	return __pte(val);
+ }
+ 
+ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+ {
+-	pmdval_t val = pmd_val(pmd);
++	pmdval_t val = pmd_val(pmd), oldval = val;
+ 
+ 	val &= _HPAGE_CHG_MASK;
+ 	val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+-
++	val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
+ 	return __pmd(val);
+ }
+ 
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -239,6 +239,8 @@ extern void cleanup_highmap(void);
+ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
+ extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
+ 
++#include <asm/pgtable-invert.h>
++
+ #endif /* !__ASSEMBLY__ */
+ 
+ #endif /* _ASM_X86_PGTABLE_64_H */
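+
+As a rough illustration of the scheme introduced above, here is a standalone
+sketch (not the kernel code; the _PAGE_* and PTE_PFN_MASK values are
+illustrative stand-ins for the real x86 definitions) showing that a PTE
+flipped to PROT_NONE carries inverted PFN bits, while pte_pfn() still
+recovers the original frame number:
+
+	#include <assert.h>
+	#include <stdint.h>
+	#include <stdio.h>
+
+	#define _PAGE_PRESENT	0x001ULL
+	#define _PAGE_PROTNONE	0x100ULL		/* global bit, reused for PROT_NONE */
+	#define PTE_PFN_MASK	0x000FFFFFFFFFF000ULL	/* pfn bits 12..51 */
+
+	static int pte_needs_invert(uint64_t val)
+	{
+		return (val & (_PAGE_PRESENT | _PAGE_PROTNONE)) == _PAGE_PROTNONE;
+	}
+
+	static uint64_t protnone_mask(uint64_t val)
+	{
+		return pte_needs_invert(val) ? ~0ULL : 0;
+	}
+
+	static uint64_t flip_protnone_guard(uint64_t oldval, uint64_t val, uint64_t mask)
+	{
+		/* invert the pfn bits when crossing the NONE <-> !NONE boundary */
+		if (pte_needs_invert(oldval) != pte_needs_invert(val))
+			val = (val & ~mask) | (~val & mask);
+		return val;
+	}
+
+	static uint64_t pte_pfn(uint64_t pte)
+	{
+		uint64_t pfn = pte ^ protnone_mask(pte);
+
+		return (pfn & PTE_PFN_MASK) >> 12;
+	}
+
+	int main(void)
+	{
+		uint64_t pte = (0x1234ULL << 12) | _PAGE_PRESENT;
+		/* mprotect(PROT_NONE)-style transition: present -> protnone */
+		uint64_t none = flip_protnone_guard(pte,
+				(pte & ~_PAGE_PRESENT) | _PAGE_PROTNONE, PTE_PFN_MASK);
+
+		/* the raw pfn bits no longer point at frame 0x1234 ... */
+		assert((none & PTE_PFN_MASK) != (0x1234ULL << 12));
+		/* ... but pte_pfn() undoes the inversion */
+		assert(pte_pfn(none) == 0x1234);
+		printf("raw pte %#llx decodes to pfn %#llx\n",
+		       (unsigned long long)none, (unsigned long long)pte_pfn(none));
+		return 0;
+	}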
diff --git a/queue-3.16/x86-speculation-l1tf-protect-swap-entries-against-l1tf.patch b/queue-3.16/x86-speculation-l1tf-protect-swap-entries-against-l1tf.patch
new file mode 100644
index 0000000..68ea59b
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-protect-swap-entries-against-l1tf.patch
@@ -0,0 +1,81 @@
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 13 Jun 2018 15:48:23 -0700
+Subject: x86/speculation/l1tf: Protect swap entries against L1TF
+
+commit 2f22b4cd45b67b3496f4aa4c7180a1271c6452f6 upstream.
+
+With L1 terminal fault the CPU speculates into unmapped PTEs, and the
+resulting side effects allow reading the memory the PTE is pointing to, if
+its contents are still in the L1 cache.
+
+For swapped out pages Linux uses unmapped PTEs and stores a swap entry into
+them.
+
+To protect against L1TF, the swap entry must not point to valid memory,
+which requires setting higher bits (between bit 36 and bit 45) that are
+inside the CPU's physical address space, but outside any real memory.
+
+To do this, invert the offset so that the higher bits are always set, as
+long as the swap file is not too big.
+
+Note there is no workaround for 32bit !PAE, or on systems which have more
+than MAX_PA/2 worth of memory. The latter case is very unlikely to happen
+on real systems.
+
+[AK: updated description and minor tweaks. Split out from the original
+     patch]
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+[bwh: Backported to 3.16: Bit 9 may be reserved for PAGE_BIT_NUMA here]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable_64.h | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -167,7 +167,7 @@ static inline int pgd_large(pgd_t pgd) {
+  *
+  * |     ...                | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
+  * |     ...                |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
+- * | TYPE (59-63) |  OFFSET (10-58) | 0 |0|0|X|X| X| X|X|SD|0| <- swp entry
++ * | TYPE (59-63) | ~OFFSET (10-58) | 0 |0|0|X|X| X| X|X|SD|0| <- swp entry
+  *
+  * G (8) is aliased and used as a PROT_NONE indicator for
+  * !present ptes.  We need to start storing swap entries above
+@@ -180,6 +180,9 @@ static inline int pgd_large(pgd_t pgd) {
+  *
+  * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
+  * but also L and G.
++ *
++ * The offset is inverted by a binary not operation to make the high
++ * physical bits set.
+  */
+ #define SWP_TYPE_BITS		5
+ 
+@@ -199,13 +202,15 @@ static inline int pgd_large(pgd_t pgd) {
+ #define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
+ 
+ /* Shift up (to get rid of type), then down to get value */
+-#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
++#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
+ 
+ /*
+  * Shift the offset up "too far" by TYPE bits, then down again
++ * The offset is inverted by a binary not operation to make the high
++ * physical bits set.
+  */
+ #define __swp_entry(type, offset) ((swp_entry_t) { \
+-	((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++	(~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+ 	| ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
+ 
+ #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
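+
+A standalone sketch of the inverted encoding (userspace illustration, not
+the kernel header; SWP_TYPE_BITS and the shifts follow the hunk above, with
+the offset field starting at bit 10 as in the bit diagram):
+
+	#include <assert.h>
+	#include <stdint.h>
+	#include <stdio.h>
+
+	#define SWP_TYPE_BITS		5
+	#define SWP_OFFSET_FIRST_BIT	10	/* per the bit layout comment */
+	#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
+
+	static uint64_t swp_entry(uint64_t type, uint64_t offset)
+	{
+		return (~offset << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) |
+		       (type << (64 - SWP_TYPE_BITS));
+	}
+
+	static uint64_t swp_type(uint64_t entry)
+	{
+		return entry >> (64 - SWP_TYPE_BITS);
+	}
+
+	static uint64_t swp_offset(uint64_t entry)
+	{
+		return ~entry << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT;
+	}
+
+	int main(void)
+	{
+		uint64_t e = swp_entry(3, 0x1000);	/* type 3, small offset */
+
+		/* the round trip still works ... */
+		assert(swp_type(e) == 3 && swp_offset(e) == 0x1000);
+		/* ... but most offset bits in the stored entry are now set, so the
+		 * non-present PTE no longer points at low physical memory */
+		printf("raw swap pte = %#llx\n", (unsigned long long)e);
+		return 0;
+	}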
diff --git a/queue-3.16/x86-speculation-l1tf-suggest-what-to-do-on-systems-with-too-much-ram.patch b/queue-3.16/x86-speculation-l1tf-suggest-what-to-do-on-systems-with-too-much-ram.patch
new file mode 100644
index 0000000..34c4327
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-suggest-what-to-do-on-systems-with-too-much-ram.patch
@@ -0,0 +1,41 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Thu, 23 Aug 2018 16:21:29 +0200
+Subject: x86/speculation/l1tf: Suggest what to do on systems with too much RAM
+
+commit 6a012288d6906fee1dbc244050ade1dafe4a9c8d upstream.
+
+Two users have reported [1] that they have an "extremely unlikely" system
+with more than MAX_PA/2 memory, where the L1TF mitigation is not effective.
+
+Make the warning more helpful by suggesting the proper mem=X kernel boot
+parameter that makes it effective, and by linking to the L1TF document to
+help decide whether the mitigation is worth the unusable RAM.
+
+[1] https://bugzilla.suse.com/show_bug.cgi?id=1105536
+
+Suggested-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: "H . Peter Anvin" <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Link: https://lkml.kernel.org/r/966571f0-9d7f-43dc-92c6-a10eec7a1254@suse.cz
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/kernel/cpu/bugs.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -717,6 +717,10 @@ static void __init l1tf_select_mitigatio
+ 	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+ 	if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
+ 		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
++		pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
++				half_pa);
++		pr_info("However, doing so will make a part of your RAM unusable.\n");
++		pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n");
+ 		return;
+ 	}
+ 
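+For reference, a rough sketch of where the suggested mem= value comes from
+(illustrative only; the 46 physical address bits are an assumption and
+l1tf_pfn_limit() is a stand-in for the kernel helper):
+
+	#include <stdio.h>
+
+	#define PAGE_SHIFT	12
+
+	/* stand-in for l1tf_pfn_limit(): half the physical address space, in pages */
+	static unsigned long long l1tf_pfn_limit(void)
+	{
+		return 1ULL << (46 - 1 - PAGE_SHIFT);	/* assumed 46 phys bits */
+	}
+
+	int main(void)
+	{
+		unsigned long long half_pa = l1tf_pfn_limit() << PAGE_SHIFT;
+
+		/* the value the new pr_info() line suggests passing as mem= */
+		printf("mem=%llu (%llu GiB)\n", half_pa, half_pa >> 30);
+		return 0;
+	}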
diff --git a/queue-3.16/x86-speculation-l1tf-unbreak-__have_arch_pfn_modify_allowed.patch b/queue-3.16/x86-speculation-l1tf-unbreak-__have_arch_pfn_modify_allowed.patch
new file mode 100644
index 0000000..f899578
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-unbreak-__have_arch_pfn_modify_allowed.patch
@@ -0,0 +1,63 @@
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Sat, 14 Jul 2018 21:56:13 +0200
+Subject: x86/speculation/l1tf: Unbreak !__HAVE_ARCH_PFN_MODIFY_ALLOWED
+ architectures
+
+commit 8f2adf3d2118cc0822b83a7bb43475f9149a1d26 upstream.
+
+commit 6c26fcd2abfe0a56bbd95271fce02df2896cfd24 upstream.
+
+pfn_modify_allowed() and arch_has_pfn_modify_check() are outside of the
+!__ASSEMBLY__ section in include/asm-generic/pgtable.h, which confuses the
+assembler on architectures that don't have __HAVE_ARCH_PFN_MODIFY_ALLOWED
+(e.g. ia64) and breaks the build:
+
+    include/asm-generic/pgtable.h: Assembler messages:
+    include/asm-generic/pgtable.h:538: Error: Unknown opcode `static inline bool pfn_modify_allowed(unsigned long pfn,pgprot_t prot)'
+    include/asm-generic/pgtable.h:540: Error: Unknown opcode `return true'
+    include/asm-generic/pgtable.h:543: Error: Unknown opcode `static inline bool arch_has_pfn_modify_check(void)'
+    include/asm-generic/pgtable.h:545: Error: Unknown opcode `return false'
+    arch/ia64/kernel/entry.S:69: Error: `mov' does not fit into bundle
+
+Move those two static inlines into the !__ASSEMBLY__ section so that they
+don't confuse the asm build pass.
+
+Fixes: 42e4089c7890 ("x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings")
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[groeck: Context changes]
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/asm-generic/pgtable.h | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -806,12 +806,6 @@ static inline void pmdp_set_numa(struct
+ 
+ #endif /* CONFIG_MMU */
+ 
+-#endif /* !__ASSEMBLY__ */
+-
+-#ifndef io_remap_pfn_range
+-#define io_remap_pfn_range remap_pfn_range
+-#endif
+-
+ #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
+ static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+ {
+@@ -822,6 +816,12 @@ static inline bool arch_has_pfn_modify_c
+ {
+ 	return false;
+ }
++#endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
++
++#endif /* !__ASSEMBLY__ */
++
++#ifndef io_remap_pfn_range
++#define io_remap_pfn_range remap_pfn_range
+ #endif
+ 
+ #endif /* _ASM_GENERIC_PGTABLE_H */