| From: Borislav Petkov <bp@suse.de> |
| Date: Thu, 4 Jan 2018 17:42:45 +0100 |
| Subject: Map the vsyscall page with _PAGE_USER |
| |
| From: Borislav Petkov <bp@suse.de> |
| |
| This needs to happen early in kaiser_pagetable_walk(), before the |
| hierarchy is established so that _PAGE_USER permission can be really |
| set. |
| |
| A proper fix would be to teach kaiser_pagetable_walk() to update those |
| permissions but the vsyscall page is the only exception here so ... |
| |
| Signed-off-by: Borislav Petkov <bp@suse.de> |
| Acked-by: Hugh Dickins <hughd@google.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| arch/x86/entry/vsyscall/vsyscall_64.c | 5 +++++ |
| arch/x86/include/asm/vsyscall.h | 2 ++ |
| arch/x86/mm/kaiser.c | 34 ++++++++++++++++++++++++++++++---- |
| 3 files changed, 37 insertions(+), 4 deletions(-) |
| |
| --- a/arch/x86/entry/vsyscall/vsyscall_64.c |
| +++ b/arch/x86/entry/vsyscall/vsyscall_64.c |
| @@ -66,6 +66,11 @@ static int __init vsyscall_setup(char *s |
| } |
| early_param("vsyscall", vsyscall_setup); |
| |
| +bool vsyscall_enabled(void) |
| +{ |
| + return vsyscall_mode != NONE; |
| +} |
| + |
| static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, |
| const char *message) |
| { |
| --- a/arch/x86/include/asm/vsyscall.h |
| +++ b/arch/x86/include/asm/vsyscall.h |
| @@ -12,12 +12,14 @@ extern void map_vsyscall(void); |
| * Returns true if handled. |
| */ |
| extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); |
| +extern bool vsyscall_enabled(void); |
| #else |
| static inline void map_vsyscall(void) {} |
| static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) |
| { |
| return false; |
| } |
| +static inline bool vsyscall_enabled(void) { return false; } |
| #endif |
| |
| #endif /* _ASM_X86_VSYSCALL_H */ |
| --- a/arch/x86/mm/kaiser.c |
| +++ b/arch/x86/mm/kaiser.c |
| @@ -19,6 +19,7 @@ |
| #include <asm/pgalloc.h> |
| #include <asm/desc.h> |
| #include <asm/cmdline.h> |
| +#include <asm/vsyscall.h> |
| |
| int kaiser_enabled __read_mostly = 1; |
| EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ |
| @@ -110,12 +111,13 @@ static inline unsigned long get_pa_from_ |
| * |
| * Returns a pointer to a PTE on success, or NULL on failure. |
| */ |
| -static pte_t *kaiser_pagetable_walk(unsigned long address) |
| +static pte_t *kaiser_pagetable_walk(unsigned long address, bool user) |
| { |
| pmd_t *pmd; |
| pud_t *pud; |
| pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); |
| gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); |
| + unsigned long prot = _KERNPG_TABLE; |
| |
| if (pgd_none(*pgd)) { |
| WARN_ONCE(1, "All shadow pgds should have been populated"); |
| @@ -123,6 +125,17 @@ static pte_t *kaiser_pagetable_walk(unsi |
| } |
| BUILD_BUG_ON(pgd_large(*pgd) != 0); |
| |
| + if (user) { |
| + /* |
| + * The vsyscall page is the only page that will have |
| + * _PAGE_USER set. Catch everything else. |
| + */ |
| + BUG_ON(address != VSYSCALL_ADDR); |
| + |
| + set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); |
| + prot = _PAGE_TABLE; |
| + } |
| + |
| pud = pud_offset(pgd, address); |
| /* The shadow page tables do not use large mappings: */ |
| if (pud_large(*pud)) { |
| @@ -135,7 +148,7 @@ static pte_t *kaiser_pagetable_walk(unsi |
| return NULL; |
| spin_lock(&shadow_table_allocation_lock); |
| if (pud_none(*pud)) { |
| - set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); |
| + set_pud(pud, __pud(prot | __pa(new_pmd_page))); |
| __inc_zone_page_state(virt_to_page((void *) |
| new_pmd_page), NR_KAISERTABLE); |
| } else |
| @@ -155,7 +168,7 @@ static pte_t *kaiser_pagetable_walk(unsi |
| return NULL; |
| spin_lock(&shadow_table_allocation_lock); |
| if (pmd_none(*pmd)) { |
| - set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); |
| + set_pmd(pmd, __pmd(prot | __pa(new_pte_page))); |
| __inc_zone_page_state(virt_to_page((void *) |
| new_pte_page), NR_KAISERTABLE); |
| } else |
| @@ -191,7 +204,7 @@ static int kaiser_add_user_map(const voi |
| ret = -EIO; |
| break; |
| } |
| - pte = kaiser_pagetable_walk(address); |
| + pte = kaiser_pagetable_walk(address, flags & _PAGE_USER); |
| if (!pte) { |
| ret = -ENOMEM; |
| break; |
| @@ -318,6 +331,19 @@ void __init kaiser_init(void) |
| |
| kaiser_init_all_pgds(); |
| |
| + /* |
| + * Note that this sets _PAGE_USER and it needs to happen when the |
| + * pagetable hierarchy gets created, i.e., early. Otherwise |
| + * kaiser_pagetable_walk() will encounter initialized PTEs in the |
| + * hierarchy and not set the proper permissions, leading to the |
| + * pagefaults with page-protection violations when trying to read the |
| + * vsyscall page. For example. |
| + */ |
| + if (vsyscall_enabled()) |
| + kaiser_add_user_map_early((void *)VSYSCALL_ADDR, |
| + PAGE_SIZE, |
| + __PAGE_KERNEL_VSYSCALL); |
| + |
| for_each_possible_cpu(cpu) { |
| void *percpu_vaddr = __per_cpu_user_mapped_start + |
| per_cpu_offset(cpu); |