| From 763802b53a427ed3cbd419dbba255c414fdd9e7c Mon Sep 17 00:00:00 2001 |
| From: Joerg Roedel <jroedel@suse.de> |
| Date: Sat, 21 Mar 2020 18:22:41 -0700 |
| Subject: x86/mm: split vmalloc_sync_all() |
| |
| From: Joerg Roedel <jroedel@suse.de> |
| |
| commit 763802b53a427ed3cbd419dbba255c414fdd9e7c upstream. |
| |
| Commit 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in |
| __purge_vmap_area_lazy()") introduced a call to vmalloc_sync_all() in |
| the vunmap() code-path. While this change was necessary to maintain |
| correctness on x86-32-pae kernels, it also adds additional cycles for |
| architectures that don't need it. |
| |
| Specifically on x86-64 with CONFIG_VMAP_STACK=y some people reported |
| severe performance regressions in micro-benchmarks because it now also |
| calls the x86-64 implementation of vmalloc_sync_all() on vunmap(). But |
| the vmalloc_sync_all() implementation on x86-64 is only needed for newly |
| created mappings. |
| |
| To avoid the unnecessary work on x86-64 and to gain the performance |
| back, split up vmalloc_sync_all() into two functions: |
| |
| * vmalloc_sync_mappings(), and |
| * vmalloc_sync_unmappings() |
| |
| Most call-sites to vmalloc_sync_all() only care about new mappings being |
| synchronized. The only exception is the new call-site added in the |
| above mentioned commit. |
| |
| Shile Zhang directed us to a report of an 80% regression in reaim |
| throughput. |
| |
| Fixes: 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") |
| Reported-by: kernel test robot <oliver.sang@intel.com> |
| Reported-by: Shile Zhang <shile.zhang@linux.alibaba.com> |
| Signed-off-by: Joerg Roedel <jroedel@suse.de> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Tested-by: Borislav Petkov <bp@suse.de> |
| Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> [GHES] |
| Cc: Dave Hansen <dave.hansen@linux.intel.com> |
| Cc: Andy Lutomirski <luto@kernel.org> |
| Cc: Peter Zijlstra <peterz@infradead.org> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Cc: Ingo Molnar <mingo@redhat.com> |
| Cc: <stable@vger.kernel.org> |
| Link: http://lkml.kernel.org/r/20191009124418.8286-1-joro@8bytes.org |
| Link: https://lists.01.org/hyperkitty/list/lkp@lists.01.org/thread/4D3JPPHBNOSPFK2KEPC6KGKS6J25AIDB/ |
| Link: http://lkml.kernel.org/r/20191113095530.228959-1-shile.zhang@linux.alibaba.com |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| arch/x86/mm/fault.c | 26 ++++++++++++++++++++++++-- |
| drivers/acpi/apei/ghes.c | 2 +- |
| include/linux/vmalloc.h | 5 +++-- |
| kernel/notifier.c | 2 +- |
| mm/nommu.c | 10 +++++++--- |
| mm/vmalloc.c | 11 +++++++---- |
| 6 files changed, 43 insertions(+), 13 deletions(-) |
| |
| --- a/arch/x86/mm/fault.c |
| +++ b/arch/x86/mm/fault.c |
| @@ -228,7 +228,7 @@ static inline pmd_t *vmalloc_sync_one(pg |
| return pmd_k; |
| } |
| |
| -void vmalloc_sync_all(void) |
| +static void vmalloc_sync(void) |
| { |
| unsigned long address; |
| |
| @@ -255,6 +255,16 @@ void vmalloc_sync_all(void) |
| } |
| } |
| |
| +void vmalloc_sync_mappings(void) |
| +{ |
| + vmalloc_sync(); |
| +} |
| + |
| +void vmalloc_sync_unmappings(void) |
| +{ |
| + vmalloc_sync(); |
| +} |
| + |
| /* |
| * 32-bit: |
| * |
| @@ -349,11 +359,23 @@ out: |
| |
| #else /* CONFIG_X86_64: */ |
| |
| -void vmalloc_sync_all(void) |
| +void vmalloc_sync_mappings(void) |
| { |
| + /* |
| + * 64-bit mappings might allocate new p4d/pud pages |
| + * that need to be propagated to all tasks' PGDs. |
| + */ |
| sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0); |
| } |
| |
| +void vmalloc_sync_unmappings(void) |
| +{ |
| + /* |
| + * Unmappings never allocate or free p4d/pud pages. |
| + * No work is required here. |
| + */ |
| +} |
| + |
| /* |
| * 64-bit: |
| * |
| --- a/drivers/acpi/apei/ghes.c |
| +++ b/drivers/acpi/apei/ghes.c |
| @@ -229,7 +229,7 @@ static int ghes_estatus_pool_expand(unsi |
| * New allocation must be visible in all pgd before it can be found by |
| * an NMI allocating from the pool. |
| */ |
| - vmalloc_sync_all(); |
| + vmalloc_sync_mappings(); |
| |
| return gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); |
| } |
| --- a/include/linux/vmalloc.h |
| +++ b/include/linux/vmalloc.h |
| @@ -92,8 +92,9 @@ extern int remap_vmalloc_range_partial(s |
| |
| extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, |
| unsigned long pgoff); |
| -void vmalloc_sync_all(void); |
| - |
| +void vmalloc_sync_mappings(void); |
| +void vmalloc_sync_unmappings(void); |
| + |
| /* |
| * Lowlevel-APIs (not for driver use!) |
| */ |
| --- a/kernel/notifier.c |
| +++ b/kernel/notifier.c |
| @@ -552,7 +552,7 @@ NOKPROBE_SYMBOL(notify_die); |
| |
| int register_die_notifier(struct notifier_block *nb) |
| { |
| - vmalloc_sync_all(); |
| + vmalloc_sync_mappings(); |
| return atomic_notifier_chain_register(&die_chain, nb); |
| } |
| EXPORT_SYMBOL_GPL(register_die_notifier); |
| --- a/mm/nommu.c |
| +++ b/mm/nommu.c |
| @@ -472,10 +472,14 @@ void vm_unmap_aliases(void) |
| EXPORT_SYMBOL_GPL(vm_unmap_aliases); |
| |
| /* |
| - * Implement a stub for vmalloc_sync_all() if the architecture chose not to |
| - * have one. |
| + * Implement a stub for vmalloc_sync_[un]mappings() if the architecture |
| + * chose not to have one. |
| */ |
| -void __weak vmalloc_sync_all(void) |
| +void __weak vmalloc_sync_mappings(void) |
| +{ |
| +} |
| + |
| +void __weak vmalloc_sync_unmappings(void) |
| { |
| } |
| |
| --- a/mm/vmalloc.c |
| +++ b/mm/vmalloc.c |
| @@ -1681,7 +1681,7 @@ void *__vmalloc_node_range(unsigned long |
| * First make sure the mappings are removed from all page-tables |
| * before they are freed. |
| */ |
| - vmalloc_sync_all(); |
| + vmalloc_sync_unmappings(); |
| |
| /* |
| * In this function, newly allocated vm_struct has VM_UNINITIALIZED |
| @@ -2218,16 +2218,19 @@ int remap_vmalloc_range(struct vm_area_s |
| EXPORT_SYMBOL(remap_vmalloc_range); |
| |
| /* |
| - * Implement a stub for vmalloc_sync_all() if the architecture chose not to |
| - * have one. |
| + * Implement stubs for vmalloc_sync_[un]mappings() if the architecture chose |
| + * not to have one. |
| * |
| * The purpose of this function is to make sure the vmalloc area |
| * mappings are identical in all page-tables in the system. |
| */ |
| -void __weak vmalloc_sync_all(void) |
| +void __weak vmalloc_sync_mappings(void) |
| { |
| } |
| |
| +void __weak vmalloc_sync_unmappings(void) |
| +{ |
| +} |
| |
| static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) |
| { |