| From deb84649d0bec1be6dd2228bf50a5ec76d0edf60 Mon Sep 17 00:00:00 2001 |
| From: Joerg Roedel <jroedel@suse.de> |
| Date: Sat, 21 Mar 2020 18:22:41 -0700 |
| Subject: [PATCH] x86/mm: split vmalloc_sync_all() |
| |
| commit 763802b53a427ed3cbd419dbba255c414fdd9e7c upstream. |
| |
| Commit 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in |
| __purge_vmap_area_lazy()") introduced a call to vmalloc_sync_all() in |
| the vunmap() code-path. While this change was necessary to maintain |
| correctness on x86-32-pae kernels, it also adds additional cycles for |
| architectures that don't need it. |
| |
| Specifically on x86-64 with CONFIG_VMAP_STACK=y some people reported |
| severe performance regressions in micro-benchmarks because it now also |
| calls the x86-64 implementation of vmalloc_sync_all() on vunmap(). But |
| the vmalloc_sync_all() implementation on x86-64 is only needed for newly |
| created mappings. |
| |
| To avoid the unnecessary work on x86-64 and to gain the performance |
| back, split up vmalloc_sync_all() into two functions: |
| |
| * vmalloc_sync_mappings(), and |
| * vmalloc_sync_unmappings() |
| |
| Most call-sites to vmalloc_sync_all() only care about new mappings being |
| synchronized. The only exception is the new call-site added in the |
| above mentioned commit. |
| |
| Shile Zhang directed us to a report of an 80% regression in reaim |
| throughput. |
| |
| Fixes: 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") |
| Reported-by: kernel test robot <oliver.sang@intel.com> |
| Reported-by: Shile Zhang <shile.zhang@linux.alibaba.com> |
| Signed-off-by: Joerg Roedel <jroedel@suse.de> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Tested-by: Borislav Petkov <bp@suse.de> |
| Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> [GHES] |
| Cc: Dave Hansen <dave.hansen@linux.intel.com> |
| Cc: Andy Lutomirski <luto@kernel.org> |
| Cc: Peter Zijlstra <peterz@infradead.org> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Cc: Ingo Molnar <mingo@redhat.com> |
| Cc: <stable@vger.kernel.org> |
| Link: http://lkml.kernel.org/r/20191009124418.8286-1-joro@8bytes.org |
| Link: https://lists.01.org/hyperkitty/list/lkp@lists.01.org/thread/4D3JPPHBNOSPFK2KEPC6KGKS6J25AIDB/ |
| Link: http://lkml.kernel.org/r/20191113095530.228959-1-shile.zhang@linux.alibaba.com |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c |
| index 3589db475280..7aa188950d42 100644 |
| --- a/arch/x86/mm/fault.c |
| +++ b/arch/x86/mm/fault.c |
| @@ -206,7 +206,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) |
| return pmd_k; |
| } |
| |
| -void vmalloc_sync_all(void) |
| +static void vmalloc_sync(void) |
| { |
| unsigned long address; |
| |
| @@ -233,6 +233,16 @@ void vmalloc_sync_all(void) |
| } |
| } |
| |
| +void vmalloc_sync_mappings(void) |
| +{ |
| + vmalloc_sync(); |
| +} |
| + |
| +void vmalloc_sync_unmappings(void) |
| +{ |
| + vmalloc_sync(); |
| +} |
| + |
| /* |
| * 32-bit: |
| * |
| @@ -335,11 +345,23 @@ static void dump_pagetable(unsigned long address) |
| |
| #else /* CONFIG_X86_64: */ |
| |
| -void vmalloc_sync_all(void) |
| +void vmalloc_sync_mappings(void) |
| { |
| + /* |
| + * 64-bit mappings might allocate new p4d/pud pages |
| + * that need to be propagated to all tasks' PGDs. |
| + */ |
| sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); |
| } |
| |
| +void vmalloc_sync_unmappings(void) |
| +{ |
| + /* |
| + * Unmappings never allocate or free p4d/pud pages. |
| + * No work is required here. |
| + */ |
| +} |
| + |
| /* |
| * 64-bit: |
| * |
| diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c |
| index 6875bf629f16..3760b5ebf1a6 100644 |
| --- a/drivers/acpi/apei/ghes.c |
| +++ b/drivers/acpi/apei/ghes.c |
| @@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes) |
| * New allocation must be visible in all pgd before it can be found by |
| * an NMI allocating from the pool. |
| */ |
| - vmalloc_sync_all(); |
| + vmalloc_sync_mappings(); |
| |
| rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); |
| if (rc) |
| diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h |
| index 51e131245379..5851ea142929 100644 |
| --- a/include/linux/vmalloc.h |
| +++ b/include/linux/vmalloc.h |
| @@ -116,8 +116,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, |
| |
| extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, |
| unsigned long pgoff); |
| -void vmalloc_sync_all(void); |
| - |
| +void vmalloc_sync_mappings(void); |
| +void vmalloc_sync_unmappings(void); |
| + |
| /* |
| * Lowlevel-APIs (not for driver use!) |
| */ |
| diff --git a/kernel/notifier.c b/kernel/notifier.c |
| index d9f5081d578d..157d7c29f720 100644 |
| --- a/kernel/notifier.c |
| +++ b/kernel/notifier.c |
| @@ -554,7 +554,7 @@ NOKPROBE_SYMBOL(notify_die); |
| |
| int register_die_notifier(struct notifier_block *nb) |
| { |
| - vmalloc_sync_all(); |
| + vmalloc_sync_mappings(); |
| return atomic_notifier_chain_register(&die_chain, nb); |
| } |
| EXPORT_SYMBOL_GPL(register_die_notifier); |
| diff --git a/mm/nommu.c b/mm/nommu.c |
| index b2823519f8cd..aec5a3599a30 100644 |
| --- a/mm/nommu.c |
| +++ b/mm/nommu.c |
| @@ -447,10 +447,14 @@ void vm_unmap_aliases(void) |
| EXPORT_SYMBOL_GPL(vm_unmap_aliases); |
| |
| /* |
| - * Implement a stub for vmalloc_sync_all() if the architecture chose not to |
| - * have one. |
| + * Implement stubs for vmalloc_sync_[un]mappings() if the architecture |
| + * chose not to have one. |
| */ |
| -void __weak vmalloc_sync_all(void) |
| +void __weak vmalloc_sync_mappings(void) |
| +{ |
| +} |
| + |
| +void __weak vmalloc_sync_unmappings(void) |
| { |
| } |
| |
| diff --git a/mm/vmalloc.c b/mm/vmalloc.c |
| index 080d30408ce3..05697439c5a7 100644 |
| --- a/mm/vmalloc.c |
| +++ b/mm/vmalloc.c |
| @@ -1217,7 +1217,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) |
| * First make sure the mappings are removed from all page-tables |
| * before they are freed. |
| */ |
| - vmalloc_sync_all(); |
| + vmalloc_sync_unmappings(); |
| |
| /* |
| * TODO: to calculate a flush range without looping. |
| @@ -3005,16 +3005,19 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, |
| EXPORT_SYMBOL(remap_vmalloc_range); |
| |
| /* |
| - * Implement a stub for vmalloc_sync_all() if the architecture chose not to |
| - * have one. |
| + * Implement stubs for vmalloc_sync_[un]mappings() if the architecture chose |
| + * not to have one. |
| * |
| * The purpose of this function is to make sure the vmalloc area |
| * mappings are identical in all page-tables in the system. |
| */ |
| -void __weak vmalloc_sync_all(void) |
| +void __weak vmalloc_sync_mappings(void) |
| { |
| } |
| |
| +void __weak vmalloc_sync_unmappings(void) |
| +{ |
| +} |
| |
| static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) |
| { |
| -- |
| 2.7.4 |
| |