From foo@baz Wed Jan 3 18:58:12 CET 2018
From: Hugh Dickins <hughd@google.com>
Date: Sat, 9 Sep 2017 21:27:32 -0700
Subject: kaiser: vmstat show NR_KAISERTABLE as nr_overhead

From: Hugh Dickins <hughd@google.com>

The kaiser update made an interesting choice, never to free any shadow
page tables. Contention on the global spinlock was worrying, particularly
with it held across page table scans when freeing. Something had to be
done: I was going to add refcounting; but simply never to free them is
an appealing choice, minimizing contention without complicating the code
(the more often a page table is found already present, the less the
spinlock is used).

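As a rough userspace sketch of that never-free scheme (illustrative
only: get_or_install_table() and table_lock are made-up stand-ins for
the walk functions and shadow_table_allocation_lock in the diff below;
the kernel code relies on set_pud()/set_pmd() ordering where a real
userspace version would want atomics):

        #include <stdlib.h>
        #include <pthread.h>

        static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

        /* Install a 4K table at *slot exactly once; never freed again. */
        static void *get_or_install_table(void **slot)
        {
                void *new_table;

                if (*slot)                   /* common case: no locking */
                        return *slot;
                new_table = calloc(1, 4096); /* allocate before locking */
                if (!new_table)
                        return NULL;
                pthread_mutex_lock(&table_lock);
                if (!*slot)                  /* recheck: did someone win? */
                        *slot = new_table;
                else
                        free(new_table);     /* lost the race; drop ours */
                pthread_mutex_unlock(&table_lock);
                return *slot;
        }

        int main(void)
        {
                static void *slot;
                return get_or_install_table(&slot) ? 0 : 1;
        }
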
But leaking pages in this way is also a worry: can we get away with it?
At the very least, we need a count to show how bad it actually gets:
in principle, one might end up wasting about 1/256 of memory that way
(1/512 for when direct-mapped pages have to be user-mapped, plus 1/512
for when they are user-mapped from the vmalloc area on another occasion;
but we don't have vmalloc'ed stacks, so only large LDTs are vmalloc'ed).

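To make that 1/256 bound concrete, a tiny illustrative calculation
(assuming x86_64 with 4K pages, where each page table page holds 512
entries, so one table page can map up to 512 data pages):

        #include <stdio.h>

        int main(void)
        {
                double direct_map = 1.0 / 512; /* direct map user-mapped */
                double vmalloc    = 1.0 / 512; /* vmalloc area user-mapped */
                printf("worst case: 1/%.0f of memory\n",
                       1 / (direct_map + vmalloc)); /* prints 1/256 */
                return 0;
        }
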
Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the
shared pgd entries, and 1 for each intermediate page table added
thereafter for user-mapping - but leave out the 1 per mm, for its
shadow pgd, because that distracts from the monotonic increase.
Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled).

In practice, it doesn't look so bad so far: more like 1/12000 after
nine hours of gtests below; and movable pageblock segregation should
tend to cluster the kaiser tables into a subset of the address space
(if not, they will be bad for compaction too). But production may
tell a different story: keep an eye on this number, and bring back
lighter freeing if it gets out of control (maybe a shrinker).

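One way of keeping that eye on it: a hypothetical userspace reader of
the counter (assumes 4K pages, so each counted table page is 4 kB; not
part of this patch):

        #include <stdio.h>

        int main(void)
        {
                char line[128];
                unsigned long pages;
                FILE *f = fopen("/proc/vmstat", "r");

                if (!f)
                        return 1;
                while (fgets(line, sizeof(line), f)) {
                        /* nr_overhead counts whole pages */
                        if (sscanf(line, "nr_overhead %lu", &pages) == 1)
                                printf("kaiser tables: %lu kB\n", pages * 4);
                }
                fclose(f);
                return 0;
        }
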
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/kaiser.c   |   16 +++++++++++-----
 include/linux/mmzone.h |    3 ++-
 mm/vmstat.c            |    1 +
 3 files changed, 14 insertions(+), 6 deletions(-)

--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -122,9 +122,11 @@ static pte_t *kaiser_pagetable_walk(unsi
 		if (!new_pmd_page)
 			return NULL;
 		spin_lock(&shadow_table_allocation_lock);
-		if (pud_none(*pud))
+		if (pud_none(*pud)) {
 			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
-		else
+			__inc_zone_page_state(virt_to_page((void *)
+					new_pmd_page), NR_KAISERTABLE);
+		} else
 			free_page(new_pmd_page);
 		spin_unlock(&shadow_table_allocation_lock);
 	}
@@ -140,9 +142,11 @@ static pte_t *kaiser_pagetable_walk(unsi
 		if (!new_pte_page)
 			return NULL;
 		spin_lock(&shadow_table_allocation_lock);
-		if (pmd_none(*pmd))
+		if (pmd_none(*pmd)) {
 			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
-		else
+			__inc_zone_page_state(virt_to_page((void *)
+					new_pte_page), NR_KAISERTABLE);
+		} else
 			free_page(new_pte_page);
 		spin_unlock(&shadow_table_allocation_lock);
 	}
@@ -206,11 +210,13 @@ static void __init kaiser_init_all_pgds(
 	pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
 	for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
 		pgd_t new_pgd;
-		pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
+		pud_t *pud = pud_alloc_one(&init_mm,
+					   PAGE_OFFSET + i * PGDIR_SIZE);
 		if (!pud) {
 			WARN_ON(1);
 			break;
 		}
+		inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
 		new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
 		/*
 		 * Make sure not to stomp on some other pgd entry.
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -131,8 +131,9 @@ enum zone_stat_item {
 	NR_SLAB_RECLAIMABLE,
 	NR_SLAB_UNRECLAIMABLE,
 	NR_PAGETABLE,		/* used for pagetables */
-	NR_KERNEL_STACK,
 	/* Second 128 byte cacheline */
+	NR_KERNEL_STACK,
+	NR_KAISERTABLE,
 	NR_UNSTABLE_NFS,	/* NFS unstable pages */
 	NR_BOUNCE,
 	NR_VMSCAN_WRITE,
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -736,6 +736,7 @@ const char * const vmstat_text[] = {
 	"nr_slab_unreclaimable",
 	"nr_page_table_pages",
 	"nr_kernel_stack",
+	"nr_overhead",
 	"nr_unstable",
 	"nr_bounce",
 	"nr_vmscan_write",