From: Frank van der Linden <fvdl@google.com>
Subject: mm/hugetlb: add pre-HVO framework
Date: Fri, 28 Feb 2025 18:29:18 +0000

Define flags for pre-HVOed bootmem hugetlb pages, and act on them.

The most important flag is the HVO flag, signalling that a
bootmem-allocated gigantic page has already been HVO-ed. If the hugetlb
bootmem gather code sees this flag, it marks the page as HVO-optimized.
The HVO code will then not try to optimize it again; instead, it will
just map the mirrored tail page structs read-only, completing the HVO
steps.

No functional change, as nothing sets the flags yet.

Link: https://lkml.kernel.org/r/20250228182928.2645936-18-fvdl@google.com
Signed-off-by: Frank van der Linden <fvdl@google.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 arch/powerpc/mm/hugetlbpage.c |    1
 include/linux/hugetlb.h       |    4 ++
 mm/hugetlb.c                  |   24 ++++++++++++++-
 mm/hugetlb_vmemmap.c          |   50 ++++++++++++++++++++++++++++++--
 mm/hugetlb_vmemmap.h          |    7 ++++
 5 files changed, 83 insertions(+), 3 deletions(-)
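
As a rough illustration only (not part of this patch), a later boot-time
allocation path that pre-optimizes the vmemmap would be expected to tag
its huge_bootmem_page entry along these lines, so that the gather code in
this patch picks it up; hugetlb_bootmem_prehvo_done() is a hypothetical
placeholder, not an existing helper:

	/* the vmemmap of this bootmem gigantic page was already remapped */
	if (hugetlb_bootmem_prehvo_done(m))
		m->flags |= HUGE_BOOTMEM_HVO;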

--- a/arch/powerpc/mm/hugetlbpage.c~mm-hugetlb-add-pre-hvo-framework
+++ a/arch/powerpc/mm/hugetlbpage.c
@@ -113,6 +113,7 @@ static int __init pseries_alloc_bootmem_
 	gpage_freearray[nr_gpages] = 0;
 	list_add(&m->list, &huge_boot_pages[0]);
 	m->hstate = hstate;
+	m->flags = 0;
 	return 1;
 }
 
--- a/include/linux/hugetlb.h~mm-hugetlb-add-pre-hvo-framework
+++ a/include/linux/hugetlb.h
@@ -681,8 +681,12 @@ struct hstate {
 struct huge_bootmem_page {
 	struct list_head list;
 	struct hstate *hstate;
+	unsigned long flags;
 };
 
+#define HUGE_BOOTMEM_HVO		0x0001
+#define HUGE_BOOTMEM_ZONES_VALID	0x0002
+
 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
 int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
 void wait_for_freed_hugetlb_folios(void);
--- a/mm/hugetlb.c~mm-hugetlb-add-pre-hvo-framework
+++ a/mm/hugetlb.c
@@ -3227,6 +3227,7 @@ found:
 	INIT_LIST_HEAD(&m->list);
 	list_add(&m->list, &huge_boot_pages[node]);
 	m->hstate = h;
+	m->flags = 0;
 	return 1;
 }
 
@@ -3294,7 +3295,7 @@ static void __init prep_and_add_bootmem_
 	struct folio *folio, *tmp_f;
 
 	/* Send list for bulk vmemmap optimization processing */
-	hugetlb_vmemmap_optimize_folios(h, folio_list);
+	hugetlb_vmemmap_optimize_bootmem_folios(h, folio_list);
 
 	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
 		if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
@@ -3323,6 +3324,13 @@ static bool __init hugetlb_bootmem_page_
 	unsigned long start_pfn;
 	bool valid;
 
+	if (m->flags & HUGE_BOOTMEM_ZONES_VALID) {
+		/*
+		 * Already validated, skip check.
+		 */
+		return true;
+	}
+
 	start_pfn = virt_to_phys(m) >> PAGE_SHIFT;
 
 	valid = !pfn_range_intersects_zones(nid, start_pfn,
@@ -3355,6 +3363,11 @@ static void __init hugetlb_bootmem_free_
 	}
 }
 
+static bool __init hugetlb_bootmem_page_prehvo(struct huge_bootmem_page *m)
+{
+	return (m->flags & HUGE_BOOTMEM_HVO);
+}
+
 /*
  * Put bootmem huge pages into the standard lists after mem_map is up.
  * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages.
@@ -3395,6 +3408,15 @@ static void __init gather_bootmem_preall
 		hugetlb_folio_init_vmemmap(folio, h,
 					   HUGETLB_VMEMMAP_RESERVE_PAGES);
 		init_new_hugetlb_folio(h, folio);
+
+		if (hugetlb_bootmem_page_prehvo(m))
+			/*
+			 * If pre-HVO was done, just set the
+			 * flag, the HVO code will then skip
+			 * this folio.
+			 */
+			folio_set_hugetlb_vmemmap_optimized(folio);
+
 		list_add(&folio->lru, &folio_list);
 
 		/*
--- a/mm/hugetlb_vmemmap.c~mm-hugetlb-add-pre-hvo-framework
+++ a/mm/hugetlb_vmemmap.c
@@ -649,14 +649,39 @@ static int hugetlb_vmemmap_split_folio(c
 	return vmemmap_remap_split(vmemmap_start, vmemmap_end, vmemmap_reuse);
 }
 
-void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
+static void __hugetlb_vmemmap_optimize_folios(struct hstate *h,
+					      struct list_head *folio_list,
+					      bool boot)
 {
 	struct folio *folio;
+	int nr_to_optimize;
 	LIST_HEAD(vmemmap_pages);
 	unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU;
 
+	nr_to_optimize = 0;
 	list_for_each_entry(folio, folio_list, lru) {
-		int ret = hugetlb_vmemmap_split_folio(h, folio);
+		int ret;
+		unsigned long spfn, epfn;
+
+		if (boot && folio_test_hugetlb_vmemmap_optimized(folio)) {
+			/*
+			 * Already optimized by pre-HVO, just map the
+			 * mirrored tail page structs RO.
+			 */
+			spfn = (unsigned long)&folio->page;
+			epfn = spfn + pages_per_huge_page(h);
+			vmemmap_wrprotect_hvo(spfn, epfn, folio_nid(folio),
+					      HUGETLB_VMEMMAP_RESERVE_SIZE);
+			register_page_bootmem_memmap(pfn_to_section_nr(spfn),
+						     &folio->page,
+						     HUGETLB_VMEMMAP_RESERVE_SIZE);
+			static_branch_inc(&hugetlb_optimize_vmemmap_key);
+			continue;
+		}
+
+		nr_to_optimize++;
+
+		ret = hugetlb_vmemmap_split_folio(h, folio);
 
 		/*
 		 * Spliting the PMD requires allocating a page, thus lets fail
@@ -668,6 +693,16 @@ void hugetlb_vmemmap_optimize_folios(str
 			break;
 	}
 
+	if (!nr_to_optimize)
+		/*
+		 * All pre-HVO folios, nothing left to do. It's ok if
+		 * there is a mix of pre-HVO and not yet HVO-ed folios
+		 * here, as __hugetlb_vmemmap_optimize_folio() will
+		 * skip any folios that already have the optimized flag
+		 * set, see vmemmap_should_optimize_folio().
+		 */
+		goto out;
+
 	flush_tlb_all();
 
 	list_for_each_entry(folio, folio_list, lru) {
@@ -693,10 +728,21 @@ void hugetlb_vmemmap_optimize_folios(str
 		}
 	}
 
+out:
 	flush_tlb_all();
 	free_vmemmap_page_list(&vmemmap_pages);
 }
 
+void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
+{
+	__hugetlb_vmemmap_optimize_folios(h, folio_list, false);
+}
+
+void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list)
+{
+	__hugetlb_vmemmap_optimize_folios(h, folio_list, true);
+}
+
 static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
 	{
 		.procname = "hugetlb_optimize_vmemmap",
--- a/mm/hugetlb_vmemmap.h~mm-hugetlb-add-pre-hvo-framework
+++ a/mm/hugetlb_vmemmap.h
@@ -24,6 +24,8 @@ long hugetlb_vmemmap_restore_folios(cons
 			struct list_head *non_hvo_folios);
 void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio);
 void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
+void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list);
+
 
 static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
 {
@@ -64,6 +66,11 @@ static inline void hugetlb_vmemmap_optim
 {
 }
 
+static inline void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h,
+						struct list_head *folio_list)
+{
+}
+
 static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
 {
 	return 0;
_