From: Gang Li <gang.li@linux.dev>
Subject: hugetlb: parallelize 1G hugetlb initialization
Date: Thu, 22 Feb 2024 22:04:21 +0800

Optimize the initialization speed of 1G huge pages through
parallelization.

1G hugetlb pages are allocated from bootmem, a process that is already
very fast and does not currently require optimization.  Therefore, we
focus on parallelizing only the initialization phase in
`gather_bootmem_prealloc`.
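
The gist of the change: the single global huge_boot_pages list becomes
one list per NUMA node, and a padata multithreaded job hands each
worker thread a range of node ids to gather.  In outline (the new
gather_bootmem_prealloc() from the diff below, annotated):

	/* One bootmem list per node, so workers do not share a list. */
	__initdata struct list_head huge_boot_pages[MAX_NUMNODES];

	static void __init gather_bootmem_prealloc(void)
	{
		struct padata_mt_job job = {
			.thread_fn	= gather_bootmem_prealloc_parallel,
			.fn_arg		= NULL,
			.start		= 0,	/* first node id */
			.size		= num_node_state(N_MEMORY),
			.align		= 1,
			.min_chunk	= 1,	/* a node is the smallest work unit */
			.max_threads	= num_node_state(N_MEMORY),
			.numa_aware	= true,	/* run each chunk on its own node */
		};

		padata_do_multithreaded(&job);
	}

Each worker invocation walks huge_boot_pages[nid] for the node ids in
its [start, end) range; hugetlb_lock is now taken per folio around the
short accounting/enqueue step rather than around the whole loop, so the
per-node workers do not serialize on it.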

Here are some test results:

test case           no patch(ms)   patched(ms)   saved
------------------- -------------- ------------- --------
256c2T(4 node) 1G   4745           2024          57.34%
128c1T(2 node) 1G   3358           1712          49.02%
12T            1G   77000          18300         76.23%

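("saved" is the relative reduction, (no patch - patched) / no patch;
e.g. for the 4-node machine, (4745 - 2024) / 4745 = 57.34%.)
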
[akpm@linux-foundation.org: s/initialied/initialized/, per Alexey]
Link: https://lkml.kernel.org/r/20240222140422.393911-9-gang.li@linux.dev
Signed-off-by: Gang Li <ligang.bdlg@bytedance.com>
Tested-by: David Rientjes <rientjes@google.com>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 arch/powerpc/mm/hugetlbpage.c |    2 -
 include/linux/hugetlb.h       |    2 -
 mm/hugetlb.c                  |   51 ++++++++++++++++++++++++++------
 3 files changed, 45 insertions(+), 10 deletions(-)

--- a/arch/powerpc/mm/hugetlbpage.c~hugetlb-parallelize-1g-hugetlb-initialization
+++ a/arch/powerpc/mm/hugetlbpage.c
@@ -226,7 +226,7 @@ static int __init pseries_alloc_bootmem_
 		return 0;
 	m = phys_to_virt(gpage_freearray[--nr_gpages]);
 	gpage_freearray[nr_gpages] = 0;
-	list_add(&m->list, &huge_boot_pages);
+	list_add(&m->list, &huge_boot_pages[0]);
 	m->hstate = hstate;
 	return 1;
 }
--- a/include/linux/hugetlb.h~hugetlb-parallelize-1g-hugetlb-initialization
+++ a/include/linux/hugetlb.h
@@ -178,7 +178,7 @@ pte_t *huge_pmd_share(struct mm_struct *
 struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);
 
 extern int sysctl_hugetlb_shm_group;
-extern struct list_head huge_boot_pages;
+extern struct list_head huge_boot_pages[MAX_NUMNODES];
 
 /* arch callbacks */
 
--- a/mm/hugetlb.c~hugetlb-parallelize-1g-hugetlb-initialization
+++ a/mm/hugetlb.c
@@ -69,7 +69,7 @@ static bool hugetlb_cma_folio(struct fol
 #endif
 static unsigned long hugetlb_cma_size __initdata;
 
-__initdata LIST_HEAD(huge_boot_pages);
+__initdata struct list_head huge_boot_pages[MAX_NUMNODES];
 
 /* for command line parsing */
 static struct hstate * __initdata parsed_hstate;
@@ -3301,7 +3301,7 @@ int alloc_bootmem_huge_page(struct hstat
 int __alloc_bootmem_huge_page(struct hstate *h, int nid)
 {
 	struct huge_bootmem_page *m = NULL; /* initialize for clang */
-	int nr_nodes, node;
+	int nr_nodes, node = nid;
 
 	/* do node specific alloc */
 	if (nid != NUMA_NO_NODE) {
@@ -3339,7 +3339,7 @@ found:
 		huge_page_size(h) - PAGE_SIZE);
 	/* Put them into a private list first because mem_map is not up yet */
 	INIT_LIST_HEAD(&m->list);
-	list_add(&m->list, &huge_boot_pages);
+	list_add(&m->list, &huge_boot_pages[node]);
 	m->hstate = h;
 	return 1;
 }
@@ -3390,8 +3390,6 @@ static void __init prep_and_add_bootmem_
 	/* Send list for bulk vmemmap optimization processing */
 	hugetlb_vmemmap_optimize_folios(h, folio_list);
 
-	/* Add all new pool pages to free lists in one lock cycle */
-	spin_lock_irqsave(&hugetlb_lock, flags);
 	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
 		if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
 			/*
@@ -3404,23 +3402,25 @@
 				HUGETLB_VMEMMAP_RESERVE_PAGES,
 				pages_per_huge_page(h));
 		}
+		/* Subdivide locks to achieve better parallel performance */
+		spin_lock_irqsave(&hugetlb_lock, flags);
 		__prep_account_new_huge_page(h, folio_nid(folio));
 		enqueue_hugetlb_folio(h, folio);
+		spin_unlock_irqrestore(&hugetlb_lock, flags);
 	}
-	spin_unlock_irqrestore(&hugetlb_lock, flags);
 }
 
 /*
  * Put bootmem huge pages into the standard lists after mem_map is up.
  * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages.
  */
-static void __init gather_bootmem_prealloc(void)
+static void __init gather_bootmem_prealloc_node(unsigned long nid)
 {
 	LIST_HEAD(folio_list);
 	struct huge_bootmem_page *m;
 	struct hstate *h = NULL, *prev_h = NULL;
 
-	list_for_each_entry(m, &huge_boot_pages, list) {
+	list_for_each_entry(m, &huge_boot_pages[nid], list) {
 		struct page *page = virt_to_page(m);
 		struct folio *folio = (void *)page;
 
@@ -3453,6 +3453,31 @@ static void __init gather_bootmem_preall
 	prep_and_add_bootmem_folios(h, &folio_list);
 }
 
+static void __init gather_bootmem_prealloc_parallel(unsigned long start,
+						    unsigned long end, void *arg)
+{
+	int nid;
+
+	for (nid = start; nid < end; nid++)
+		gather_bootmem_prealloc_node(nid);
+}
+
+static void __init gather_bootmem_prealloc(void)
+{
+	struct padata_mt_job job = {
+		.thread_fn	= gather_bootmem_prealloc_parallel,
+		.fn_arg		= NULL,
+		.start		= 0,
+		.size		= num_node_state(N_MEMORY),
+		.align		= 1,
+		.min_chunk	= 1,
+		.max_threads	= num_node_state(N_MEMORY),
+		.numa_aware	= true,
+	};
+
+	padata_do_multithreaded(&job);
+}
+
 static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
 {
 	unsigned long i;
@@ -3600,6 +3625,7 @@ static unsigned long __init hugetlb_page
 static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 {
 	unsigned long allocated;
+	static bool initialized __initdata;
 
 	/* skip gigantic hugepages allocation if hugetlb_cma enabled */
 	if (hstate_is_gigantic(h) && hugetlb_cma_size) {
@@ -3607,6 +3633,15 @@ static void __init hugetlb_hstate_alloc_
 		return;
 	}
 
+	/* hugetlb_hstate_alloc_pages will be called many times, initialize huge_boot_pages once */
+	if (!initialized) {
+		int i = 0;
+
+		for (i = 0; i < MAX_NUMNODES; i++)
+			INIT_LIST_HEAD(&huge_boot_pages[i]);
+		initialized = true;
+	}
+
 	/* do node specific alloc */
 	if (hugetlb_hstate_alloc_pages_specific_nodes(h))
 		return;
_