From: Huang Ying <ying.huang@intel.com>
Subject: mm and cache_info: remove unnecessary CPU cache info update
Date: Fri, 26 Jan 2024 16:19:44 +0800

For each CPU hotplug event, we update the per-CPU data slice size and
the corresponding PCP configuration for every online CPU, which keeps
the implementation simple.  But Kyle reported that this takes tens of
seconds during boot on a machine with 34 zones and 3840 CPUs.

So, in this patch, for each CPU hotplug event, we only update the
per-CPU data slice size and the corresponding PCP configuration for the
CPUs that share caches with the hotplugged CPU.  With the patch, the
system boot time is reduced by 67 seconds on that machine.
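Back-of-the-envelope illustration (a minimal userspace sketch, not kernel
code): taking the 3840 CPUs and 34 zones from the report above, and
assuming, purely for illustration, 16 CPUs per last level cache (a number
not taken from the report), the amount of PCP reconfiguration work done
over a full boot drops roughly as follows.

#include <stdio.h>

int main(void)
{
	/* nr_cpus and nr_zones from the report; cpus_per_llc is an assumed example. */
	unsigned long nr_cpus = 3840, nr_zones = 34, cpus_per_llc = 16;

	/* Before: each hotplug event updates every online CPU in every zone
	 * (treating all CPUs as already online, i.e. an upper bound during boot). */
	unsigned long old_per_event = nr_cpus * nr_zones;
	/* After: each event only updates the CPUs sharing the hotplugged CPU's LLC. */
	unsigned long new_per_event = cpus_per_llc * nr_zones;

	/* Boot onlines each CPU once, so there are nr_cpus hotplug events. */
	printf("old: ~%lu PCP updates\n", old_per_event * nr_cpus);
	printf("new: ~%lu PCP updates\n", new_per_event * nr_cpus);
	return 0;
}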

Link: https://lkml.kernel.org/r/20240126081944.414520-1-ying.huang@intel.com
Fixes: 362d37a106dd ("mm, pcp: reduce lock contention for draining high-order pages")
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Originally-by: Kyle Meyer <kyle.meyer@hpe.com>
Reported-and-tested-by: Kyle Meyer <kyle.meyer@hpe.com>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 drivers/base/cacheinfo.c |   50 ++++++++++++++++++++++++++++++++-----
 include/linux/gfp.h      |    2 -
 mm/page_alloc.c          |   39 +++++++++++++---------------
 3 files changed, 63 insertions(+), 28 deletions(-)

--- a/drivers/base/cacheinfo.c~mm-and-cache_info-remove-unnecessary-cpu-cache-info-update
+++ a/drivers/base/cacheinfo.c
@@ -898,6 +898,37 @@ err:
 	return rc;
 }
 
+static unsigned int cpu_map_shared_cache(bool online, unsigned int cpu,
+					 cpumask_t **map)
+{
+	struct cacheinfo *llc, *sib_llc;
+	unsigned int sibling;
+
+	if (!last_level_cache_is_valid(cpu))
+		return 0;
+
+	llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+
+	if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED)
+		return 0;
+
+	if (online) {
+		*map = &llc->shared_cpu_map;
+		return cpumask_weight(*map);
+	}
+
+	/* shared_cpu_map of offlined CPU will be cleared, so use sibling map */
+	for_each_cpu(sibling, &llc->shared_cpu_map) {
+		if (sibling == cpu || !last_level_cache_is_valid(sibling))
+			continue;
+		sib_llc = per_cpu_cacheinfo_idx(sibling, cache_leaves(sibling) - 1);
+		*map = &sib_llc->shared_cpu_map;
+		return cpumask_weight(*map);
+	}
+
+	return 0;
+}
+
 /*
  * Calculate the size of the per-CPU data cache slice. This can be
  * used to estimate the size of the data cache slice that can be used
@@ -929,28 +960,31 @@ static void update_per_cpu_data_slice_si
 	ci->per_cpu_data_slice_size = llc->size / nr_shared;
 }
 
-static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu)
+static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu,
+					   cpumask_t *cpu_map)
 {
 	unsigned int icpu;
 
-	for_each_online_cpu(icpu) {
+	for_each_cpu(icpu, cpu_map) {
 		if (!cpu_online && icpu == cpu)
 			continue;
 		update_per_cpu_data_slice_size_cpu(icpu);
+		setup_pcp_cacheinfo(icpu);
 	}
 }
 
 static int cacheinfo_cpu_online(unsigned int cpu)
 {
 	int rc = detect_cache_attributes(cpu);
+	cpumask_t *cpu_map;
 
 	if (rc)
 		return rc;
 	rc = cache_add_dev(cpu);
 	if (rc)
 		goto err;
-	update_per_cpu_data_slice_size(true, cpu);
-	setup_pcp_cacheinfo();
+	if (cpu_map_shared_cache(true, cpu, &cpu_map))
+		update_per_cpu_data_slice_size(true, cpu, cpu_map);
 	return 0;
 err:
 	free_cache_attributes(cpu);
@@ -959,12 +993,16 @@ err:
 
 static int cacheinfo_cpu_pre_down(unsigned int cpu)
 {
+	cpumask_t *cpu_map;
+	unsigned int nr_shared;
+
+	nr_shared = cpu_map_shared_cache(false, cpu, &cpu_map);
 	if (cpumask_test_and_clear_cpu(cpu, &cache_dev_map))
 		cpu_cache_sysfs_exit(cpu);
 
 	free_cache_attributes(cpu);
-	update_per_cpu_data_slice_size(false, cpu);
-	setup_pcp_cacheinfo();
+	if (nr_shared > 1)
+		update_per_cpu_data_slice_size(false, cpu, cpu_map);
 	return 0;
 }
 
--- a/include/linux/gfp.h~mm-and-cache_info-remove-unnecessary-cpu-cache-info-update
+++ a/include/linux/gfp.h
@@ -334,7 +334,7 @@ void drain_all_pages(struct zone *zone);
 void drain_local_pages(struct zone *zone);
 
 void page_alloc_init_late(void);
-void setup_pcp_cacheinfo(void);
+void setup_pcp_cacheinfo(unsigned int cpu);
 
 /*
  * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
--- a/mm/page_alloc.c~mm-and-cache_info-remove-unnecessary-cpu-cache-info-update
+++ a/mm/page_alloc.c
@@ -5572,37 +5572,34 @@ static void zone_pcp_update(struct zone
 	mutex_unlock(&pcp_batch_high_lock);
 }
 
-static void zone_pcp_update_cacheinfo(struct zone *zone)
+static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
 {
-	int cpu;
 	struct per_cpu_pages *pcp;
 	struct cpu_cacheinfo *cci;
 
-	for_each_online_cpu(cpu) {
-		pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
-		cci = get_cpu_cacheinfo(cpu);
-		/*
-		 * If data cache slice of CPU is large enough, "pcp->batch"
-		 * pages can be preserved in PCP before draining PCP for
-		 * consecutive high-order pages freeing without allocation.
-		 * This can reduce zone lock contention without hurting
-		 * cache-hot pages sharing.
-		 */
-		spin_lock(&pcp->lock);
-		if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
-			pcp->flags |= PCPF_FREE_HIGH_BATCH;
-		else
-			pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
-		spin_unlock(&pcp->lock);
-	}
+	pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+	cci = get_cpu_cacheinfo(cpu);
+	/*
+	 * If data cache slice of CPU is large enough, "pcp->batch"
+	 * pages can be preserved in PCP before draining PCP for
+	 * consecutive high-order pages freeing without allocation.
+	 * This can reduce zone lock contention without hurting
+	 * cache-hot pages sharing.
+	 */
+	spin_lock(&pcp->lock);
+	if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
+		pcp->flags |= PCPF_FREE_HIGH_BATCH;
+	else
+		pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
+	spin_unlock(&pcp->lock);
 }
 
-void setup_pcp_cacheinfo(void)
+void setup_pcp_cacheinfo(unsigned int cpu)
 {
 	struct zone *zone;
 
 	for_each_populated_zone(zone)
-		zone_pcp_update_cacheinfo(zone);
+		zone_pcp_update_cacheinfo(zone, cpu);
 }
 
 /*
_