| From: David Hildenbrand <david@redhat.com> |
| Subject: mm/highmem: reimplement totalhigh_pages() by walking zones |
| Date: Fri, 7 Jun 2024 10:37:10 +0200 |
| |
| Patch series "mm/highmem: don't track highmem pages manually". |
| |
| Let's remove highmem special-casing from adjust_managed_page_count(), to |
| reduce confusion about why memblock manually adjusts totalram_pages while |
| __free_pages_core() only adjusts the zone's managed pages -- what about |
| the highmem pages that adjust_managed_page_count() updates? |
| |
| Now, we only maintain totalram_pages and a zone's managed pages |
| independent of highmem support. We can derive the number of highmem pages |
| simply by looking at the relevant zone's managed pages. I don't think |
| there is any particular fast path that needs a maximally efficient |
| totalhigh_pages() implementation. |
| |
| Note that highmem memory is currently initialized using |
| free_highmem_page()->free_reserved_page(), not __free_pages_core(). In |
| the future we might want to also use __free_pages_core() to initialize |
| highmem memory, to make that less special, and consider moving |
| totalram_pages updates into __free_pages_core() [1], so we can just use |
| adjust_managed_page_count() in there as well. |
| |
| Booting a simple kernel in QEMU reveals no highmem accounting change: |
| |
| Before: |
| Memory: 3095448K/3145208K available (14802K kernel code, 2073K rwdata, |
| 5000K rodata, 740K init, 556K bss, 49760K reserved, 0K cma-reserved, |
| 2244488K highmem) |
| |
| After: |
| Memory: 3095276K/3145208K available (14802K kernel code, 2073K rwdata, |
| 5000K rodata, 740K init, 556K bss, 49932K reserved, 0K cma-reserved, |
| 2244488K highmem) |
| |
| [1] https://lkml.kernel.org/r/20240601133402.2675-1-richard.weiyang@gmail.com |
| |
| |
| This patch (of 2): |
| |
| Can we get rid of the highmem ifdef in adjust_managed_page_count()? |
| Likely yes: we don't have that many totalhigh_pages() users, and none of |
| them seem to be very performance critical. |
| |
| So let's implement totalhigh_pages() like nr_free_highpages(), collecting |
| information from all zones. This is now similar to what we do in |
| si_meminfo_node() to collect the per-node highmem page count. |
| |
| In the common case (single node, 3-4 zones), we really shouldn't care. We |
| could optimize a bit further (only walk ZONE_HIGHMEM and ZONE_MOVABLE if |
| required), but there doesn't seem to be a real need for that. |
| |
| [david@redhat.com: fix build bot complaint] |
| Link: https://lkml.kernel.org/r/b57e5bc4-eb72-40e3-add4-57dfa6e03df6@redhat.com |
| Link: https://lkml.kernel.org/r/20240607083711.62833-1-david@redhat.com |
| Link: https://lkml.kernel.org/r/20240607083711.62833-2-david@redhat.com |
| Signed-off-by: David Hildenbrand <david@redhat.com> |
| Reviewed-by: Wei Yang <richard.weiyang@gmail.com> |
| Reviewed-by: Oscar Salvador <osalvador@suse.de> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| include/linux/highmem-internal.h | 9 ++------- |
| mm/highmem.c | 17 ++++++++++++++--- |
| mm/page_alloc.c | 4 ---- |
| 3 files changed, 16 insertions(+), 14 deletions(-) |
| |
| --- a/include/linux/highmem-internal.h~mm-highmem-reimplement-totalhigh_pages-by-walking-zones |
| +++ a/include/linux/highmem-internal.h |
| @@ -132,7 +132,7 @@ static inline void __kunmap_atomic(const |
| } |
| |
| unsigned int __nr_free_highpages(void); |
| -extern atomic_long_t _totalhigh_pages; |
| +unsigned long __totalhigh_pages(void); |
| |
| static inline unsigned int nr_free_highpages(void) |
| { |
| @@ -141,12 +141,7 @@ static inline unsigned int nr_free_highp |
| |
| static inline unsigned long totalhigh_pages(void) |
| { |
| - return (unsigned long)atomic_long_read(&_totalhigh_pages); |
| -} |
| - |
| -static inline void totalhigh_pages_add(long count) |
| -{ |
| - atomic_long_add(count, &_totalhigh_pages); |
| + return __totalhigh_pages(); |
| } |
| |
| static inline bool is_kmap_addr(const void *x) |
| --- a/mm/highmem.c~mm-highmem-reimplement-totalhigh_pages-by-walking-zones |
| +++ a/mm/highmem.c |
| @@ -111,9 +111,6 @@ static inline wait_queue_head_t *get_pkm |
| } |
| #endif |
| |
| -atomic_long_t _totalhigh_pages __read_mostly; |
| -EXPORT_SYMBOL(_totalhigh_pages); |
| - |
| unsigned int __nr_free_highpages(void) |
| { |
| struct zone *zone; |
| @@ -127,6 +124,20 @@ unsigned int __nr_free_highpages(void) |
| return pages; |
| } |
| |
| +unsigned long __totalhigh_pages(void) |
| +{ |
| + unsigned long pages = 0; |
| + struct zone *zone; |
| + |
| + for_each_populated_zone(zone) { |
| + if (is_highmem(zone)) |
| + pages += zone_managed_pages(zone); |
| + } |
| + |
| + return pages; |
| +} |
| +EXPORT_SYMBOL(__totalhigh_pages); |
| + |
| static int pkmap_count[LAST_PKMAP]; |
| static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); |
| |
| --- a/mm/page_alloc.c~mm-highmem-reimplement-totalhigh_pages-by-walking-zones |
| +++ a/mm/page_alloc.c |
| @@ -5756,10 +5756,6 @@ void adjust_managed_page_count(struct pa |
| { |
| atomic_long_add(count, &page_zone(page)->managed_pages); |
| totalram_pages_add(count); |
| -#ifdef CONFIG_HIGHMEM |
| - if (PageHighMem(page)) |
| - totalhigh_pages_add(count); |
| -#endif |
| } |
| EXPORT_SYMBOL(adjust_managed_page_count); |
| |
| _ |