| From: Luiz Capitulino <luizcap@redhat.com> |
| Subject: mm/util: introduce snapshot_page() |
| Date: Mon, 14 Jul 2025 09:16:52 -0400 |
| |
| This commit refactors __dump_page() into snapshot_page(). |
| |
| snapshot_page() tries to take a faithful snapshot of a page and its folio |
| representation. The snapshot is returned in the struct page_snapshot |
| parameter along with additional flags that are best retrieved at snapshot |
| creation time to reduce race windows. |
| |
| This function is intended to be used by callers that need a stable |
| representation of a struct page and struct folio so that pointers or page |
| information doesn't change while working on a page. |
| |
| The idea and original implementation of snapshot_page() come from Matthew |
| Wilcox with suggestions for improvements from David Hildenbrand. All bugs |
| and misconceptions are mine. |
| |
| [luizcap@redhat.com: fix set_ps_flags() commentary] |
| Link: https://lkml.kernel.org/r/d5c75701-b353-4536-a306-187fab0655b3@redhat.com |
| Link: https://lkml.kernel.org/r/637a03a05cb2e3df88f84ff9e9f9642374ef813a.1752499009.git.luizcap@redhat.com |
| Signed-off-by: Luiz Capitulino <luizcap@redhat.com> |
| Reviewed-by: Shivank Garg <shivankg@amd.com> |
| Tested-by: Harry Yoo <harry.yoo@oracle.com> |
| Acked-by: David Hildenbrand <david@redhat.com> |
| Cc: Matthew Wilcox (Oracle) <willy@infradead.org> |
| Cc: Oscar Salvador <osalvador@suse.de> |
| Cc: SeongJae Park <sj@kernel.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| include/linux/mm.h | 19 ++++++++++ |
| mm/debug.c | 42 ++-------------------- |
| mm/util.c | 81 +++++++++++++++++++++++++++++++++++++++++++ |
| 3 files changed, 104 insertions(+), 38 deletions(-) |
| |
| --- a/include/linux/mm.h~mm-util-introduce-snapshot_page |
| +++ a/include/linux/mm.h |
| @@ -4199,4 +4199,23 @@ static inline bool page_pool_page_is_pp( |
| } |
| #endif |
| |
| +#define PAGE_SNAPSHOT_FAITHFUL (1 << 0) /* folio snapshot is consistent with the page */ |
| +#define PAGE_SNAPSHOT_PG_BUDDY (1 << 1) /* page was a free buddy page at snapshot time */ |
| +#define PAGE_SNAPSHOT_PG_IDLE (1 << 2) /* folio tested idle at snapshot time */ |
| + |
| +struct page_snapshot { |
| + struct folio folio_snapshot; /* copy of the folio state for the page */ |
| + struct page page_snapshot; /* copy of the struct page itself */ |
| + unsigned long pfn; /* page_to_pfn() of the original page */ |
| + unsigned long idx; /* index of the page in its folio; 0 for a head/single page */ |
| + unsigned long flags; /* PAGE_SNAPSHOT_* bits */ |
| +}; |
| + |
| +static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps) |
| +{ |
| + return ps->flags & PAGE_SNAPSHOT_FAITHFUL; /* cleared when snapshot_page() falls back */ |
| +} |
| + |
| +void snapshot_page(struct page_snapshot *ps, const struct page *page); |
| + |
| #endif /* _LINUX_MM_H */ |
| --- a/mm/debug.c~mm-util-introduce-snapshot_page |
| +++ a/mm/debug.c |
| @@ -129,47 +129,13 @@ static void __dump_folio(struct folio *f |
| |
| static void __dump_page(const struct page *page) |
| { |
| - struct folio *foliop, folio; |
| - struct page precise; |
| - unsigned long head; |
| - unsigned long pfn = page_to_pfn(page); |
| - unsigned long idx, nr_pages = 1; |
| - int loops = 5; |
| + struct page_snapshot ps; /* local copy of page and folio state */ |
| |
| -again: |
| - memcpy(&precise, page, sizeof(*page)); |
| - head = precise.compound_head; |
| - if ((head & 1) == 0) { |
| - foliop = (struct folio *)&precise; |
| - idx = 0; |
| - if (!folio_test_large(foliop)) |
| - goto dump; |
| - foliop = (struct folio *)page; |
| - } else { |
| - foliop = (struct folio *)(head - 1); |
| - idx = folio_page_idx(foliop, page); |
| - } |
| - |
| - if (idx < MAX_FOLIO_NR_PAGES) { |
| - memcpy(&folio, foliop, 2 * sizeof(struct page)); |
| - nr_pages = folio_nr_pages(&folio); |
| - if (nr_pages > 1) |
| - memcpy(&folio.__page_2, &foliop->__page_2, |
| - sizeof(struct page)); |
| - foliop = &folio; |
| - } |
| - |
| - if (idx > nr_pages) { |
| - if (loops-- > 0) |
| - goto again; |
| + snapshot_page(&ps, page); /* copy the state first, then dump from the copy */ |
| + if (!snapshot_page_is_faithful(&ps)) |
| pr_warn("page does not match folio\n"); |
| - precise.compound_head &= ~1UL; |
| - foliop = (struct folio *)&precise; |
| - idx = 0; |
| - } |
| |
| -dump: |
| - __dump_folio(foliop, &precise, pfn, idx); |
| + __dump_folio(&ps.folio_snapshot, &ps.page_snapshot, ps.pfn, ps.idx); /* dump the copies */ |
| } |
| |
| void dump_page(const struct page *page, const char *reason) |
| --- a/mm/util.c~mm-util-introduce-snapshot_page |
| +++ a/mm/util.c |
| @@ -25,6 +25,7 @@ |
| #include <linux/sizes.h> |
| #include <linux/compat.h> |
| #include <linux/fsnotify.h> |
| +#include <linux/page_idle.h> |
| |
| #include <linux/uaccess.h> |
| |
| @@ -1172,6 +1173,86 @@ int compat_vma_mmap_prepare(struct file |
| } |
| EXPORT_SYMBOL(compat_vma_mmap_prepare); |
| |
| +static void set_ps_flags(struct page_snapshot *ps, const struct folio *folio, |
| + const struct page *page) |
| +{ |
| + /* |
| + * OR the buddy and idle state into ps->flags. Only the first page of a |
| + * high-order buddy page has PageBuddy() set, so we have to check |
| + * manually whether this page is part of a high-order buddy page. |
| + */ |
| + if (PageBuddy(page)) |
| + ps->flags |= PAGE_SNAPSHOT_PG_BUDDY; |
| + else if (page_count(page) == 0 && is_free_buddy_page(page)) |
| + ps->flags |= PAGE_SNAPSHOT_PG_BUDDY; |
| + |
| + if (folio_test_idle(folio)) /* idle state is read from the folio, not @page */ |
| + ps->flags |= PAGE_SNAPSHOT_PG_IDLE; |
| +} |
| + |
| +/** |
| + * snapshot_page() - Create a snapshot of a struct page |
| + * @ps: Pointer to a struct page_snapshot to store the page snapshot |
| + * @page: The page to snapshot |
| + * |
| + * Create a snapshot of the page and store both its struct page and struct |
| + * folio representations in @ps. |
| + * |
| + * A snapshot is marked as "faithful" if the compound state of @page was |
| + * stable and allowed safe reconstruction of the folio representation. If |
| + * this is still not possible after a few retries (e.g. due to folio |
| + * splitting), snapshot_page() falls back to treating @page as a single page |
| + * and clears all flags, marking the snapshot as "unfaithful". The |
| + * snapshot_page_is_faithful() helper can be used to check for this condition. |
| + */ |
| +void snapshot_page(struct page_snapshot *ps, const struct page *page) |
| +{ |
| + unsigned long head, nr_pages = 1; |
| + struct folio *foliop; |
| + int loops = 5; /* retry budget when page and folio disagree */ |
| + |
| + ps->pfn = page_to_pfn(page); |
| + |
| +again: |
| + ps->flags = PAGE_SNAPSHOT_FAITHFUL; /* drop flags set by a failed attempt */ |
| + memset(&ps->folio_snapshot, 0, sizeof(struct folio)); |
| + memcpy(&ps->page_snapshot, page, sizeof(*page)); |
| + head = ps->page_snapshot.compound_head; |
| + if ((head & 1) == 0) { /* bit 0 clear: head or single page, index 0 */ |
| + ps->idx = 0; |
| + foliop = (struct folio *)&ps->page_snapshot; |
| + if (!folio_test_large(foliop)) { |
| + set_ps_flags(ps, page_folio(page), page); |
| + memcpy(&ps->folio_snapshot, foliop, |
| + sizeof(struct page)); |
| + return; |
| + } |
| + foliop = (struct folio *)page; |
| + } else { |
| + foliop = (struct folio *)(head - 1); /* strip bit 0 to get the folio pointer */ |
| + ps->idx = folio_page_idx(foliop, page); |
| + } |
| + |
| + if (ps->idx < MAX_FOLIO_NR_PAGES) { |
| + memcpy(&ps->folio_snapshot, foliop, 2 * sizeof(struct page)); |
| + nr_pages = folio_nr_pages(&ps->folio_snapshot); |
| + if (nr_pages > 1) |
| + memcpy(&ps->folio_snapshot.__page_2, &foliop->__page_2, |
| + sizeof(struct page)); |
| + set_ps_flags(ps, foliop, page); |
| + } |
| + |
| + if (ps->idx >= nr_pages) { /* valid indices are 0..nr_pages-1 */ |
| + if (loops-- > 0) |
| + goto again; |
| + clear_compound_head(&ps->page_snapshot); |
| + foliop = (struct folio *)&ps->page_snapshot; |
| + memcpy(&ps->folio_snapshot, foliop, sizeof(struct page)); |
| + ps->flags = 0; |
| + ps->idx = 0; |
| + } |
| +} |
| + |
| #ifdef CONFIG_MMU |
| /** |
| * folio_pte_batch - detect a PTE batch for a large folio |
| _ |