| From: Lance Yang <ioworker0@gmail.com> |
| Subject: mm/rmap: integrate PMD-mapped folio splitting into pagewalk loop |
| Date: Fri, 14 Jun 2024 09:51:37 +0800 |
| |
| In preparation for supporting try_to_unmap_one() to unmap PMD-mapped |
| folios, start the pagewalk first, then split the PMD-mapped folio |
| under the page table lock via the new split_huge_pmd_locked() helper, |
| instead of calling split_huge_pmd_address() before the walk. |
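| |
| The resulting flow in the try_to_unmap_one() pagewalk loop looks |
| roughly like this (a condensed sketch of the mm/rmap.c hunk below, |
| not a verbatim copy): |
| |
| 	while (page_vma_mapped_walk(&pvmw)) { |
| 		... |
| 		if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) { |
| 			/* |
| 			 * Split the PMD while still holding the PTL |
| 			 * taken by the walk, then restart the walk on |
| 			 * the now-PTE-mapped page table. |
| 			 */ |
| 			split_huge_pmd_locked(vma, pvmw.address, |
| 					      pvmw.pmd, false, folio); |
| 			flags &= ~TTU_SPLIT_HUGE_PMD; |
| 			page_vma_mapped_walk_restart(&pvmw); |
| 			continue; |
| 		} |
| 		/* continue with the regular PTE unmap path */ |
| 		... |
| 	} |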
| |
| Link: https://lkml.kernel.org/r/20240614015138.31461-3-ioworker0@gmail.com |
| Signed-off-by: Lance Yang <ioworker0@gmail.com> |
| Suggested-by: David Hildenbrand <david@redhat.com> |
| Acked-by: David Hildenbrand <david@redhat.com> |
| Suggested-by: Baolin Wang <baolin.wang@linux.alibaba.com> |
| Acked-by: Zi Yan <ziy@nvidia.com> |
| Cc: Bang Li <libang.li@antgroup.com> |
| Cc: Barry Song <baohua@kernel.org> |
| Cc: Fangrui Song <maskray@google.com> |
| Cc: Jeff Xie <xiehuan09@gmail.com> |
| Cc: Kefeng Wang <wangkefeng.wang@huawei.com> |
| Cc: Matthew Wilcox (Oracle) <willy@infradead.org> |
| Cc: Michal Hocko <mhocko@suse.com> |
| Cc: Minchan Kim <minchan@kernel.org> |
| Cc: Muchun Song <songmuchun@bytedance.com> |
| Cc: Peter Xu <peterx@redhat.com> |
| Cc: Ryan Roberts <ryan.roberts@arm.com> |
| Cc: SeongJae Park <sj@kernel.org> |
| Cc: Yang Shi <shy828301@gmail.com> |
| Cc: Yin Fengwei <fengwei.yin@intel.com> |
| Cc: Zach O'Keefe <zokeefe@google.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| include/linux/huge_mm.h | 6 ++++++ |
| include/linux/rmap.h | 24 ++++++++++++++++++++++++ |
| mm/huge_memory.c | 42 ++++++++++++++++++++++-------------------- |
| mm/rmap.c | 21 +++++++++++++++------ |
| 4 files changed, 67 insertions(+), 26 deletions(-) |
| |
| --- a/include/linux/huge_mm.h~mm-rmap-integrate-pmd-mapped-folio-splitting-into-pagewalk-loop |
| +++ a/include/linux/huge_mm.h |
| @@ -428,6 +428,9 @@ static inline bool thp_migration_support |
| return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); |
| } |
| |
| +void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, |
| + pmd_t *pmd, bool freeze, struct folio *folio); |
| + |
| #else /* CONFIG_TRANSPARENT_HUGEPAGE */ |
| |
| static inline bool folio_test_pmd_mappable(struct folio *folio) |
| @@ -490,6 +493,9 @@ static inline void __split_huge_pmd(stru |
| unsigned long address, bool freeze, struct folio *folio) {} |
| static inline void split_huge_pmd_address(struct vm_area_struct *vma, |
| unsigned long address, bool freeze, struct folio *folio) {} |
| +static inline void split_huge_pmd_locked(struct vm_area_struct *vma, |
| + unsigned long address, pmd_t *pmd, |
| + bool freeze, struct folio *folio) {} |
| |
| #define split_huge_pud(__vma, __pmd, __address) \ |
| do { } while (0) |
| --- a/include/linux/rmap.h~mm-rmap-integrate-pmd-mapped-folio-splitting-into-pagewalk-loop |
| +++ a/include/linux/rmap.h |
| @@ -703,6 +703,30 @@ static inline void page_vma_mapped_walk_ |
| spin_unlock(pvmw->ptl); |
| } |
| |
| +/** |
| + * page_vma_mapped_walk_restart - Restart the page table walk. |
| + * @pvmw: Pointer to struct page_vma_mapped_walk. |
| + * |
| + * It restarts the page table walk when changes occur in the page |
| + * table, such as splitting a PMD. Ensures that the PTL held during |
| + * the previous walk is released and resets the state to allow for |
| + * a new walk starting at the current address stored in pvmw->address. |
| + */ |
| +static inline void |
| +page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw) |
| +{ |
| + WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte); |
| + |
| + if (likely(pvmw->ptl)) |
| + spin_unlock(pvmw->ptl); |
| + else |
| + WARN_ON_ONCE(1); |
| + |
| + pvmw->ptl = NULL; |
| + pvmw->pmd = NULL; |
| + pvmw->pte = NULL; |
| +} |
| + |
| bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw); |
| |
| /* |
| --- a/mm/huge_memory.c~mm-rmap-integrate-pmd-mapped-folio-splitting-into-pagewalk-loop |
| +++ a/mm/huge_memory.c |
| @@ -2583,6 +2583,27 @@ static void __split_huge_pmd_locked(stru |
| pmd_populate(mm, pmd, pgtable); |
| } |
| |
| +void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, |
| + pmd_t *pmd, bool freeze, struct folio *folio) |
| +{ |
| + VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio)); |
| + VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE)); |
| + VM_WARN_ON_ONCE(folio && !folio_test_locked(folio)); |
| + VM_BUG_ON(freeze && !folio); |
| + |
| + /* |
| + * When the caller requests to set up a migration entry, we |
| + * require a folio to check the PMD against. Otherwise, there |
| + * is a risk of replacing the wrong folio. |
| + */ |
| + if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || |
| + is_pmd_migration_entry(*pmd)) { |
| + if (folio && folio != pmd_folio(*pmd)) |
| + return; |
| + __split_huge_pmd_locked(vma, pmd, address, freeze); |
| + } |
| +} |
| + |
| void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, |
| unsigned long address, bool freeze, struct folio *folio) |
| { |
| @@ -2594,26 +2615,7 @@ void __split_huge_pmd(struct vm_area_str |
| (address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE); |
| mmu_notifier_invalidate_range_start(&range); |
| ptl = pmd_lock(vma->vm_mm, pmd); |
| - |
| - /* |
| - * If caller asks to setup a migration entry, we need a folio to check |
| - * pmd against. Otherwise we can end up replacing wrong folio. |
| - */ |
| - VM_BUG_ON(freeze && !folio); |
| - VM_WARN_ON_ONCE(folio && !folio_test_locked(folio)); |
| - |
| - if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || |
| - is_pmd_migration_entry(*pmd)) { |
| - /* |
| - * It's safe to call pmd_page when folio is set because it's |
| - * guaranteed that pmd is present. |
| - */ |
| - if (folio && folio != pmd_folio(*pmd)) |
| - goto out; |
| - __split_huge_pmd_locked(vma, pmd, range.start, freeze); |
| - } |
| - |
| -out: |
| + split_huge_pmd_locked(vma, range.start, pmd, freeze, folio); |
| spin_unlock(ptl); |
| mmu_notifier_invalidate_range_end(&range); |
| } |
| --- a/mm/rmap.c~mm-rmap-integrate-pmd-mapped-folio-splitting-into-pagewalk-loop |
| +++ a/mm/rmap.c |
| @@ -1642,9 +1642,6 @@ static bool try_to_unmap_one(struct foli |
| if (flags & TTU_SYNC) |
| pvmw.flags = PVMW_SYNC; |
| |
| - if (flags & TTU_SPLIT_HUGE_PMD) |
| - split_huge_pmd_address(vma, address, false, folio); |
| - |
| /* |
| * For THP, we have to assume the worse case ie pmd for invalidation. |
| * For hugetlb, it could be much worse if we need to do pud |
| @@ -1670,9 +1667,6 @@ static bool try_to_unmap_one(struct foli |
| mmu_notifier_invalidate_range_start(&range); |
| |
| while (page_vma_mapped_walk(&pvmw)) { |
| - /* Unexpected PMD-mapped THP? */ |
| - VM_BUG_ON_FOLIO(!pvmw.pte, folio); |
| - |
| /* |
| * If the folio is in an mlock()d vma, we must not swap it out. |
| */ |
| @@ -1684,6 +1678,21 @@ static bool try_to_unmap_one(struct foli |
| goto walk_abort; |
| } |
| |
| + if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) { |
| + /* |
| + * We temporarily have to drop the PTL and start once |
| + * again from that now-PTE-mapped page table. |
| + */ |
| + split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd, |
| + false, folio); |
| + flags &= ~TTU_SPLIT_HUGE_PMD; |
| + page_vma_mapped_walk_restart(&pvmw); |
| + continue; |
| + } |
| + |
| + /* Unexpected PMD-mapped THP? */ |
| + VM_BUG_ON_FOLIO(!pvmw.pte, folio); |
| + |
| pfn = pte_pfn(ptep_get(pvmw.pte)); |
| subpage = folio_page(folio, pfn - folio_pfn(folio)); |
| address = pvmw.address; |
| _ |