| From: Hugh Dickins <hughd@google.com> |
| Subject: mm,thp,rmap: subpages_mapcount of PTE-mapped subpages |
| Date: Tue, 22 Nov 2022 01:42:04 -0800 (PST) |
| |
| Patch series "mm,thp,rmap: rework the use of subpages_mapcount", v2. |
| |
| |
| This patch (of 3): |
| |
| Following a suggestion from Linus, instead of counting every PTE map of a |
| compound page in subpages_mapcount, just count how many of its subpages |
| are PTE-mapped: this yields the exact number needed for NR_ANON_MAPPED and |
| NR_FILE_MAPPED stats, without any need for a locked scan of subpages; and |
| requires updating the count less often. |
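| |
| To make the new rule concrete, here is a minimal user-space sketch (an |
| illustration only, with made-up model_* names and plain ints instead of |
| the kernel's atomic_t fields): subpage _mapcount is -1 based, and |
| subpages_mapcount only changes when that count crosses the -1/0 boundary, |
| so it always holds the number of PTE-mapped subpages: |
| |
| /* All per-subpage counts start at -1 (unmapped); subpages_mapcount at 0. */ |
| struct model_head { |
| 	int compound_mapcount;		/* -1 based: -1 means no PMD mapping */ |
| 	int subpages_mapcount;		/* number of PTE-mapped subpages */ |
| 	int subpage_mapcount[512];	/* per-subpage _mapcount, -1 based; |
| 					   512 assumes a 2M THP of 4K pages */ |
| }; |
| |
| static void model_pte_map(struct model_head *head, int i) |
| { |
| 	if (++head->subpage_mapcount[i] == 0)	/* -1 -> 0: first PTE map */ |
| 		head->subpages_mapcount++; |
| } |
| |
| static void model_pte_unmap(struct model_head *head, int i) |
| { |
| 	if (--head->subpage_mapcount[i] == -1)	/* 0 -> -1: last PTE map gone */ |
| 		head->subpages_mapcount--; |
| } |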
| |
| This does then revert total_mapcount() and folio_mapcount() to needing a |
| scan of subpages; but they are inherently racy, and need no locking, so |
| Linus is right that the scans are much better done there. Plus (unlike in |
| 6.1 and previous) subpages_mapcount lets us avoid the scan in the common |
| case of no PTE maps. And page_mapped() and folio_mapped() remain scanless |
| and just as efficient with the new meaning of subpages_mapcount: those are |
| the functions which I most wanted to remove the scan from. |
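| |
| Sketching that scanless check on the same made-up model (compound_mapcount |
| kept -1 based like the kernel's, subpages_mapcount always >= 0), "is this |
| compound page mapped at all?" reduces to a two-field test with no loop |
| over subpages; the folio_large_is_mapped() helper added below does the |
| same thing with the real atomic fields: |
| |
| #include <stdbool.h> |
| |
| static bool model_is_mapped(const struct model_head *head) |
| { |
| 	/* |
| 	 * True iff there is a PMD mapping (compound_mapcount >= 0) or |
| 	 * at least one PTE-mapped subpage (subpages_mapcount >= 1). |
| 	 */ |
| 	return head->compound_mapcount + head->subpages_mapcount >= 0; |
| } |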
| |
| The updated page_dup_compound_rmap() is no longer suitable for use by anon |
| THP's __split_huge_pmd_locked(); but page_add_anon_rmap() can be used for |
| that, so long as its VM_BUG_ON_PAGE(!PageLocked) is deleted. |
| |
| Evidence is that this way goes slightly faster than the previous |
| implementation in most cases, and significantly faster in the (now |
| scanless) pmds-after-ptes case: that case started out at 870ms, was |
| brought down to 495ms by the previous series, and now takes around |
| 105ms. |
| |
| Link: https://lkml.kernel.org/r/a5849eca-22f1-3517-bf29-95d982242742@google.com |
| Link: https://lkml.kernel.org/r/eec17e16-4e1-7c59-f1bc-5bca90dac919@google.com |
| Signed-off-by: Hugh Dickins <hughd@google.com> |
| Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> |
| Cc: Dan Carpenter <error27@gmail.com> |
| Cc: David Hildenbrand <david@redhat.com> |
| Cc: James Houghton <jthoughton@google.com> |
| Cc: Johannes Weiner <hannes@cmpxchg.org> |
| Cc: John Hubbard <jhubbard@nvidia.com> |
| Cc: Matthew Wilcox <willy@infradead.org> |
| Cc: Miaohe Lin <linmiaohe@huawei.com> |
| Cc: Mike Kravetz <mike.kravetz@oracle.com> |
| Cc: Mina Almasry <almasrymina@google.com> |
| Cc: Muchun Song <songmuchun@bytedance.com> |
| Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev> |
| Cc: Peter Xu <peterx@redhat.com> |
| Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com> |
| Cc: Vlastimil Babka <vbabka@suse.cz> |
| Cc: Yang Shi <shy828301@gmail.com> |
| Cc: Yu Zhao <yuzhao@google.com> |
| Cc: Zach O'Keefe <zokeefe@google.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| Documentation/mm/transhuge.rst | 3 |
| include/linux/mm.h | 52 ++++++---- |
| include/linux/rmap.h | 9 - |
| mm/huge_memory.c | 2 |
| mm/rmap.c | 160 +++++++++++++------------------ |
| 5 files changed, 107 insertions(+), 119 deletions(-) |
| |
| --- a/Documentation/mm/transhuge.rst~mmthprmap-subpages_mapcount-of-pte-mapped-subpages |
| +++ a/Documentation/mm/transhuge.rst |
| @@ -122,7 +122,8 @@ pages: |
| |
| - map/unmap of sub-pages with PTE entry increment/decrement ->_mapcount |
| on relevant sub-page of the compound page, and also increment/decrement |
| - ->subpages_mapcount, stored in first tail page of the compound page. |
| + ->subpages_mapcount, stored in first tail page of the compound page, when |
| + _mapcount goes from -1 to 0 or 0 to -1: counting sub-pages mapped by PTE. |
| In order to have race-free accounting of sub-pages mapped, changes to |
| sub-page ->_mapcount, ->subpages_mapcount and ->compound_mapcount are |
| all locked by bit_spin_lock of PG_locked in the first tail ->flags. |
| --- a/include/linux/mm.h~mmthprmap-subpages_mapcount-of-pte-mapped-subpages |
| +++ a/include/linux/mm.h |
| @@ -837,7 +837,7 @@ static inline int head_compound_mapcount |
| } |
| |
| /* |
| - * Sum of mapcounts of sub-pages, does not include compound mapcount. |
| + * Number of sub-pages mapped by PTE, does not include compound mapcount. |
| * Must be called only on head of compound page. |
| */ |
| static inline int head_subpages_mapcount(struct page *head) |
| @@ -873,23 +873,7 @@ static inline int page_mapcount(struct p |
| return head_compound_mapcount(page) + mapcount; |
| } |
| |
| -static inline int total_mapcount(struct page *page) |
| -{ |
| - if (likely(!PageCompound(page))) |
| - return atomic_read(&page->_mapcount) + 1; |
| - page = compound_head(page); |
| - return head_compound_mapcount(page) + head_subpages_mapcount(page); |
| -} |
| - |
| -/* |
| - * Return true if this page is mapped into pagetables. |
| - * For compound page it returns true if any subpage of compound page is mapped, |
| - * even if this particular subpage is not itself mapped by any PTE or PMD. |
| - */ |
| -static inline bool page_mapped(struct page *page) |
| -{ |
| - return total_mapcount(page) > 0; |
| -} |
| +int total_compound_mapcount(struct page *head); |
| |
| /** |
| * folio_mapcount() - Calculate the number of mappings of this folio. |
| @@ -906,8 +890,20 @@ static inline int folio_mapcount(struct |
| { |
| if (likely(!folio_test_large(folio))) |
| return atomic_read(&folio->_mapcount) + 1; |
| - return atomic_read(folio_mapcount_ptr(folio)) + 1 + |
| - atomic_read(folio_subpages_mapcount_ptr(folio)); |
| + return total_compound_mapcount(&folio->page); |
| +} |
| + |
| +static inline int total_mapcount(struct page *page) |
| +{ |
| + if (likely(!PageCompound(page))) |
| + return atomic_read(&page->_mapcount) + 1; |
| + return total_compound_mapcount(compound_head(page)); |
| +} |
| + |
| +static inline bool folio_large_is_mapped(struct folio *folio) |
| +{ |
| + return atomic_read(folio_mapcount_ptr(folio)) + |
| + atomic_read(folio_subpages_mapcount_ptr(folio)) >= 0; |
| } |
| |
| /** |
| @@ -918,7 +914,21 @@ static inline int folio_mapcount(struct |
| */ |
| static inline bool folio_mapped(struct folio *folio) |
| { |
| - return folio_mapcount(folio) > 0; |
| + if (likely(!folio_test_large(folio))) |
| + return atomic_read(&folio->_mapcount) >= 0; |
| + return folio_large_is_mapped(folio); |
| +} |
| + |
| +/* |
| + * Return true if this page is mapped into pagetables. |
| + * For compound page it returns true if any sub-page of compound page is mapped, |
| + * even if this particular sub-page is not itself mapped by any PTE or PMD. |
| + */ |
| +static inline bool page_mapped(struct page *page) |
| +{ |
| + if (likely(!PageCompound(page))) |
| + return atomic_read(&page->_mapcount) >= 0; |
| + return folio_large_is_mapped(page_folio(page)); |
| } |
| |
| static inline struct page *virt_to_head_page(const void *x) |
| --- a/include/linux/rmap.h~mmthprmap-subpages_mapcount-of-pte-mapped-subpages |
| +++ a/include/linux/rmap.h |
| @@ -204,14 +204,15 @@ void hugepage_add_anon_rmap(struct page |
| void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, |
| unsigned long address); |
| |
| -void page_dup_compound_rmap(struct page *page, bool compound); |
| +void page_dup_compound_rmap(struct page *page); |
| |
| static inline void page_dup_file_rmap(struct page *page, bool compound) |
| { |
| - if (PageCompound(page)) |
| - page_dup_compound_rmap(page, compound); |
| - else |
| + /* Is page being mapped by PTE? */ |
| + if (likely(!compound)) |
| atomic_inc(&page->_mapcount); |
| + else |
| + page_dup_compound_rmap(page); |
| } |
| |
| /** |
| --- a/mm/huge_memory.c~mmthprmap-subpages_mapcount-of-pte-mapped-subpages |
| +++ a/mm/huge_memory.c |
| @@ -2221,7 +2221,7 @@ static void __split_huge_pmd_locked(stru |
| BUG_ON(!pte_none(*pte)); |
| set_pte_at(mm, addr, pte, entry); |
| if (!pmd_migration) |
| - page_dup_compound_rmap(page + i, false); |
| + page_add_anon_rmap(page + i, vma, addr, false); |
| pte_unmap(pte); |
| } |
| |
| --- a/mm/rmap.c~mmthprmap-subpages_mapcount-of-pte-mapped-subpages |
| +++ a/mm/rmap.c |
| @@ -1117,55 +1117,36 @@ static void unlock_compound_mapcounts(st |
| bit_spin_unlock(PG_locked, &head[1].flags); |
| } |
| |
| -/* |
| - * When acting on a compound page under lock_compound_mapcounts(), avoid the |
| - * unnecessary overhead of an actual atomic operation on its subpage mapcount. |
| - * Return true if this is the first increment or the last decrement |
| - * (remembering that page->_mapcount -1 represents logical mapcount 0). |
| - */ |
| -static bool subpage_mapcount_inc(struct page *page) |
| -{ |
| - int orig_mapcount = atomic_read(&page->_mapcount); |
| - |
| - atomic_set(&page->_mapcount, orig_mapcount + 1); |
| - return orig_mapcount < 0; |
| -} |
| - |
| -static bool subpage_mapcount_dec(struct page *page) |
| -{ |
| - int orig_mapcount = atomic_read(&page->_mapcount); |
| - |
| - atomic_set(&page->_mapcount, orig_mapcount - 1); |
| - return orig_mapcount == 0; |
| -} |
| - |
| -/* |
| - * When mapping a THP's first pmd, or unmapping its last pmd, if that THP |
| - * also has pte mappings, then those must be discounted: in order to maintain |
| - * NR_ANON_MAPPED and NR_FILE_MAPPED statistics exactly, without any drift, |
| - * and to decide when an anon THP should be put on the deferred split queue. |
| - * This function must be called between lock_ and unlock_compound_mapcounts(). |
| - */ |
| -static int nr_subpages_unmapped(struct page *head, int nr_subpages) |
| +int total_compound_mapcount(struct page *head) |
| { |
| - int nr = nr_subpages; |
| + int mapcount = head_compound_mapcount(head); |
| + int nr_subpages; |
| int i; |
| |
| - /* Discount those subpages mapped by pte */ |
| + /* In the common case, avoid the loop when no subpages mapped by PTE */ |
| + if (head_subpages_mapcount(head) == 0) |
| + return mapcount; |
| + /* |
| + * Add all the PTE mappings of those subpages mapped by PTE. |
| + * Limit the loop, knowing that only subpages_mapcount are mapped? |
| + * Perhaps: given all the raciness, that may be a good or a bad idea. |
| + */ |
| + nr_subpages = thp_nr_pages(head); |
| for (i = 0; i < nr_subpages; i++) |
| - if (atomic_read(&head[i]._mapcount) >= 0) |
| - nr--; |
| - return nr; |
| + mapcount += atomic_read(&head[i]._mapcount); |
| + |
| + /* But each of those _mapcounts was based on -1 */ |
| + mapcount += nr_subpages; |
| + return mapcount; |
| } |
| |
| /* |
| - * page_dup_compound_rmap(), used when copying mm, or when splitting pmd, |
| + * page_dup_compound_rmap(), used when copying mm, |
| * provides a simple example of using lock_ and unlock_compound_mapcounts(). |
| */ |
| -void page_dup_compound_rmap(struct page *page, bool compound) |
| +void page_dup_compound_rmap(struct page *head) |
| { |
| struct compound_mapcounts mapcounts; |
| - struct page *head; |
| |
| /* |
| * Hugetlb pages could use lock_compound_mapcounts(), like THPs do; |
| @@ -1176,20 +1157,15 @@ void page_dup_compound_rmap(struct page |
| * Note that hugetlb does not call page_add_file_rmap(): |
| * here is where hugetlb shared page mapcount is raised. |
| */ |
| - if (PageHuge(page)) { |
| - atomic_inc(compound_mapcount_ptr(page)); |
| - return; |
| - } |
| + if (PageHuge(head)) { |
| + atomic_inc(compound_mapcount_ptr(head)); |
| + } else if (PageTransHuge(head)) { |
| + /* That test is redundant: it's for safety or to optimize out */ |
| |
| - head = compound_head(page); |
| - lock_compound_mapcounts(head, &mapcounts); |
| - if (compound) { |
| + lock_compound_mapcounts(head, &mapcounts); |
| mapcounts.compound_mapcount++; |
| - } else { |
| - mapcounts.subpages_mapcount++; |
| - subpage_mapcount_inc(page); |
| + unlock_compound_mapcounts(head, &mapcounts); |
| } |
| - unlock_compound_mapcounts(head, &mapcounts); |
| } |
| |
| /** |
| @@ -1304,35 +1280,34 @@ void page_add_anon_rmap(struct page *pag |
| struct compound_mapcounts mapcounts; |
| int nr = 0, nr_pmdmapped = 0; |
| bool compound = flags & RMAP_COMPOUND; |
| - bool first; |
| + bool first = true; |
| |
| if (unlikely(PageKsm(page))) |
| lock_page_memcg(page); |
| - else |
| - VM_BUG_ON_PAGE(!PageLocked(page), page); |
| |
| - if (likely(!PageCompound(page))) { |
| + /* Is page being mapped by PTE? Is this its first map to be added? */ |
| + if (likely(!compound)) { |
| first = atomic_inc_and_test(&page->_mapcount); |
| nr = first; |
| + if (first && PageCompound(page)) { |
| + struct page *head = compound_head(page); |
| + |
| + lock_compound_mapcounts(head, &mapcounts); |
| + mapcounts.subpages_mapcount++; |
| + nr = !mapcounts.compound_mapcount; |
| + unlock_compound_mapcounts(head, &mapcounts); |
| + } |
| + } else if (PageTransHuge(page)) { |
| + /* That test is redundant: it's for safety or to optimize out */ |
| |
| - } else if (compound && PageTransHuge(page)) { |
| lock_compound_mapcounts(page, &mapcounts); |
| first = !mapcounts.compound_mapcount; |
| mapcounts.compound_mapcount++; |
| if (first) { |
| - nr = nr_pmdmapped = thp_nr_pages(page); |
| - if (mapcounts.subpages_mapcount) |
| - nr = nr_subpages_unmapped(page, nr_pmdmapped); |
| + nr_pmdmapped = thp_nr_pages(page); |
| + nr = nr_pmdmapped - mapcounts.subpages_mapcount; |
| } |
| unlock_compound_mapcounts(page, &mapcounts); |
| - } else { |
| - struct page *head = compound_head(page); |
| - |
| - lock_compound_mapcounts(head, &mapcounts); |
| - mapcounts.subpages_mapcount++; |
| - first = subpage_mapcount_inc(page); |
| - nr = first && !mapcounts.compound_mapcount; |
| - unlock_compound_mapcounts(head, &mapcounts); |
| } |
| |
| VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page); |
| @@ -1411,28 +1386,29 @@ void page_add_file_rmap(struct page *pag |
| VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page); |
| lock_page_memcg(page); |
| |
| - if (likely(!PageCompound(page))) { |
| + /* Is page being mapped by PTE? Is this its first map to be added? */ |
| + if (likely(!compound)) { |
| first = atomic_inc_and_test(&page->_mapcount); |
| nr = first; |
| + if (first && PageCompound(page)) { |
| + struct page *head = compound_head(page); |
| + |
| + lock_compound_mapcounts(head, &mapcounts); |
| + mapcounts.subpages_mapcount++; |
| + nr = !mapcounts.compound_mapcount; |
| + unlock_compound_mapcounts(head, &mapcounts); |
| + } |
| + } else if (PageTransHuge(page)) { |
| + /* That test is redundant: it's for safety or to optimize out */ |
| |
| - } else if (compound && PageTransHuge(page)) { |
| lock_compound_mapcounts(page, &mapcounts); |
| first = !mapcounts.compound_mapcount; |
| mapcounts.compound_mapcount++; |
| if (first) { |
| - nr = nr_pmdmapped = thp_nr_pages(page); |
| - if (mapcounts.subpages_mapcount) |
| - nr = nr_subpages_unmapped(page, nr_pmdmapped); |
| + nr_pmdmapped = thp_nr_pages(page); |
| + nr = nr_pmdmapped - mapcounts.subpages_mapcount; |
| } |
| unlock_compound_mapcounts(page, &mapcounts); |
| - } else { |
| - struct page *head = compound_head(page); |
| - |
| - lock_compound_mapcounts(head, &mapcounts); |
| - mapcounts.subpages_mapcount++; |
| - first = subpage_mapcount_inc(page); |
| - nr = first && !mapcounts.compound_mapcount; |
| - unlock_compound_mapcounts(head, &mapcounts); |
| } |
| |
| if (nr_pmdmapped) |
| @@ -1471,29 +1447,29 @@ void page_remove_rmap(struct page *page, |
| |
| lock_page_memcg(page); |
| |
| - /* page still mapped by someone else? */ |
| - if (likely(!PageCompound(page))) { |
| + /* Is page being unmapped by PTE? Is this its last map to be removed? */ |
| + if (likely(!compound)) { |
| last = atomic_add_negative(-1, &page->_mapcount); |
| nr = last; |
| + if (last && PageCompound(page)) { |
| + struct page *head = compound_head(page); |
| + |
| + lock_compound_mapcounts(head, &mapcounts); |
| + mapcounts.subpages_mapcount--; |
| + nr = !mapcounts.compound_mapcount; |
| + unlock_compound_mapcounts(head, &mapcounts); |
| + } |
| + } else if (PageTransHuge(page)) { |
| + /* That test is redundant: it's for safety or to optimize out */ |
| |
| - } else if (compound && PageTransHuge(page)) { |
| lock_compound_mapcounts(page, &mapcounts); |
| mapcounts.compound_mapcount--; |
| last = !mapcounts.compound_mapcount; |
| if (last) { |
| - nr = nr_pmdmapped = thp_nr_pages(page); |
| - if (mapcounts.subpages_mapcount) |
| - nr = nr_subpages_unmapped(page, nr_pmdmapped); |
| + nr_pmdmapped = thp_nr_pages(page); |
| + nr = nr_pmdmapped - mapcounts.subpages_mapcount; |
| } |
| unlock_compound_mapcounts(page, &mapcounts); |
| - } else { |
| - struct page *head = compound_head(page); |
| - |
| - lock_compound_mapcounts(head, &mapcounts); |
| - mapcounts.subpages_mapcount--; |
| - last = subpage_mapcount_dec(page); |
| - nr = last && !mapcounts.compound_mapcount; |
| - unlock_compound_mapcounts(head, &mapcounts); |
| } |
| |
| if (nr_pmdmapped) { |
| _ |