| From: Lorenzo Stoakes <lstoakes@gmail.com> |
| Subject: mm: mlock: use folios and a folio batch internally |
| Date: Thu, 12 Jan 2023 12:39:29 +0000 |
| |
| This brings mlock in line with the folio batches declared in mm/swap.c and |
| makes the code more consistent between the two files. |
| |
| The existing mechanism for identifying which operation each folio in the |
| batch is undergoing is maintained, i.e. using the lower 2 bits of the |
| struct folio address (previously struct page address). This should |
| continue to function correctly as folios remain at least system |
| word-aligned. |
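| |
| For illustration only (not part of the patch), a minimal stand-alone sketch |
| of the same low-bit tagging trick, assuming nothing more than that the |
| tagged object is at least word-aligned so its bottom two bits are free |
| (the names here are hypothetical and do not exist in the kernel): |
| |
|   #include <assert.h> |
|   #include <stdint.h> |
|   #include <stdio.h> |
|   #include <stdlib.h> |
| |
|   #define TAG_LRU  0x1UL |
|   #define TAG_NEW  0x2UL |
|   #define TAG_MASK 0x3UL |
| |
|   /* Fold a small tag into the low bits of an aligned pointer. */ |
|   static void *tag_ptr(void *p, unsigned long tag) |
|   { |
|           return (void *)((uintptr_t)p + tag); |
|   } |
| |
|   /* Recover both the tag and the original, untagged pointer. */ |
|   static void *untag_ptr(void *p, unsigned long *tag) |
|   { |
|           *tag = (uintptr_t)p & TAG_MASK; |
|           return (void *)((uintptr_t)p & ~TAG_MASK); |
|   } |
| |
|   int main(void) |
|   { |
|           long *obj = malloc(sizeof(*obj)); /* at least word-aligned */ |
|           unsigned long tag; |
|           void *plain; |
| |
|           assert(((uintptr_t)obj & TAG_MASK) == 0); |
|           plain = untag_ptr(tag_ptr(obj, TAG_LRU), &tag); |
|           printf("tag %lu, pointer intact: %d\n", tag, plain == (void *)obj); |
|           free(obj); |
|           return 0; |
|   } |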
| |
| All invocations of mlock() pass either a non-compound page or the head of a |
| THP-compound page, and no tail pages need updating, so this functionality |
| works correctly with struct folios used internally rather than struct pages. |
| |
| In this patch the external interface is kept identical to before in order |
| to maintain separation between patches in the series, using a rather |
| awkward conversion from struct page to struct folio in relevant functions. |
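| |
| For example, the interim shape of such a function looks roughly like this |
| (a sketch of the pattern, mirroring the hunks below rather than reproducing |
| them exactly): |
| |
|   void munlock_page(struct page *page) |
|   { |
|           struct folio *folio = page_folio(page); |
| |
|           /* ... all further work is done on the folio ... */ |
|   } |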
| |
| However, keeping the existing interface is intended to be temporary: the |
| next patch in the series will update these interfaces to accept folios |
| directly. |
| |
| Link: https://lkml.kernel.org/r/9f894d54d568773f4ed3cb0eef5f8932f62c95f4.1673526881.git.lstoakes@gmail.com |
| Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com> |
| Acked-by: Vlastimil Babka <vbabka@suse.cz> |
| Cc: Christian Brauner <brauner@kernel.org> |
| Cc: Geert Uytterhoeven <geert@linux-m68k.org> |
| Cc: Hugh Dickins <hughd@google.com> |
| Cc: Joel Fernandes (Google) <joel@joelfernandes.org> |
| Cc: Jonathan Corbet <corbet@lwn.net> |
| Cc: Liam R. Howlett <Liam.Howlett@oracle.com> |
| Cc: Matthew Wilcox <willy@infradead.org> |
| Cc: Mike Rapoport (IBM) <rppt@kernel.org> |
| Cc: William Kucharski <william.kucharski@oracle.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| |
| --- a/mm/mlock.c~mm-mlock-use-folios-and-a-folio-batch-internally |
| +++ a/mm/mlock.c |
| @@ -28,12 +28,12 @@ |
| |
| #include "internal.h" |
| |
| -struct mlock_pvec { |
| +struct mlock_fbatch { |
| local_lock_t lock; |
| - struct pagevec vec; |
| + struct folio_batch fbatch; |
| }; |
| |
| -static DEFINE_PER_CPU(struct mlock_pvec, mlock_pvec) = { |
| +static DEFINE_PER_CPU(struct mlock_fbatch, mlock_fbatch) = { |
| .lock = INIT_LOCAL_LOCK(lock), |
| }; |
| |
| @@ -48,192 +48,192 @@ bool can_do_mlock(void) |
| EXPORT_SYMBOL(can_do_mlock); |
| |
| /* |
| - * Mlocked pages are marked with PageMlocked() flag for efficient testing |
| + * Mlocked folios are marked with the PG_mlocked flag for efficient testing |
| * in vmscan and, possibly, the fault path; and to support semi-accurate |
| * statistics. |
| * |
| - * An mlocked page [PageMlocked(page)] is unevictable. As such, it will |
| - * be placed on the LRU "unevictable" list, rather than the [in]active lists. |
| - * The unevictable list is an LRU sibling list to the [in]active lists. |
| - * PageUnevictable is set to indicate the unevictable state. |
| + * An mlocked folio [folio_test_mlocked(folio)] is unevictable. As such, it |
| + * will be ostensibly placed on the LRU "unevictable" list (actually no such |
| + * list exists), rather than the [in]active lists. PG_unevictable is set to |
| + * indicate the unevictable state. |
| */ |
| |
| -static struct lruvec *__mlock_page(struct page *page, struct lruvec *lruvec) |
| +static struct lruvec *__mlock_folio(struct folio *folio, struct lruvec *lruvec) |
| { |
| /* There is nothing more we can do while it's off LRU */ |
| - if (!TestClearPageLRU(page)) |
| + if (!folio_test_clear_lru(folio)) |
| return lruvec; |
| |
| - lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec); |
| + lruvec = folio_lruvec_relock_irq(folio, lruvec); |
| |
| - if (unlikely(page_evictable(page))) { |
| + if (unlikely(folio_evictable(folio))) { |
| /* |
| - * This is a little surprising, but quite possible: |
| - * PageMlocked must have got cleared already by another CPU. |
| - * Could this page be on the Unevictable LRU? I'm not sure, |
| - * but move it now if so. |
| + * This is a little surprising, but quite possible: PG_mlocked |
| + * must have got cleared already by another CPU. Could this |
| + * folio be unevictable? I'm not sure, but move it now if so. |
| */ |
| - if (PageUnevictable(page)) { |
| - del_page_from_lru_list(page, lruvec); |
| - ClearPageUnevictable(page); |
| - add_page_to_lru_list(page, lruvec); |
| + if (folio_test_unevictable(folio)) { |
| + lruvec_del_folio(lruvec, folio); |
| + folio_clear_unevictable(folio); |
| + lruvec_add_folio(lruvec, folio); |
| + |
| __count_vm_events(UNEVICTABLE_PGRESCUED, |
| - thp_nr_pages(page)); |
| + folio_nr_pages(folio)); |
| } |
| goto out; |
| } |
| |
| - if (PageUnevictable(page)) { |
| - if (PageMlocked(page)) |
| - page->mlock_count++; |
| + if (folio_test_unevictable(folio)) { |
| + if (folio_test_mlocked(folio)) |
| + folio->mlock_count++; |
| goto out; |
| } |
| |
| - del_page_from_lru_list(page, lruvec); |
| - ClearPageActive(page); |
| - SetPageUnevictable(page); |
| - page->mlock_count = !!PageMlocked(page); |
| - add_page_to_lru_list(page, lruvec); |
| - __count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page)); |
| + lruvec_del_folio(lruvec, folio); |
| + folio_clear_active(folio); |
| + folio_set_unevictable(folio); |
| + folio->mlock_count = !!folio_test_mlocked(folio); |
| + lruvec_add_folio(lruvec, folio); |
| + __count_vm_events(UNEVICTABLE_PGCULLED, folio_nr_pages(folio)); |
| out: |
| - SetPageLRU(page); |
| + folio_set_lru(folio); |
| return lruvec; |
| } |
| |
| -static struct lruvec *__mlock_new_page(struct page *page, struct lruvec *lruvec) |
| +static struct lruvec *__mlock_new_folio(struct folio *folio, struct lruvec *lruvec) |
| { |
| - VM_BUG_ON_PAGE(PageLRU(page), page); |
| + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); |
| |
| - lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec); |
| + lruvec = folio_lruvec_relock_irq(folio, lruvec); |
| |
| /* As above, this is a little surprising, but possible */ |
| - if (unlikely(page_evictable(page))) |
| + if (unlikely(folio_evictable(folio))) |
| goto out; |
| |
| - SetPageUnevictable(page); |
| - page->mlock_count = !!PageMlocked(page); |
| - __count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page)); |
| + folio_set_unevictable(folio); |
| + folio->mlock_count = !!folio_test_mlocked(folio); |
| + __count_vm_events(UNEVICTABLE_PGCULLED, folio_nr_pages(folio)); |
| out: |
| - add_page_to_lru_list(page, lruvec); |
| - SetPageLRU(page); |
| + lruvec_add_folio(lruvec, folio); |
| + folio_set_lru(folio); |
| return lruvec; |
| } |
| |
| -static struct lruvec *__munlock_page(struct page *page, struct lruvec *lruvec) |
| +static struct lruvec *__munlock_folio(struct folio *folio, struct lruvec *lruvec) |
| { |
| - int nr_pages = thp_nr_pages(page); |
| + int nr_pages = folio_nr_pages(folio); |
| bool isolated = false; |
| |
| - if (!TestClearPageLRU(page)) |
| + if (!folio_test_clear_lru(folio)) |
| goto munlock; |
| |
| isolated = true; |
| - lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec); |
| + lruvec = folio_lruvec_relock_irq(folio, lruvec); |
| |
| - if (PageUnevictable(page)) { |
| + if (folio_test_unevictable(folio)) { |
| /* Then mlock_count is maintained, but might undercount */ |
| - if (page->mlock_count) |
| - page->mlock_count--; |
| - if (page->mlock_count) |
| + if (folio->mlock_count) |
| + folio->mlock_count--; |
| + if (folio->mlock_count) |
| goto out; |
| } |
| /* else assume that was the last mlock: reclaim will fix it if not */ |
| |
| munlock: |
| - if (TestClearPageMlocked(page)) { |
| - __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); |
| - if (isolated || !PageUnevictable(page)) |
| + if (folio_test_clear_mlocked(folio)) { |
| + __zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages); |
| + if (isolated || !folio_test_unevictable(folio)) |
| __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages); |
| else |
| __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages); |
| } |
| |
| - /* page_evictable() has to be checked *after* clearing Mlocked */ |
| - if (isolated && PageUnevictable(page) && page_evictable(page)) { |
| - del_page_from_lru_list(page, lruvec); |
| - ClearPageUnevictable(page); |
| - add_page_to_lru_list(page, lruvec); |
| + /* folio_evictable() has to be checked *after* clearing Mlocked */ |
| + if (isolated && folio_test_unevictable(folio) && folio_evictable(folio)) { |
| + lruvec_del_folio(lruvec, folio); |
| + folio_clear_unevictable(folio); |
| + lruvec_add_folio(lruvec, folio); |
| __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); |
| } |
| out: |
| if (isolated) |
| - SetPageLRU(page); |
| + folio_set_lru(folio); |
| return lruvec; |
| } |
| |
| /* |
| - * Flags held in the low bits of a struct page pointer on the mlock_pvec. |
| + * Flags held in the low bits of a struct folio pointer on the mlock_fbatch. |
| */ |
| -#define LRU_PAGE 0x1 |
| -#define NEW_PAGE 0x2 |
| -static inline struct page *mlock_lru(struct page *page) |
| +#define LRU_FOLIO 0x1 |
| +#define NEW_FOLIO 0x2 |
| +static inline struct folio *mlock_lru(struct folio *folio) |
| { |
| - return (struct page *)((unsigned long)page + LRU_PAGE); |
| + return (struct folio *)((unsigned long)folio + LRU_FOLIO); |
| } |
| |
| -static inline struct page *mlock_new(struct page *page) |
| +static inline struct folio *mlock_new(struct folio *folio) |
| { |
| - return (struct page *)((unsigned long)page + NEW_PAGE); |
| + return (struct folio *)((unsigned long)folio + NEW_FOLIO); |
| } |
| |
| /* |
| - * mlock_pagevec() is derived from pagevec_lru_move_fn(): |
| - * perhaps that can make use of such page pointer flags in future, |
| - * but for now just keep it for mlock. We could use three separate |
| - * pagevecs instead, but one feels better (munlocking a full pagevec |
| - * does not need to drain mlocking pagevecs first). |
| + * mlock_folio_batch() is derived from folio_batch_move_lru(): perhaps that can |
| + * make use of such folio pointer flags in future, but for now just keep it for |
| + * mlock. We could use three separate folio batches instead, but one feels |
| + * better (munlocking a full folio batch does not need to drain mlocking folio |
| + * batches first). |
| */ |
| -static void mlock_pagevec(struct pagevec *pvec) |
| +static void mlock_folio_batch(struct folio_batch *fbatch) |
| { |
| struct lruvec *lruvec = NULL; |
| unsigned long mlock; |
| - struct page *page; |
| + struct folio *folio; |
| int i; |
| |
| - for (i = 0; i < pagevec_count(pvec); i++) { |
| - page = pvec->pages[i]; |
| - mlock = (unsigned long)page & (LRU_PAGE | NEW_PAGE); |
| - page = (struct page *)((unsigned long)page - mlock); |
| - pvec->pages[i] = page; |
| - |
| - if (mlock & LRU_PAGE) |
| - lruvec = __mlock_page(page, lruvec); |
| - else if (mlock & NEW_PAGE) |
| - lruvec = __mlock_new_page(page, lruvec); |
| + for (i = 0; i < folio_batch_count(fbatch); i++) { |
| + folio = fbatch->folios[i]; |
| + mlock = (unsigned long)folio & (LRU_FOLIO | NEW_FOLIO); |
| + folio = (struct folio *)((unsigned long)folio - mlock); |
| + fbatch->folios[i] = folio; |
| + |
| + if (mlock & LRU_FOLIO) |
| + lruvec = __mlock_folio(folio, lruvec); |
| + else if (mlock & NEW_FOLIO) |
| + lruvec = __mlock_new_folio(folio, lruvec); |
| else |
| - lruvec = __munlock_page(page, lruvec); |
| + lruvec = __munlock_folio(folio, lruvec); |
| } |
| |
| if (lruvec) |
| unlock_page_lruvec_irq(lruvec); |
| - release_pages(pvec->pages, pvec->nr); |
| - pagevec_reinit(pvec); |
| + release_pages(fbatch->folios, fbatch->nr); |
| + folio_batch_reinit(fbatch); |
| } |
| |
| void mlock_page_drain_local(void) |
| { |
| - struct pagevec *pvec; |
| + struct folio_batch *fbatch; |
| |
| - local_lock(&mlock_pvec.lock); |
| - pvec = this_cpu_ptr(&mlock_pvec.vec); |
| - if (pagevec_count(pvec)) |
| - mlock_pagevec(pvec); |
| - local_unlock(&mlock_pvec.lock); |
| + local_lock(&mlock_fbatch.lock); |
| + fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); |
| + if (folio_batch_count(fbatch)) |
| + mlock_folio_batch(fbatch); |
| + local_unlock(&mlock_fbatch.lock); |
| } |
| |
| void mlock_page_drain_remote(int cpu) |
| { |
| - struct pagevec *pvec; |
| + struct folio_batch *fbatch; |
| |
| WARN_ON_ONCE(cpu_online(cpu)); |
| - pvec = &per_cpu(mlock_pvec.vec, cpu); |
| - if (pagevec_count(pvec)) |
| - mlock_pagevec(pvec); |
| + fbatch = &per_cpu(mlock_fbatch.fbatch, cpu); |
| + if (folio_batch_count(fbatch)) |
| + mlock_folio_batch(fbatch); |
| } |
| |
| bool need_mlock_page_drain(int cpu) |
| { |
| - return pagevec_count(&per_cpu(mlock_pvec.vec, cpu)); |
| + return folio_batch_count(&per_cpu(mlock_fbatch.fbatch, cpu)); |
| } |
| |
| /** |
| @@ -242,10 +242,10 @@ bool need_mlock_page_drain(int cpu) |
| */ |
| void mlock_folio(struct folio *folio) |
| { |
| - struct pagevec *pvec; |
| + struct folio_batch *fbatch; |
| |
| - local_lock(&mlock_pvec.lock); |
| - pvec = this_cpu_ptr(&mlock_pvec.vec); |
| + local_lock(&mlock_fbatch.lock); |
| + fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); |
| |
| if (!folio_test_set_mlocked(folio)) { |
| int nr_pages = folio_nr_pages(folio); |
| @@ -255,10 +255,10 @@ void mlock_folio(struct folio *folio) |
| } |
| |
| folio_get(folio); |
| - if (!pagevec_add(pvec, mlock_lru(&folio->page)) || |
| + if (!folio_batch_add(fbatch, mlock_lru(folio)) || |
| folio_test_large(folio) || lru_cache_disabled()) |
| - mlock_pagevec(pvec); |
| - local_unlock(&mlock_pvec.lock); |
| + mlock_folio_batch(fbatch); |
| + local_unlock(&mlock_fbatch.lock); |
| } |
| |
| /** |
| @@ -267,20 +267,22 @@ void mlock_folio(struct folio *folio) |
| */ |
| void mlock_new_page(struct page *page) |
| { |
| - struct pagevec *pvec; |
| - int nr_pages = thp_nr_pages(page); |
| + struct folio_batch *fbatch; |
| + struct folio *folio = page_folio(page); |
| + int nr_pages = folio_nr_pages(folio); |
| + |
| + local_lock(&mlock_fbatch.lock); |
| + fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); |
| + folio_set_mlocked(folio); |
| |
| - local_lock(&mlock_pvec.lock); |
| - pvec = this_cpu_ptr(&mlock_pvec.vec); |
| - SetPageMlocked(page); |
| - mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); |
| + zone_stat_mod_folio(folio, NR_MLOCK, nr_pages); |
| __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); |
| |
| - get_page(page); |
| - if (!pagevec_add(pvec, mlock_new(page)) || |
| - PageHead(page) || lru_cache_disabled()) |
| - mlock_pagevec(pvec); |
| - local_unlock(&mlock_pvec.lock); |
| + folio_get(folio); |
| + if (!folio_batch_add(fbatch, mlock_new(folio)) || |
| + folio_test_large(folio) || lru_cache_disabled()) |
| + mlock_folio_batch(fbatch); |
| + local_unlock(&mlock_fbatch.lock); |
| } |
| |
| /** |
| @@ -289,20 +291,20 @@ void mlock_new_page(struct page *page) |
| */ |
| void munlock_page(struct page *page) |
| { |
| - struct pagevec *pvec; |
| + struct folio_batch *fbatch; |
| + struct folio *folio = page_folio(page); |
| |
| - local_lock(&mlock_pvec.lock); |
| - pvec = this_cpu_ptr(&mlock_pvec.vec); |
| + local_lock(&mlock_fbatch.lock); |
| + fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); |
| /* |
| - * TestClearPageMlocked(page) must be left to __munlock_page(), |
| - * which will check whether the page is multiply mlocked. |
| + * folio_test_clear_mlocked(folio) must be left to __munlock_folio(), |
| + * which will check whether the folio is multiply mlocked. |
| */ |
| - |
| - get_page(page); |
| - if (!pagevec_add(pvec, page) || |
| - PageHead(page) || lru_cache_disabled()) |
| - mlock_pagevec(pvec); |
| - local_unlock(&mlock_pvec.lock); |
| + folio_get(folio); |
| + if (!folio_batch_add(fbatch, folio) || |
| + folio_test_large(folio) || lru_cache_disabled()) |
| + mlock_folio_batch(fbatch); |
| + local_unlock(&mlock_fbatch.lock); |
| } |
| |
| static int mlock_pte_range(pmd_t *pmd, unsigned long addr, |
| _ |