| From a2e0c0668a3486f96b86c50e02872c8e94fd4f9c Mon Sep 17 00:00:00 2001 |
| From: Usama Arif <usama.arif@linux.dev> |
| Date: Thu, 12 Mar 2026 03:47:23 -0700 |
| Subject: mm: migrate: requeue destination folio on deferred split queue |
| |
| From: Usama Arif <usama.arif@linux.dev> |
| |
| commit a2e0c0668a3486f96b86c50e02872c8e94fd4f9c upstream. |
| |
| During folio migration, __folio_migrate_mapping() removes the source folio |
| from the deferred split queue, but the destination folio is never |
| re-queued. This causes underutilized THPs to escape the shrinker after |
| NUMA migration, since they silently drop off the deferred split list. |
| |
| Fix this by recording whether the source folio was on the deferred split |
| queue and its partially mapped state before move_to_new_folio() unqueues |
| it, and re-queuing the destination folio after a successful migration if |
| it was. |
| |
| By the time migrate_folio_move() runs, partially mapped folios without a |
| pin have already been split by migrate_pages_batch(). So only two cases |
| remain on the deferred list at this point: |
| 1. Partially mapped folios with a pin (split failed). |
| 2. Fully mapped but potentially underused folios. |
| |
| The recorded partially_mapped state is forwarded to |
| deferred_split_folio() so that the destination folio is correctly |
| re-queued in both cases. |
| |
| Because THPs are removed from the deferred_list, the THP shrinker cannot |
| split the underutilized THPs in time. As a result, users will see |
| less free memory than before. |
| |
| Link: https://lkml.kernel.org/r/20260312104723.1351321-1-usama.arif@linux.dev |
| Fixes: dafff3f4c850 ("mm: split underused THPs") |
| Signed-off-by: Usama Arif <usama.arif@linux.dev> |
| Reported-by: Johannes Weiner <hannes@cmpxchg.org> |
| Acked-by: Johannes Weiner <hannes@cmpxchg.org> |
| Acked-by: Zi Yan <ziy@nvidia.com> |
| Acked-by: David Hildenbrand (Arm) <david@kernel.org> |
| Acked-by: SeongJae Park <sj@kernel.org> |
| Reviewed-by: Wei Yang <richard.weiyang@gmail.com> |
| Cc: Alistair Popple <apopple@nvidia.com> |
| Cc: Byungchul Park <byungchul@sk.com> |
| Cc: Gregory Price <gourry@gourry.net> |
| Cc: "Huang, Ying" <ying.huang@linux.alibaba.com> |
| Cc: Joshua Hahn <joshua.hahnjy@gmail.com> |
| Cc: Matthew Brost <matthew.brost@intel.com> |
| Cc: Matthew Wilcox (Oracle) <willy@infradead.org> |
| Cc: Nico Pache <npache@redhat.com> |
| Cc: Rakie Kim <rakie.kim@sk.com> |
| Cc: Ying Huang <ying.huang@linux.alibaba.com> |
| Cc: <stable@vger.kernel.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| mm/migrate.c | 17 +++++++++++++++++ |
| 1 file changed, 17 insertions(+) |
| |
| --- a/mm/migrate.c |
| +++ b/mm/migrate.c |
| @@ -1358,6 +1358,8 @@ static int migrate_folio_move(free_folio |
| int rc; |
| int old_page_state = 0; |
| struct anon_vma *anon_vma = NULL; |
| + bool src_deferred_split = false; |
| + bool src_partially_mapped = false; |
| struct list_head *prev; |
| |
| __migrate_folio_extract(dst, &old_page_state, &anon_vma); |
| @@ -1371,6 +1373,12 @@ static int migrate_folio_move(free_folio |
| goto out_unlock_both; |
| } |
| |
| + if (folio_order(src) > 1 && |
| + !data_race(list_empty(&src->_deferred_list))) { |
| + src_deferred_split = true; |
| + src_partially_mapped = folio_test_partially_mapped(src); |
| + } |
| + |
| rc = move_to_new_folio(dst, src, mode); |
| if (rc) |
| goto out; |
| @@ -1391,6 +1399,15 @@ static int migrate_folio_move(free_folio |
| if (old_page_state & PAGE_WAS_MAPPED) |
| remove_migration_ptes(src, dst, 0); |
| |
| + /* |
| + * Requeue the destination folio on the deferred split queue if |
| + * the source was on the queue. The source is unqueued in |
| + * __folio_migrate_mapping(), so we recorded the state from |
| + * before move_to_new_folio(). |
| + */ |
| + if (src_deferred_split) |
| + deferred_split_folio(dst, src_partially_mapped); |
| + |
| out_unlock_both: |
| folio_unlock(dst); |
| folio_set_owner_migrate_reason(dst, reason); |