| From: Yu Zhao <yuzhao@google.com> |
| Subject: mm: multi-gen LRU: clarify scan_control flags |
| Date: Wed, 21 Dec 2022 21:19:05 -0700 |
| |
| Among the flags in scan_control: |
| 1. sc->may_swap, which indicates a swap constraint due to memsw.max, |
| is supported as usual. |
| 2. sc->proactive, which indicates reclaim by memory.reclaim, is not |
| allowed to opportunistically skip the aging path, since proactive |
| reclaim is considered less latency sensitive. |
| 3. !(sc->gfp_mask & __GFP_IO), which indicates an IO constraint, |
| lowers swappiness to prioritize the file LRU, since clean file |
| folios are more likely to exist. |
| 4. sc->may_writepage and sc->may_unmap, which indicate opportunistic |
| reclaim, are rejected, since unmapped clean folios are already |
| prioritized. Scanning for more of them is likely futile and can |
| cause high reclaim latency when there is a large number of memcgs. |
| |
| The rest are handled by the existing code. |
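| |
| For orientation, here is a condensed, standalone sketch of how the |
| flags above end up being interpreted by the hunks below. It is |
| illustrative only: struct sc_model, the gfp_has_io field and the |
| helper names are simplified stand-ins for this write-up, not the |
| kernel's scan_control or its helpers. |
| |
| #include <stdbool.h> |
| |
| /* simplified model of the scan_control bits this patch touches */ |
| struct sc_model { |
|         bool may_swap;      /* swap constraint, e.g. memsw.max */ |
|         bool proactive;     /* reclaim by memory.reclaim */ |
|         bool may_writepage; /* cleared => opportunistic reclaim */ |
|         bool may_unmap;     /* cleared => opportunistic reclaim */ |
|         bool gfp_has_io;    /* stands in for sc->gfp_mask & __GFP_IO */ |
| }; |
| |
| /* |
|  * Swappiness actually used, given the memcg's configured value; the |
|  * swap-availability and demotion checks in get_swappiness() are |
|  * omitted here. |
|  */ |
| static int effective_swappiness(const struct sc_model *sc, int memcg_sw) |
| { |
|         if (!sc->may_swap) |
|                 return 0;   /* do not scan anon at all */ |
|         if (memcg_sw && !sc->gfp_has_io) |
|                 return 1;   /* IO constrained: prefer clean file folios */ |
|         return memcg_sw; |
| } |
| |
| /* |
|  * Whether global reclaim proceeds at all (lru_gen_shrink_node()); the |
|  * memcg path only warns when these flags are cleared. |
|  */ |
| static bool accepts_request(const struct sc_model *sc) |
| { |
|         /* |
|          * Opportunistic reclaim is rejected: unmapped clean folios are |
|          * already prioritized, so scanning for more is likely futile. |
|          */ |
|         return sc->may_writepage && sc->may_unmap; |
| } |
| |
| /* |
|  * Whether the aging path may allocate a private mm_walk buffer outside |
|  * kswapd; proactive reclaim is less latency sensitive, so it is not |
|  * allowed to skip the aging walk (see set_mm_walk(..., sc->proactive)). |
|  */ |
| static bool may_force_alloc_walk(const struct sc_model *sc) |
| { |
|         return sc->proactive; |
| } |
| |
| Under this model, a memory.reclaim request (proactive, with |
| may_writepage and may_unmap set) is accepted and may age with a walk |
| buffer, while an allocation without __GFP_IO scans with swappiness |
| lowered to 1, provided the memcg allows swapping at all. |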
| |
| Link: https://lkml.kernel.org/r/20221222041905.2431096-8-yuzhao@google.com |
| Signed-off-by: Yu Zhao <yuzhao@google.com> |
| Cc: Johannes Weiner <hannes@cmpxchg.org> |
| Cc: Jonathan Corbet <corbet@lwn.net> |
| Cc: Michael Larabel <Michael@MichaelLarabel.com> |
| Cc: Michal Hocko <mhocko@kernel.org> |
| Cc: Mike Rapoport <rppt@kernel.org> |
| Cc: Roman Gushchin <roman.gushchin@linux.dev> |
| Cc: Suren Baghdasaryan <surenb@google.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/vmscan.c | 56 +++++++++++++++++++++++++------------------------- |
| 1 file changed, 28 insertions(+), 28 deletions(-) |
| |
| --- a/mm/vmscan.c~mm-multi-gen-lru-clarify-scan_control-flags |
| +++ a/mm/vmscan.c |
| @@ -3210,6 +3210,9 @@ static int get_swappiness(struct lruvec |
| struct mem_cgroup *memcg = lruvec_memcg(lruvec); |
| struct pglist_data *pgdat = lruvec_pgdat(lruvec); |
| |
| + if (!sc->may_swap) |
| + return 0; |
| + |
| if (!can_demote(pgdat->node_id, sc) && |
| mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH) |
| return 0; |
| @@ -4236,7 +4239,7 @@ static void walk_mm(struct lruvec *lruve |
| } while (err == -EAGAIN); |
| } |
| |
| -static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat) |
| +static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc) |
| { |
| struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; |
| |
| @@ -4244,7 +4247,7 @@ static struct lru_gen_mm_walk *set_mm_wa |
| VM_WARN_ON_ONCE(walk); |
| |
| walk = &pgdat->mm_walk; |
| - } else if (!pgdat && !walk) { |
| + } else if (!walk && force_alloc) { |
| VM_WARN_ON_ONCE(current_is_kswapd()); |
| |
| walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); |
| @@ -4430,7 +4433,7 @@ static bool try_to_inc_max_seq(struct lr |
| goto done; |
| } |
| |
| - walk = set_mm_walk(NULL); |
| + walk = set_mm_walk(NULL, true); |
| if (!walk) { |
| success = iterate_mm_list_nowalk(lruvec, max_seq); |
| goto done; |
| @@ -4499,8 +4502,6 @@ static bool lruvec_is_reclaimable(struct |
| struct mem_cgroup *memcg = lruvec_memcg(lruvec); |
| DEFINE_MIN_SEQ(lruvec); |
| |
| - VM_WARN_ON_ONCE(sc->memcg_low_reclaim); |
| - |
| /* see the comment on lru_gen_folio */ |
| gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); |
| birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); |
| @@ -4756,12 +4757,8 @@ static bool isolate_folio(struct lruvec |
| { |
| bool success; |
| |
| - /* unmapping inhibited */ |
| - if (!sc->may_unmap && folio_mapped(folio)) |
| - return false; |
| - |
| /* swapping inhibited */ |
| - if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && |
| + if (!(sc->gfp_mask & __GFP_IO) && |
| (folio_test_dirty(folio) || |
| (folio_test_anon(folio) && !folio_test_swapcache(folio)))) |
| return false; |
| @@ -4858,9 +4855,8 @@ static int scan_folios(struct lruvec *lr |
| __count_vm_events(PGSCAN_ANON + type, isolated); |
| |
| /* |
| - * There might not be eligible pages due to reclaim_idx, may_unmap and |
| - * may_writepage. Check the remaining to prevent livelock if it's not |
| - * making progress. |
| + * There might not be eligible folios due to reclaim_idx. Check the |
| + * remaining to prevent livelock if it's not making progress. |
| */ |
| return isolated || !remaining ? scanned : 0; |
| } |
| @@ -5120,9 +5116,7 @@ static long get_nr_to_scan(struct lruvec |
| struct mem_cgroup *memcg = lruvec_memcg(lruvec); |
| DEFINE_MAX_SEQ(lruvec); |
| |
| - if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg) || |
| - (mem_cgroup_below_low(sc->target_mem_cgroup, memcg) && |
| - !sc->memcg_low_reclaim)) |
| + if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg)) |
| return 0; |
| |
| if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) |
| @@ -5150,17 +5144,14 @@ static bool try_to_shrink_lruvec(struct |
| long nr_to_scan; |
| unsigned long scanned = 0; |
| unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); |
| + int swappiness = get_swappiness(lruvec, sc); |
| + |
| + /* clean file folios are more likely to exist */ |
| + if (swappiness && !(sc->gfp_mask & __GFP_IO)) |
| + swappiness = 1; |
| |
| while (true) { |
| int delta; |
| - int swappiness; |
| - |
| - if (sc->may_swap) |
| - swappiness = get_swappiness(lruvec, sc); |
| - else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc)) |
| - swappiness = 1; |
| - else |
| - swappiness = 0; |
| |
| nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); |
| if (nr_to_scan <= 0) |
| @@ -5291,12 +5282,13 @@ static void lru_gen_shrink_lruvec(struct |
| struct blk_plug plug; |
| |
| VM_WARN_ON_ONCE(global_reclaim(sc)); |
| + VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap); |
| |
| lru_add_drain(); |
| |
| blk_start_plug(&plug); |
| |
| - set_mm_walk(lruvec_pgdat(lruvec)); |
| + set_mm_walk(NULL, sc->proactive); |
| |
| if (try_to_shrink_lruvec(lruvec, sc)) |
| lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG); |
| @@ -5352,11 +5344,19 @@ static void lru_gen_shrink_node(struct p |
| |
| VM_WARN_ON_ONCE(!global_reclaim(sc)); |
| |
| + /* |
| + * Unmapped clean folios are already prioritized. Scanning for more of |
| + * them is likely futile and can cause high reclaim latency when there |
| + * is a large number of memcgs. |
| + */ |
| + if (!sc->may_writepage || !sc->may_unmap) |
| + goto done; |
| + |
| lru_add_drain(); |
| |
| blk_start_plug(&plug); |
| |
| - set_mm_walk(pgdat); |
| + set_mm_walk(pgdat, sc->proactive); |
| |
| set_initial_priority(pgdat, sc); |
| |
| @@ -5374,7 +5374,7 @@ static void lru_gen_shrink_node(struct p |
| clear_mm_walk(); |
| |
| blk_finish_plug(&plug); |
| - |
| +done: |
| /* kswapd should never fail */ |
| pgdat->kswapd_failures = 0; |
| } |
| @@ -5943,7 +5943,7 @@ static ssize_t lru_gen_seq_write(struct |
| set_task_reclaim_state(current, &sc.reclaim_state); |
| flags = memalloc_noreclaim_save(); |
| blk_start_plug(&plug); |
| - if (!set_mm_walk(NULL)) { |
| + if (!set_mm_walk(NULL, true)) { |
| err = -ENOMEM; |
| goto done; |
| } |
| _ |