| From: Kinsey Ho <kinseyho@google.com> |
| Subject: mm: restart if multiple traversals raced |
| Date: Thu, 5 Sep 2024 00:30:53 +0000 |
| |
| Currently, if multiple reclaimers raced on the same position, the |
| reclaimers which detect the race will still reclaim from the same memcg. |
| Instead, the reclaimers which detect the race should move on to the next |
| memcg in the hierarchy. |
| |
| So, in the case where multiple traversals race, jump back to the start of |
| the mem_cgroup_iter() function to find the next memcg in the hierarchy to |
| reclaim from. |
| |
| Link: https://lkml.kernel.org/r/20240905003058.1859929-5-kinseyho@google.com |
| Reported-by: syzbot+e099d407346c45275ce9@syzkaller.appspotmail.com |
| Closes: https://lore.kernel.org/000000000000817cf10620e20d33@google.com/ |
| Signed-off-by: Kinsey Ho <kinseyho@google.com> |
| Reviewed-by: T.J. Mercier <tjmercier@google.com> |
| Cc: Johannes Weiner <hannes@cmpxchg.org> |
| Cc: Michal Hocko <mhocko@kernel.org> |
| Cc: Michal Koutný <mkoutny@suse.com> |
| Cc: Muchun Song <muchun.song@linux.dev> |
| Cc: Roman Gushchin <roman.gushchin@linux.dev> |
| Cc: Shakeel Butt <shakeel.butt@linux.dev> |
| Cc: Tejun Heo <tj@kernel.org> |
| Cc: Yosry Ahmed <yosryahmed@google.com> |
| Cc: Zefan Li <lizefan.x@bytedance.com> |
| Cc: Hugh Dickins <hughd@google.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| include/linux/memcontrol.h | 4 ++-- |
| mm/memcontrol.c | 26 +++++++++++++++++--------- |
| 2 files changed, 19 insertions(+), 11 deletions(-) |
| |
| --- a/include/linux/memcontrol.h~mm-restart-if-multiple-traversals-raced |
| +++ a/include/linux/memcontrol.h |
| @@ -57,7 +57,7 @@ enum memcg_memory_event { |
| |
| struct mem_cgroup_reclaim_cookie { |
| pg_data_t *pgdat; |
| - unsigned int generation; |
| + int generation; |
| }; |
| |
| #ifdef CONFIG_MEMCG |
| @@ -78,7 +78,7 @@ struct lruvec_stats; |
| struct mem_cgroup_reclaim_iter { |
| struct mem_cgroup *position; |
| /* scan generation, increased every round-trip */ |
| - unsigned int generation; |
| + atomic_t generation; |
| }; |
| |
| /* |
| --- a/mm/memcontrol.c~mm-restart-if-multiple-traversals-raced |
| +++ a/mm/memcontrol.c |
| @@ -986,8 +986,8 @@ struct mem_cgroup *mem_cgroup_iter(struc |
| struct mem_cgroup_reclaim_cookie *reclaim) |
| { |
| struct mem_cgroup_reclaim_iter *iter; |
| - struct cgroup_subsys_state *css = NULL; |
| - struct mem_cgroup *memcg = NULL; |
| + struct cgroup_subsys_state *css; |
| + struct mem_cgroup *memcg; |
| struct mem_cgroup *pos = NULL; |
| |
| if (mem_cgroup_disabled()) |
| @@ -998,19 +998,23 @@ struct mem_cgroup *mem_cgroup_iter(struc |
| |
| rcu_read_lock(); |
| restart: |
| + memcg = NULL; |
| + |
| if (reclaim) { |
| + int gen; |
| struct mem_cgroup_per_node *mz; |
| |
| mz = root->nodeinfo[reclaim->pgdat->node_id]; |
| iter = &mz->iter; |
| + gen = atomic_read(&iter->generation); |
| |
| /* |
| * On start, join the current reclaim iteration cycle. |
| * Exit when a concurrent walker completes it. |
| */ |
| if (!prev) |
| - reclaim->generation = iter->generation; |
| - else if (reclaim->generation != iter->generation) |
| + reclaim->generation = gen; |
| + else if (reclaim->generation != gen) |
| goto out_unlock; |
| |
| pos = READ_ONCE(iter->position); |
| @@ -1018,8 +1022,7 @@ restart: |
| pos = prev; |
| } |
| |
| - if (pos) |
| - css = &pos->css; |
| + css = pos ? &pos->css : NULL; |
| |
| for (;;) { |
| css = css_next_descendant_pre(css, &root->css); |
| @@ -1033,21 +1036,26 @@ restart: |
| * and kicking, and don't take an extra reference. |
| */ |
| if (css == &root->css || css_tryget(css)) { |
| - memcg = mem_cgroup_from_css(css); |
| break; |
| } |
| } |
| |
| + memcg = mem_cgroup_from_css(css); |
| + |
| if (reclaim) { |
| /* |
| * The position could have already been updated by a competing |
| * thread, so check that the value hasn't changed since we read |
| * it to avoid reclaiming from the same cgroup twice. |
| */ |
| - (void)cmpxchg(&iter->position, pos, memcg); |
| + if (cmpxchg(&iter->position, pos, memcg) != pos) { |
| + if (css && css != &root->css) |
| + css_put(css); |
| + goto restart; |
| + } |
| |
| if (!memcg) { |
| - iter->generation++; |
| + atomic_inc(&iter->generation); |
| |
| /* |
| * Reclaimers share the hierarchy walk, and a |
| _ |