| From: Johannes Weiner <hannes@cmpxchg.org> |
| Subject: mm-memcontrol-dont-throttle-dying-tasks-on-memoryhigh-v2 |
| Date: Thu, 11 Jan 2024 14:28:07 -0500 |
| |
| Also handle the case where the task is killed while reclaim is in progress. |
| |
| Link: https://lkml.kernel.org/r/20240111192807.GA424308@cmpxchg.org |
| Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> |
| Cc: Tejun Heo <tj@kernel.org> |
| Cc: Yosry Ahmed <yosryahmed@google.com> |
| Cc: Shakeel Butt <shakeelb@google.com> |
| Cc: Roman Gushchin <roman.gushchin@linux.dev> |
| Cc: Dan Schatzberg <schatzberg.dan@gmail.com> |
| Cc: Michal Hocko <mhocko@kernel.org> |
| Cc: Muchun Song <muchun.song@linux.dev> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/memcontrol.c | 37 ++++++++++++++++++++----------------- |
| 1 file changed, 20 insertions(+), 17 deletions(-) |
| |
| --- a/mm/memcontrol.c~mm-memcontrol-dont-throttle-dying-tasks-on-memoryhigh-v2 |
| +++ a/mm/memcontrol.c |
| @@ -2624,8 +2624,8 @@ static unsigned long calculate_high_dela |
| |
| /* |
| * Reclaims memory over the high limit. Called directly from |
| - * try_charge() when possible, but also scheduled to be called from |
| - * the userland return path where reclaim is always able to block. |
| + * try_charge() (context permitting), as well as from the userland |
| + * return path where reclaim is always able to block. |
| */ |
| void mem_cgroup_handle_over_high(gfp_t gfp_mask) |
| { |
| @@ -2645,6 +2645,17 @@ void mem_cgroup_handle_over_high(gfp_t g |
| |
| retry_reclaim: |
| /* |
| + * Bail if the task is already exiting. Unlike memory.max, |
| + * memory.high enforcement isn't as strict, and there is no |
| + * OOM killer involved, which means the excess could already |
| + * be much bigger (and still growing) than it could for |
| + * memory.max; the dying task could get stuck in fruitless |
| + * reclaim for a long time, which isn't desirable. |
| + */ |
| + if (task_is_dying()) |
| + goto out; |
| + |
| + /* |
| * The allocating task should reclaim at least the batch size, but for |
| * subsequent retries we only want to do what's necessary to prevent oom |
| * or breaching resource isolation. |
| @@ -2892,24 +2903,16 @@ done_restock: |
| } while ((memcg = parent_mem_cgroup(memcg))); |
| |
| /* |
| - * Reclaim is scheduled for the userland return path already, |
| - * but also attempt synchronous reclaim to avoid excessive |
| - * overrun while the task is still inside the kernel. If this |
| - * is successful, the return path will see it when it rechecks |
| - * the overage, and simply bail out. |
| - * |
| - * Skip if the task is already dying, though. Unlike |
| - * memory.max, memory.high enforcement isn't as strict, and |
| - * there is no OOM killer involved, which means the excess |
| - * could already be much bigger (and still growing) than it |
| - * could for memory.max; the dying task could get stuck in |
| - * fruitless reclaim for a long time, which isn't desirable. |
| + * Reclaim is set up above to be called from the userland |
| + * return path. But also attempt synchronous reclaim to avoid |
| + * excessive overrun while the task is still inside the |
| + * kernel. If this is successful, the return path will see it |
| + * when it rechecks the overage and simply bail out. |
| */ |
| if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH && |
| - !(current->flags & PF_MEMALLOC) && !task_is_dying() && |
| - gfpflags_allow_blocking(gfp_mask)) { |
| + !(current->flags & PF_MEMALLOC) && |
| + gfpflags_allow_blocking(gfp_mask)) |
| mem_cgroup_handle_over_high(gfp_mask); |
| - } |
| return 0; |
| } |
| |
| _ |