| From: Vlastimil Babka <vbabka@suse.cz> |
| Subject: mm, slab: move memcg charging to post-alloc hook |
| Date: Tue, 26 Mar 2024 11:37:38 +0100 |
| |
| Patch series "memcg_kmem hooks refactoring", v3. |
| |
| This patch (of 2): |
| |
| The MEMCG_KMEM integration with slab currently relies on two hooks during |
| allocation. memcg_slab_pre_alloc_hook() determines the objcg and charges |
| it, and memcg_slab_post_alloc_hook() assigns the objcg pointer to the |
| allocated object(s). |
| |
| As Linus pointed out, this is unnecessarily complex. Failing to charge |
| due to memcg limits should be rare, so we can optimistically allocate the |
| object(s) and do the charging together with assigning the objcg pointer in |
| a single post_alloc hook. In the rare case the charging fails, we can |
| free the object(s) back. |
| |
| This simplifies the code (no need to pass around the objcg pointer) and |
| potentially allows charging to be separated from allocation in cases |
| where it is common for the allocation to be freed again immediately, in |
| which case the memcg handling overhead could be saved. |
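| |
| As a rough illustration (not part of the patch itself), the new ordering |
| boils down to the following userspace-style sketch, where charge() is a |
| hypothetical stand-in for obj_cgroup_charge() rather than a real API: |
| |
| #include <stdbool.h> |
| #include <stdlib.h> |
| |
| /* hypothetical stand-in for obj_cgroup_charge(); pretend a fixed limit */ |
| static bool charge(size_t size) |
| { |
| 	return size <= (64 * 1024); |
| } |
| |
| void *alloc_accounted(size_t size) |
| { |
| 	void *obj = malloc(size);	/* optimistic allocation first */ |
| |
| 	if (!obj) |
| 		return NULL; |
| |
| 	if (!charge(size)) {		/* rare failure: roll back */ |
| 		free(obj); |
| 		return NULL; |
| 	} |
| |
| 	return obj; |
| } |
| |
| The rollback path only runs when the memcg limit is hit, so the common |
| case pays no extra cost. |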
| |
| [vbabka@suse.cz: fix call to memcg_alloc_abort_single()] |
| Link: https://lkml.kernel.org/r/4af50be2-4109-45e5-8a36-2136252a635e@suse.cz |
| [roman.gushchin@linux.dev: comment fixup] |
| Link: https://lkml.kernel.org/r/Zg2LsNm6twOmG69l@P9FQF9L96D.corp.robot.car |
| Link: https://lkml.kernel.org/r/20240326-slab-memcg-v3-0-d85d2563287a@suse.cz |
| Link: https://lkml.kernel.org/r/20240326-slab-memcg-v3-1-d85d2563287a@suse.cz |
| Signed-off-by: Vlastimil Babka <vbabka@suse.cz> |
| Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev> |
| Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/ |
| Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev> |
| Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev> |
| Cc: Al Viro <viro@ZenIV.linux.org.uk> |
| Cc: Christian Brauner <brauner@kernel.org> |
| Cc: Christoph Lameter <cl@linux.com> |
| Cc: Chuck Lever <chuck.lever@oracle.com> |
| Cc: David Rientjes <rientjes@google.com> |
| Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> |
| Cc: Jan Kara <jack@suse.cz> |
| Cc: Jeff Layton <jlayton@kernel.org> |
| Cc: Johannes Weiner <hannes@cmpxchg.org> |
| Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> |
| Cc: Josh Poimboeuf <jpoimboe@kernel.org> |
| Cc: Kees Cook <kees@kernel.org> |
| Cc: Michal Hocko <mhocko@kernel.org> |
| Cc: Muchun Song <muchun.song@linux.dev> |
| Cc: Pekka Enberg <penberg@kernel.org> |
| Cc: Shakeel Butt <shakeel.butt@linux.dev> |
| Cc: Aishwarya TCV <aishwarya.tcv@arm.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/memcontrol.c | 2 |
| mm/slub.c | 180 +++++++++++++++++++--------------------------- |
| 2 files changed, 78 insertions(+), 104 deletions(-) |
| |
| --- a/mm/memcontrol.c~mm-slab-move-memcg-charging-to-post-alloc-hook |
| +++ a/mm/memcontrol.c |
| @@ -350,7 +350,7 @@ static void memcg_reparent_objcgs(struct |
| |
| /* |
| * A lot of the calls to the cache allocation functions are expected to be |
| - * inlined by the compiler. Since the calls to memcg_slab_pre_alloc_hook() are |
| + * inlined by the compiler. Since the calls to memcg_slab_post_alloc_hook() are |
| * conditional to this static branch, we'll have to allow modules that does |
| * kmem_cache_alloc and the such to see this symbol as well |
| */ |
| --- a/mm/slub.c~mm-slab-move-memcg-charging-to-post-alloc-hook |
| +++ a/mm/slub.c |
| @@ -2092,23 +2092,36 @@ static inline size_t obj_full_size(struc |
| return s->size + sizeof(struct obj_cgroup *); |
| } |
| |
| -/* |
| - * Returns false if the allocation should fail. |
| - */ |
| -static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s, |
| - struct list_lru *lru, |
| - struct obj_cgroup **objcgp, |
| - size_t objects, gfp_t flags) |
| +static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, |
| + struct list_lru *lru, |
| + gfp_t flags, size_t size, |
| + void **p) |
| { |
| + struct obj_cgroup *objcg; |
| + struct slab *slab; |
| + unsigned long off; |
| + size_t i; |
| + |
| /* |
| * The obtained objcg pointer is safe to use within the current scope, |
| * defined by current task or set_active_memcg() pair. |
| * obj_cgroup_get() is used to get a permanent reference. |
| */ |
| - struct obj_cgroup *objcg = current_obj_cgroup(); |
| + objcg = current_obj_cgroup(); |
| if (!objcg) |
| return true; |
| |
| + /* |
| + * slab_alloc_node() avoids the NULL check, so we might be called with a |
| + * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill |
| + * the whole requested size. |
| + * return success as there's nothing to free back |
| + */ |
| + if (unlikely(*p == NULL)) |
| + return true; |
| + |
| + flags &= gfp_allowed_mask; |
| + |
| if (lru) { |
| int ret; |
| struct mem_cgroup *memcg; |
| @@ -2121,71 +2134,51 @@ static bool __memcg_slab_pre_alloc_hook( |
| return false; |
| } |
| |
| - if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) |
| + if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s))) |
| return false; |
| |
| - *objcgp = objcg; |
| + for (i = 0; i < size; i++) { |
| + slab = virt_to_slab(p[i]); |
| + |
| + if (!slab_obj_exts(slab) && |
| + alloc_slab_obj_exts(slab, s, flags, false)) { |
| + obj_cgroup_uncharge(objcg, obj_full_size(s)); |
| + continue; |
| + } |
| + |
| + off = obj_to_index(s, slab, p[i]); |
| + obj_cgroup_get(objcg); |
| + slab_obj_exts(slab)[off].objcg = objcg; |
| + mod_objcg_state(objcg, slab_pgdat(slab), |
| + cache_vmstat_idx(s), obj_full_size(s)); |
| + } |
| + |
| return true; |
| } |
| |
| -/* |
| - * Returns false if the allocation should fail. |
| - */ |
| +static void memcg_alloc_abort_single(struct kmem_cache *s, void *object); |
| + |
| static __fastpath_inline |
| -bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru, |
| - struct obj_cgroup **objcgp, size_t objects, |
| - gfp_t flags) |
| +bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, |
| + gfp_t flags, size_t size, void **p) |
| { |
| - if (!memcg_kmem_online()) |
| + if (likely(!memcg_kmem_online())) |
| return true; |
| |
| if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))) |
| return true; |
| |
| - return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects, |
| - flags)); |
| -} |
| - |
| -static void __memcg_slab_post_alloc_hook(struct kmem_cache *s, |
| - struct obj_cgroup *objcg, |
| - gfp_t flags, size_t size, |
| - void **p) |
| -{ |
| - struct slab *slab; |
| - unsigned long off; |
| - size_t i; |
| - |
| - flags &= gfp_allowed_mask; |
| - |
| - for (i = 0; i < size; i++) { |
| - if (likely(p[i])) { |
| - slab = virt_to_slab(p[i]); |
| - |
| - if (!slab_obj_exts(slab) && |
| - alloc_slab_obj_exts(slab, s, flags, false)) { |
| - obj_cgroup_uncharge(objcg, obj_full_size(s)); |
| - continue; |
| - } |
| + if (likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p))) |
| + return true; |
| |
| - off = obj_to_index(s, slab, p[i]); |
| - obj_cgroup_get(objcg); |
| - slab_obj_exts(slab)[off].objcg = objcg; |
| - mod_objcg_state(objcg, slab_pgdat(slab), |
| - cache_vmstat_idx(s), obj_full_size(s)); |
| - } else { |
| - obj_cgroup_uncharge(objcg, obj_full_size(s)); |
| - } |
| + if (likely(size == 1)) { |
| + memcg_alloc_abort_single(s, *p); |
| + *p = NULL; |
| + } else { |
| + kmem_cache_free_bulk(s, size, p); |
| } |
| -} |
| - |
| -static __fastpath_inline |
| -void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, |
| - gfp_t flags, size_t size, void **p) |
| -{ |
| - if (likely(!memcg_kmem_online() || !objcg)) |
| - return; |
| |
| - return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p); |
| + return false; |
| } |
| |
| static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, |
| @@ -2224,40 +2217,19 @@ void memcg_slab_free_hook(struct kmem_ca |
| |
| __memcg_slab_free_hook(s, slab, p, objects, obj_exts); |
| } |
| - |
| -static inline |
| -void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects, |
| - struct obj_cgroup *objcg) |
| -{ |
| - if (objcg) |
| - obj_cgroup_uncharge(objcg, objects * obj_full_size(s)); |
| -} |
| #else /* CONFIG_MEMCG_KMEM */ |
| -static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, |
| - struct list_lru *lru, |
| - struct obj_cgroup **objcgp, |
| - size_t objects, gfp_t flags) |
| -{ |
| - return true; |
| -} |
| - |
| -static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, |
| - struct obj_cgroup *objcg, |
| +static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s, |
| + struct list_lru *lru, |
| gfp_t flags, size_t size, |
| void **p) |
| { |
| + return true; |
| } |
| |
| static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, |
| void **p, int objects) |
| { |
| } |
| - |
| -static inline |
| -void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects, |
| - struct obj_cgroup *objcg) |
| -{ |
| -} |
| #endif /* CONFIG_MEMCG_KMEM */ |
| |
| /* |
| @@ -3937,10 +3909,7 @@ noinline int should_failslab(struct kmem |
| ALLOW_ERROR_INJECTION(should_failslab, ERRNO); |
| |
| static __fastpath_inline |
| -struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, |
| - struct list_lru *lru, |
| - struct obj_cgroup **objcgp, |
| - size_t size, gfp_t flags) |
| +struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) |
| { |
| flags &= gfp_allowed_mask; |
| |
| @@ -3949,14 +3918,11 @@ struct kmem_cache *slab_pre_alloc_hook(s |
| if (unlikely(should_failslab(s, flags))) |
| return NULL; |
| |
| - if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))) |
| - return NULL; |
| - |
| return s; |
| } |
| |
| static __fastpath_inline |
| -void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, |
| +bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, |
| gfp_t flags, size_t size, void **p, bool init, |
| unsigned int orig_size) |
| { |
| @@ -4018,7 +3984,7 @@ void slab_post_alloc_hook(struct kmem_ca |
| } |
| } |
| |
| - memcg_slab_post_alloc_hook(s, objcg, flags, size, p); |
| + return memcg_slab_post_alloc_hook(s, lru, flags, size, p); |
| } |
| |
| /* |
| @@ -4035,10 +4001,9 @@ static __fastpath_inline void *slab_allo |
| gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) |
| { |
| void *object; |
| - struct obj_cgroup *objcg = NULL; |
| bool init = false; |
| |
| - s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags); |
| + s = slab_pre_alloc_hook(s, gfpflags); |
| if (unlikely(!s)) |
| return NULL; |
| |
| @@ -4055,8 +4020,10 @@ out: |
| /* |
| * When init equals 'true', like for kzalloc() family, only |
| * @orig_size bytes might be zeroed instead of s->object_size |
| + * In case this fails due to memcg_slab_post_alloc_hook(), |
| + * object is set to NULL |
| */ |
| - slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size); |
| + slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size); |
| |
| return object; |
| } |
| @@ -4496,6 +4463,16 @@ void slab_free(struct kmem_cache *s, str |
| do_slab_free(s, slab, object, object, 1, addr); |
| } |
| |
| +#ifdef CONFIG_MEMCG_KMEM |
| +/* Do not inline the rare memcg charging failed path into the allocation path */ |
| +static noinline |
| +void memcg_alloc_abort_single(struct kmem_cache *s, void *object) |
| +{ |
| + if (likely(slab_free_hook(s, object, slab_want_init_on_free(s)))) |
| + do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_); |
| +} |
| +#endif |
| + |
| static __fastpath_inline |
| void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head, |
| void *tail, void **p, int cnt, unsigned long addr) |
| @@ -4832,29 +4809,26 @@ int kmem_cache_alloc_bulk_noprof(struct |
| void **p) |
| { |
| int i; |
| - struct obj_cgroup *objcg = NULL; |
| |
| if (!size) |
| return 0; |
| |
| - /* memcg and kmem_cache debug support */ |
| - s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags); |
| + s = slab_pre_alloc_hook(s, flags); |
| if (unlikely(!s)) |
| return 0; |
| |
| i = __kmem_cache_alloc_bulk(s, flags, size, p); |
| + if (unlikely(i == 0)) |
| + return 0; |
| |
| /* |
| * memcg and kmem_cache debug support and memory initialization. |
| * Done outside of the IRQ disabled fastpath loop. |
| */ |
| - if (likely(i != 0)) { |
| - slab_post_alloc_hook(s, objcg, flags, size, p, |
| - slab_want_init_on_alloc(flags, s), s->object_size); |
| - } else { |
| - memcg_slab_alloc_error_hook(s, size, objcg); |
| + if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p, |
| + slab_want_init_on_alloc(flags, s), s->object_size))) { |
| + return 0; |
| } |
| - |
| return i; |
| } |
| EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof); |
| _ |