| From: Vitaly Wool <vitaly.wool@konsulko.se> |
| Subject: mm/slub: allow to set node and align in k[v]realloc |
| Date: Wed, 6 Aug 2025 14:41:47 +0200 |
| |
| Reimplement k[v]realloc() so that callers can set the NUMA node and the |
| alignment when they need to. To do that while retaining maximal |
| backward compatibility, add new k[v]realloc_node_align() functions and |
| redefine the rest of the API in terms of them. |
| |
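| A minimal usage sketch of the new interface (the size, alignment and |
| target node below are illustrative, not taken from this patch): |
| |
|         /* grow @buf, keeping it cacheline-aligned and on node @nid */ |
|         buf = krealloc_node_align(buf, new_size, SMP_CACHE_BYTES, |
|                                   GFP_KERNEL, nid); |
| |
| Existing callers keep working unchanged: krealloc(p, size, flags) now |
| expands to krealloc_node_align(p, size, 1, flags, NUMA_NO_NODE). |
| |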
| While at it, we also keep the number of _noprof variants to a minimum, |
| which implies some changes to the existing users of the older _noprof |
| functions; in practice that means bcachefs. |
| |
| This change also gives the Rust part of the kernel the ability to set |
| node and alignment in its K[v]xxx [re]allocations. |
| |
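| For reference, the kv* macros introduced in the slab.h hunk below nest |
| as follows (a summary of the patch, not new behavior): |
| |
|         kvmalloc(size, flags);           /* kvmalloc_node(size, flags, NUMA_NO_NODE) */ |
|         kvmalloc_node(size, flags, nid); /* kvmalloc_node_align(size, 1, flags, nid) */ |
|         kvrealloc(p, size, flags);       /* kvrealloc_node_align(p, size, 1, flags, NUMA_NO_NODE) */ |
| |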
| Link: https://lkml.kernel.org/r/20250806124147.1724658-1-vitaly.wool@konsulko.se |
| Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.se> |
| Reviewed-by: Vlastimil Babka <vbabka@suse.cz> |
| Cc: Alice Ryhl <aliceryhl@google.com> |
| Cc: Danilo Krummrich <dakr@kernel.org> |
| Cc: Herbert Xu <herbert@gondor.apana.org.au> |
| Cc: Jann Horn <jannh@google.com> |
| Cc: Kent Overstreet <kent.overstreet@linux.dev> |
| Cc: Liam Howlett <liam.howlett@oracle.com> |
| Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> |
| Cc: Uladzislau Rezki (Sony) <urezki@gmail.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| fs/bcachefs/darray.c | 2 - |
| fs/bcachefs/util.h | 2 - |
| include/linux/bpfptr.h | 2 - |
| include/linux/slab.h | 39 +++++++++++++++---------- |
| lib/rhashtable.c | 4 +- |
| mm/slub.c | 59 +++++++++++++++++++++++++++++---------- |
| 6 files changed, 74 insertions(+), 34 deletions(-) |
| |
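| A usage note on the node semantics added to __do_krealloc() below: a |
| shrinking realloc normally keeps the old object in place, but passing |
| __GFP_THISNODE together with an explicit node id forces a fresh |
| allocation whenever the object currently resides on a different node. |
| A hedged sketch (@p, @size and @nid are illustrative): |
| |
|         /* make sure @p ends up on @nid, reallocating if it lives elsewhere */ |
|         p = krealloc_node_align(p, size, 1, GFP_KERNEL | __GFP_THISNODE, nid); |
| |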
| --- a/fs/bcachefs/darray.c~mm-slub-allow-to-set-node-and-align-in-krealloc |
| +++ a/fs/bcachefs/darray.c |
| @@ -21,7 +21,7 @@ int __bch2_darray_resize_noprof(darray_c |
| return -ENOMEM; |
| |
| void *data = likely(bytes < INT_MAX) |
| - ? kvmalloc_noprof(bytes, gfp) |
| + ? kvmalloc_node_align_noprof(bytes, 1, gfp, NUMA_NO_NODE) |
| : vmalloc_noprof(bytes); |
| if (!data) |
| return -ENOMEM; |
| --- a/fs/bcachefs/util.h~mm-slub-allow-to-set-node-and-align-in-krealloc |
| +++ a/fs/bcachefs/util.h |
| @@ -61,7 +61,7 @@ static inline void *bch2_kvmalloc_noprof |
| { |
| void *p = unlikely(n >= INT_MAX) |
| ? vmalloc_noprof(n) |
| - : kvmalloc_noprof(n, flags & ~__GFP_ZERO); |
| + : kvmalloc_node_align_noprof(n, 1, flags & ~__GFP_ZERO, NUMA_NO_NODE); |
| if (p && (flags & __GFP_ZERO)) |
| memset(p, 0, n); |
| return p; |
| --- a/include/linux/bpfptr.h~mm-slub-allow-to-set-node-and-align-in-krealloc |
| +++ a/include/linux/bpfptr.h |
| @@ -67,7 +67,7 @@ static inline int copy_to_bpfptr_offset( |
| |
| static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len) |
| { |
| - void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN); |
| + void *p = kvmalloc_node_align_noprof(len, 1, GFP_USER | __GFP_NOWARN, NUMA_NO_NODE); |
| |
| if (!p) |
| return ERR_PTR(-ENOMEM); |
| --- a/include/linux/slab.h~mm-slub-allow-to-set-node-and-align-in-krealloc |
| +++ a/include/linux/slab.h |
| @@ -465,9 +465,13 @@ int kmem_cache_shrink(struct kmem_cache |
| /* |
| * Common kmalloc functions provided by all allocators |
| */ |
| -void * __must_check krealloc_noprof(const void *objp, size_t new_size, |
| - gfp_t flags) __realloc_size(2); |
| -#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__)) |
| +void * __must_check krealloc_node_align_noprof(const void *objp, size_t new_size, |
| + unsigned long align, |
| + gfp_t flags, int nid) __realloc_size(2); |
| +#define krealloc_noprof(_o, _s, _f) krealloc_node_align_noprof(_o, _s, 1, _f, NUMA_NO_NODE) |
| +#define krealloc_node_align(...) alloc_hooks(krealloc_node_align_noprof(__VA_ARGS__)) |
| +#define krealloc_node(_o, _s, _f, _n) krealloc_node_align(_o, _s, 1, _f, _n) |
| +#define krealloc(...) krealloc_node(__VA_ARGS__, NUMA_NO_NODE) |
| |
| void kfree(const void *objp); |
| void kfree_sensitive(const void *objp); |
| @@ -1041,18 +1045,20 @@ static inline __alloc_size(1) void *kzal |
| #define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__)) |
| #define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) |
| |
| -void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1); |
| -#define kvmalloc_node_noprof(size, flags, node) \ |
| - __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node) |
| -#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) |
| - |
| -#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) |
| -#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE) |
| +void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align, |
| + gfp_t flags, int node) __alloc_size(1); |
| +#define kvmalloc_node_align_noprof(_size, _align, _flags, _node) \ |
| + __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, NULL), _align, _flags, _node) |
| +#define kvmalloc_node_align(...) \ |
| + alloc_hooks(kvmalloc_node_align_noprof(__VA_ARGS__)) |
| +#define kvmalloc_node(_s, _f, _n) kvmalloc_node_align(_s, 1, _f, _n) |
| +#define kvmalloc(...) kvmalloc_node(__VA_ARGS__, NUMA_NO_NODE) |
| #define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) |
| |
| #define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) |
| + |
| #define kmem_buckets_valloc(_b, _size, _flags) \ |
| - alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) |
| + alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), 1, _flags, NUMA_NO_NODE)) |
| |
| static inline __alloc_size(1, 2) void * |
| kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) |
| @@ -1062,7 +1068,7 @@ kvmalloc_array_node_noprof(size_t n, siz |
| if (unlikely(check_mul_overflow(n, size, &bytes))) |
| return NULL; |
| |
| - return kvmalloc_node_noprof(bytes, flags, node); |
| + return kvmalloc_node_align_noprof(bytes, 1, flags, node); |
| } |
| |
| #define kvmalloc_array_noprof(...) kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE) |
| @@ -1073,9 +1079,12 @@ kvmalloc_array_node_noprof(size_t n, siz |
| #define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) |
| #define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) |
| |
| -void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) |
| - __realloc_size(2); |
| -#define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) |
| +void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align, |
| + gfp_t flags, int nid) __realloc_size(2); |
| +#define kvrealloc_node_align(...) \ |
| + alloc_hooks(kvrealloc_node_align_noprof(__VA_ARGS__)) |
| +#define kvrealloc_node(_p, _s, _f, _n) kvrealloc_node_align(_p, _s, 1, _f, _n) |
| +#define kvrealloc(...) kvrealloc_node(__VA_ARGS__, NUMA_NO_NODE) |
| |
| extern void kvfree(const void *addr); |
| DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) |
| --- a/lib/rhashtable.c~mm-slub-allow-to-set-node-and-align-in-krealloc |
| +++ a/lib/rhashtable.c |
| @@ -184,8 +184,8 @@ static struct bucket_table *bucket_table |
| static struct lock_class_key __key; |
| |
| tbl = alloc_hooks_tag(ht->alloc_tag, |
| - kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets), |
| - gfp|__GFP_ZERO, NUMA_NO_NODE)); |
| + kvmalloc_node_align_noprof(struct_size(tbl, buckets, nbuckets), |
| + 1, gfp|__GFP_ZERO, NUMA_NO_NODE)); |
| |
| size = nbuckets; |
| |
| --- a/mm/slub.c~mm-slub-allow-to-set-node-and-align-in-krealloc |
| +++ a/mm/slub.c |
| @@ -4881,7 +4881,7 @@ void kfree(const void *object) |
| EXPORT_SYMBOL(kfree); |
| |
| static __always_inline __realloc_size(2) void * |
| -__do_krealloc(const void *p, size_t new_size, gfp_t flags) |
| +__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid) |
| { |
| void *ret; |
| size_t ks = 0; |
| @@ -4895,6 +4895,16 @@ __do_krealloc(const void *p, size_t new_ |
| if (!kasan_check_byte(p)) |
| return NULL; |
| |
| + /* |
| + * If reallocation is not necessary (e.g. the new size is less |
| + * than the currently allocated size), the current allocation will be |
| + * preserved unless __GFP_THISNODE is set. In the latter case a new |
| + * allocation on the requested node will be attempted. |
| + */ |
| + if (unlikely(flags & __GFP_THISNODE) && nid != NUMA_NO_NODE && |
| + nid != page_to_nid(virt_to_page(p))) |
| + goto alloc_new; |
| + |
| if (is_kfence_address(p)) { |
| ks = orig_size = kfence_ksize(p); |
| } else { |
| @@ -4917,6 +4927,10 @@ __do_krealloc(const void *p, size_t new_ |
| if (new_size > ks) |
| goto alloc_new; |
| |
| + /* If the old object doesn't satisfy the new alignment, allocate a new one */ |
| + if (!IS_ALIGNED((unsigned long)p, align)) |
| + goto alloc_new; |
| + |
| /* Zero out spare memory. */ |
| if (want_init_on_alloc(flags)) { |
| kasan_disable_current(); |
| @@ -4939,7 +4953,7 @@ __do_krealloc(const void *p, size_t new_ |
| return (void *)p; |
| |
| alloc_new: |
| - ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_); |
| + ret = kmalloc_node_track_caller_noprof(new_size, flags, nid, _RET_IP_); |
| if (ret && p) { |
| /* Disable KASAN checks as the object's redzone is accessed. */ |
| kasan_disable_current(); |
| @@ -4951,14 +4965,19 @@ alloc_new: |
| } |
| |
| /** |
| - * krealloc - reallocate memory. The contents will remain unchanged. |
| + * krealloc_node_align - reallocate memory. The contents will remain unchanged. |
| * @p: object to reallocate memory for. |
| * @new_size: how many bytes of memory are required. |
| + * @align: desired alignment. |
| * @flags: the type of memory to allocate. |
| + * @nid: NUMA node or NUMA_NO_NODE |
| * |
| * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size |
| * is 0 and @p is not a %NULL pointer, the object pointed to is freed. |
| * |
| + * Only alignments up to those guaranteed by kmalloc() will be honored. Please see |
| + * Documentation/core-api/memory-allocation.rst for more details. |
| + * |
| * If __GFP_ZERO logic is requested, callers must ensure that, starting with the |
| * initial memory allocation, every subsequent call to this API for the same |
| * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that |
| @@ -4983,7 +5002,8 @@ alloc_new: |
| * |
| * Return: pointer to the allocated memory or %NULL in case of error |
| */ |
| -void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags) |
| +void *krealloc_node_align_noprof(const void *p, size_t new_size, unsigned long align, |
| + gfp_t flags, int nid) |
| { |
| void *ret; |
| |
| @@ -4992,13 +5012,13 @@ void *krealloc_noprof(const void *p, siz |
| return ZERO_SIZE_PTR; |
| } |
| |
| - ret = __do_krealloc(p, new_size, flags); |
| + ret = __do_krealloc(p, new_size, align, flags, nid); |
| if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret)) |
| kfree(p); |
| |
| return ret; |
| } |
| -EXPORT_SYMBOL(krealloc_noprof); |
| +EXPORT_SYMBOL(krealloc_node_align_noprof); |
| |
| static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size) |
| { |
| @@ -5029,9 +5049,13 @@ static gfp_t kmalloc_gfp_adjust(gfp_t fl |
| * failure, fall back to non-contiguous (vmalloc) allocation. |
| * @size: size of the request. |
| * @b: which set of kmalloc buckets to allocate from. |
| + * @align: desired alignment. |
| * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL. |
| * @node: numa node to allocate from |
| * |
| + * Only alignments up to those guaranteed by kmalloc() will be honored. Please see |
| + * Documentation/core-api/memory-allocation.rst for more details. |
| + * |
| * Uses kmalloc to get the memory but if the allocation fails then falls back |
| * to the vmalloc allocator. Use kvfree for freeing the memory. |
| * |
| @@ -5041,7 +5065,8 @@ static gfp_t kmalloc_gfp_adjust(gfp_t fl |
| * |
| * Return: pointer to the allocated memory or %NULL in case of failure |
| */ |
| -void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) |
| +void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align, |
| + gfp_t flags, int node) |
| { |
| void *ret; |
| |
| @@ -5071,7 +5096,7 @@ void *__kvmalloc_node_noprof(DECL_BUCKET |
| * about the resulting pointer, and cannot play |
| * protection games. |
| */ |
| - return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END, |
| + return __vmalloc_node_range_noprof(size, align, VMALLOC_START, VMALLOC_END, |
| flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP, |
| node, __builtin_return_address(0)); |
| } |
| @@ -5115,14 +5140,19 @@ void kvfree_sensitive(const void *addr, |
| EXPORT_SYMBOL(kvfree_sensitive); |
| |
| /** |
| - * kvrealloc - reallocate memory; contents remain unchanged |
| + * kvrealloc_node_align - reallocate memory; contents remain unchanged |
| * @p: object to reallocate memory for |
| * @size: the size to reallocate |
| + * @align: desired alignment |
| * @flags: the flags for the page level allocator |
| + * @nid: NUMA node id or NUMA_NO_NODE |
| * |
| * If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0 |
| * and @p is not a %NULL pointer, the object pointed to is freed. |
| * |
| + * Only alignments up to those guaranteed by kmalloc() will be honored. Please see |
| + * Documentation/core-api/memory-allocation.rst for more details. |
| + * |
| * If __GFP_ZERO logic is requested, callers must ensure that, starting with the |
| * initial memory allocation, every subsequent call to this API for the same |
| * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that |
| @@ -5136,17 +5166,18 @@ EXPORT_SYMBOL(kvfree_sensitive); |
| * |
| * Return: pointer to the allocated memory or %NULL in case of error |
| */ |
| -void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) |
| +void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align, |
| + gfp_t flags, int nid) |
| { |
| void *n; |
| |
| if (is_vmalloc_addr(p)) |
| - return vrealloc_noprof(p, size, flags); |
| + return vrealloc_node_align_noprof(p, size, align, flags, nid); |
| |
| - n = krealloc_noprof(p, size, kmalloc_gfp_adjust(flags, size)); |
| + n = krealloc_node_align_noprof(p, size, align, kmalloc_gfp_adjust(flags, size), nid); |
| if (!n) { |
| /* We failed to krealloc(), fall back to kvmalloc(). */ |
| - n = kvmalloc_noprof(size, flags); |
| + n = kvmalloc_node_align_noprof(size, align, flags, nid); |
| if (!n) |
| return NULL; |
| |
| @@ -5162,7 +5193,7 @@ void *kvrealloc_noprof(const void *p, si |
| |
| return n; |
| } |
| -EXPORT_SYMBOL(kvrealloc_noprof); |
| +EXPORT_SYMBOL(kvrealloc_node_align_noprof); |
| |
| struct detached_freelist { |
| struct slab *slab; |
| _ |