| From: Qi Zheng <zhengqi.arch@bytedance.com> |
| Subject: mm: vmscan: move shrinker-related code into a separate file |
| Date: Mon, 11 Sep 2023 17:25:15 +0800 |
| |
| The mm/vmscan.c file is too large, so move the shrinker-related code out |
| of it into a separate file (mm/shrinker.c). No functional changes. |
| |
| Link: https://lkml.kernel.org/r/20230911092517.64141-3-zhengqi.arch@bytedance.com |
| Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> |
| Reviewed-by: Muchun Song <songmuchun@bytedance.com> |
| Cc: Christian Brauner <brauner@kernel.org> |
| Cc: Christian König <christian.koenig@amd.com> |
| Cc: Chuck Lever <cel@kernel.org> |
| Cc: Daniel Vetter <daniel@ffwll.ch> |
| Cc: Daniel Vetter <daniel.vetter@ffwll.ch> |
| Cc: Darrick J. Wong <djwong@kernel.org> |
| Cc: Dave Chinner <david@fromorbit.com> |
| Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| Cc: Joel Fernandes <joel@joelfernandes.org> |
| Cc: Kirill Tkhai <tkhai@ya.ru> |
| Cc: Paul E. McKenney <paulmck@kernel.org> |
| Cc: Roman Gushchin <roman.gushchin@linux.dev> |
| Cc: Sergey Senozhatsky <senozhatsky@chromium.org> |
| Cc: Steven Price <steven.price@arm.com> |
| Cc: Theodore Ts'o <tytso@mit.edu> |
| Cc: Vlastimil Babka <vbabka@suse.cz> |
| Cc: Abhinav Kumar <quic_abhinavk@quicinc.com> |
| Cc: Alasdair Kergon <agk@redhat.com> |
| Cc: Alexander Viro <viro@zeniv.linux.org.uk> |
| Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> |
| Cc: Andreas Dilger <adilger.kernel@dilger.ca> |
| Cc: Andreas Gruenbacher <agruenba@redhat.com> |
| Cc: Anna Schumaker <anna@kernel.org> |
| Cc: Arnd Bergmann <arnd@arndb.de> |
| Cc: Bob Peterson <rpeterso@redhat.com> |
| Cc: Borislav Petkov <bp@alien8.de> |
| Cc: Carlos Llamas <cmllamas@google.com> |
| Cc: Chandan Babu R <chandan.babu@oracle.com> |
| Cc: Chao Yu <chao@kernel.org> |
| Cc: Chris Mason <clm@fb.com> |
| Cc: Coly Li <colyli@suse.de> |
| Cc: Dai Ngo <Dai.Ngo@oracle.com> |
| Cc: Dave Hansen <dave.hansen@linux.intel.com> |
| Cc: David Airlie <airlied@gmail.com> |
| Cc: David Hildenbrand <david@redhat.com> |
| Cc: David Sterba <dsterba@suse.com> |
| Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> |
| Cc: Gao Xiang <hsiangkao@linux.alibaba.com> |
| Cc: Huang Rui <ray.huang@amd.com> |
| Cc: Ingo Molnar <mingo@redhat.com> |
| Cc: Jaegeuk Kim <jaegeuk@kernel.org> |
| Cc: Jani Nikula <jani.nikula@linux.intel.com> |
| Cc: Jan Kara <jack@suse.cz> |
| Cc: Jason Wang <jasowang@redhat.com> |
| Cc: Jeff Layton <jlayton@kernel.org> |
| Cc: Jeffle Xu <jefflexu@linux.alibaba.com> |
| Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> |
| Cc: Josef Bacik <josef@toxicpanda.com> |
| Cc: Juergen Gross <jgross@suse.com> |
| Cc: Kent Overstreet <kent.overstreet@gmail.com> |
| Cc: Marijn Suijten <marijn.suijten@somainline.org> |
| Cc: "Michael S. Tsirkin" <mst@redhat.com> |
| Cc: Mike Snitzer <snitzer@kernel.org> |
| Cc: Minchan Kim <minchan@kernel.org> |
| Cc: Muchun Song <muchun.song@linux.dev> |
| Cc: Nadav Amit <namit@vmware.com> |
| Cc: Neil Brown <neilb@suse.de> |
| Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> |
| Cc: Olga Kornievskaia <kolga@netapp.com> |
| Cc: Richard Weinberger <richard@nod.at> |
| Cc: Rob Clark <robdclark@gmail.com> |
| Cc: Rob Herring <robh@kernel.org> |
| Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> |
| Cc: Sean Paul <sean@poorly.run> |
| Cc: Song Liu <song@kernel.org> |
| Cc: Stefano Stabellini <sstabellini@kernel.org> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com> |
| Cc: Tom Talpey <tom@talpey.com> |
| Cc: Trond Myklebust <trond.myklebust@hammerspace.com> |
| Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> |
| Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com> |
| Cc: Yue Hu <huyue2@coolpad.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/Makefile | 4 |
| mm/internal.h | 2 |
| mm/shrinker.c | 709 ++++++++++++++++++++++++++++++++++++++++++++++++ |
| mm/vmscan.c | 701 ----------------------------------------------- |
| 4 files changed, 713 insertions(+), 703 deletions(-) |
| |
| --- a/mm/internal.h~mm-vmscan-move-shrinker-related-code-into-a-separate-file |
| +++ a/mm/internal.h |
| @@ -1156,6 +1156,8 @@ struct vma_prepare { |
| }; |
| |
| /* shrinker related functions */ |
| +unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, |
| + int priority); |
| |
| #ifdef CONFIG_SHRINKER_DEBUG |
| extern int shrinker_debugfs_add(struct shrinker *shrinker); |
| --- a/mm/Makefile~mm-vmscan-move-shrinker-related-code-into-a-separate-file |
| +++ a/mm/Makefile |
| @@ -48,8 +48,8 @@ endif |
| |
| obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ |
| maccess.o page-writeback.o folio-compat.o \ |
| - readahead.o swap.o truncate.o vmscan.o shmem.o \ |
| - util.o mmzone.o vmstat.o backing-dev.o \ |
| + readahead.o swap.o truncate.o vmscan.o shrinker.o \ |
| + shmem.o util.o mmzone.o vmstat.o backing-dev.o \ |
| mm_init.o percpu.o slab_common.o \ |
| compaction.o show_mem.o shmem_quota.o\ |
| interval_tree.o list_lru.o workingset.o \ |
| --- /dev/null |
| +++ a/mm/shrinker.c |
| @@ -0,0 +1,709 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +#include <linux/memcontrol.h> |
| +#include <linux/rwsem.h> |
| +#include <linux/shrinker.h> |
| +#include <trace/events/vmscan.h> |
| + |
| +#include "internal.h" |
| + |
| +LIST_HEAD(shrinker_list); |
| +DECLARE_RWSEM(shrinker_rwsem); |
| + |
| +#ifdef CONFIG_MEMCG |
| +static int shrinker_nr_max; |
| + |
| +/* The shrinker_info is expanded in a batch of BITS_PER_LONG */ |
| +static inline int shrinker_map_size(int nr_items) |
| +{ |
| + return (DIV_ROUND_UP(nr_items, BITS_PER_LONG) * sizeof(unsigned long)); |
| +} |
| + |
| +static inline int shrinker_defer_size(int nr_items) |
| +{ |
| + return (round_up(nr_items, BITS_PER_LONG) * sizeof(atomic_long_t)); |
| +} |
| + |
| +void free_shrinker_info(struct mem_cgroup *memcg) |
| +{ |
| + struct mem_cgroup_per_node *pn; |
| + struct shrinker_info *info; |
| + int nid; |
| + |
| + for_each_node(nid) { |
| + pn = memcg->nodeinfo[nid]; |
| + info = rcu_dereference_protected(pn->shrinker_info, true); |
| + kvfree(info); |
| + rcu_assign_pointer(pn->shrinker_info, NULL); |
| + } |
| +} |
| + |
| +int alloc_shrinker_info(struct mem_cgroup *memcg) |
| +{ |
| + struct shrinker_info *info; |
| + int nid, size, ret = 0; |
| + int map_size, defer_size = 0; |
| + |
| + down_write(&shrinker_rwsem); |
| + map_size = shrinker_map_size(shrinker_nr_max); |
| + defer_size = shrinker_defer_size(shrinker_nr_max); |
| + size = map_size + defer_size; |
| + for_each_node(nid) { |
| + info = kvzalloc_node(sizeof(*info) + size, GFP_KERNEL, nid); |
| + if (!info) { |
| + free_shrinker_info(memcg); |
| + ret = -ENOMEM; |
| + break; |
| + } |
| + info->nr_deferred = (atomic_long_t *)(info + 1); |
| + info->map = (void *)info->nr_deferred + defer_size; |
| + info->map_nr_max = shrinker_nr_max; |
| + rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); |
| + } |
| + up_write(&shrinker_rwsem); |
| + |
| + return ret; |
| +} |
| + |
| +static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, |
| + int nid) |
| +{ |
| + return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info, |
| + lockdep_is_held(&shrinker_rwsem)); |
| +} |
| + |
| +static int expand_one_shrinker_info(struct mem_cgroup *memcg, |
| + int map_size, int defer_size, |
| + int old_map_size, int old_defer_size, |
| + int new_nr_max) |
| +{ |
| + struct shrinker_info *new, *old; |
| + struct mem_cgroup_per_node *pn; |
| + int nid; |
| + int size = map_size + defer_size; |
| + |
| + for_each_node(nid) { |
| + pn = memcg->nodeinfo[nid]; |
| + old = shrinker_info_protected(memcg, nid); |
| + /* Not yet online memcg */ |
| + if (!old) |
| + return 0; |
| + |
| + /* Already expanded this shrinker_info */ |
| + if (new_nr_max <= old->map_nr_max) |
| + continue; |
| + |
| + new = kvmalloc_node(sizeof(*new) + size, GFP_KERNEL, nid); |
| + if (!new) |
| + return -ENOMEM; |
| + |
| + new->nr_deferred = (atomic_long_t *)(new + 1); |
| + new->map = (void *)new->nr_deferred + defer_size; |
| + new->map_nr_max = new_nr_max; |
| + |
| + /* map: set all old bits, clear all new bits */ |
| + memset(new->map, (int)0xff, old_map_size); |
| + memset((void *)new->map + old_map_size, 0, map_size - old_map_size); |
| + /* nr_deferred: copy old values, clear all new values */ |
| + memcpy(new->nr_deferred, old->nr_deferred, old_defer_size); |
| + memset((void *)new->nr_deferred + old_defer_size, 0, |
| + defer_size - old_defer_size); |
| + |
| + rcu_assign_pointer(pn->shrinker_info, new); |
| + kvfree_rcu(old, rcu); |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +static int expand_shrinker_info(int new_id) |
| +{ |
| + int ret = 0; |
| + int new_nr_max = round_up(new_id + 1, BITS_PER_LONG); |
| + int map_size, defer_size = 0; |
| + int old_map_size, old_defer_size = 0; |
| + struct mem_cgroup *memcg; |
| + |
| + if (!root_mem_cgroup) |
| + goto out; |
| + |
| + lockdep_assert_held(&shrinker_rwsem); |
| + |
| + map_size = shrinker_map_size(new_nr_max); |
| + defer_size = shrinker_defer_size(new_nr_max); |
| + old_map_size = shrinker_map_size(shrinker_nr_max); |
| + old_defer_size = shrinker_defer_size(shrinker_nr_max); |
| + |
| + memcg = mem_cgroup_iter(NULL, NULL, NULL); |
| + do { |
| + ret = expand_one_shrinker_info(memcg, map_size, defer_size, |
| + old_map_size, old_defer_size, |
| + new_nr_max); |
| + if (ret) { |
| + mem_cgroup_iter_break(NULL, memcg); |
| + goto out; |
| + } |
| + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); |
| +out: |
| + if (!ret) |
| + shrinker_nr_max = new_nr_max; |
| + |
| + return ret; |
| +} |
| + |
| +void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) |
| +{ |
| + if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) { |
| + struct shrinker_info *info; |
| + |
| + rcu_read_lock(); |
| + info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); |
| + if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) { |
| + /* Pairs with smp mb in shrink_slab() */ |
| + smp_mb__before_atomic(); |
| + set_bit(shrinker_id, info->map); |
| + } |
| + rcu_read_unlock(); |
| + } |
| +} |
| + |
| +static DEFINE_IDR(shrinker_idr); |
| + |
| +static int prealloc_memcg_shrinker(struct shrinker *shrinker) |
| +{ |
| + int id, ret = -ENOMEM; |
| + |
| + if (mem_cgroup_disabled()) |
| + return -ENOSYS; |
| + |
| + down_write(&shrinker_rwsem); |
| + /* This may call shrinker, so it must use down_read_trylock() */ |
| + id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL); |
| + if (id < 0) |
| + goto unlock; |
| + |
| + if (id >= shrinker_nr_max) { |
| + if (expand_shrinker_info(id)) { |
| + idr_remove(&shrinker_idr, id); |
| + goto unlock; |
| + } |
| + } |
| + shrinker->id = id; |
| + ret = 0; |
| +unlock: |
| + up_write(&shrinker_rwsem); |
| + return ret; |
| +} |
| + |
| +static void unregister_memcg_shrinker(struct shrinker *shrinker) |
| +{ |
| + int id = shrinker->id; |
| + |
| + BUG_ON(id < 0); |
| + |
| + lockdep_assert_held(&shrinker_rwsem); |
| + |
| + idr_remove(&shrinker_idr, id); |
| +} |
| + |
| +static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, |
| + struct mem_cgroup *memcg) |
| +{ |
| + struct shrinker_info *info; |
| + |
| + info = shrinker_info_protected(memcg, nid); |
| + return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0); |
| +} |
| + |
| +static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, |
| + struct mem_cgroup *memcg) |
| +{ |
| + struct shrinker_info *info; |
| + |
| + info = shrinker_info_protected(memcg, nid); |
| + return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]); |
| +} |
| + |
| +void reparent_shrinker_deferred(struct mem_cgroup *memcg) |
| +{ |
| + int i, nid; |
| + long nr; |
| + struct mem_cgroup *parent; |
| + struct shrinker_info *child_info, *parent_info; |
| + |
| + parent = parent_mem_cgroup(memcg); |
| + if (!parent) |
| + parent = root_mem_cgroup; |
| + |
| + /* Prevent from concurrent shrinker_info expand */ |
| + down_read(&shrinker_rwsem); |
| + for_each_node(nid) { |
| + child_info = shrinker_info_protected(memcg, nid); |
| + parent_info = shrinker_info_protected(parent, nid); |
| + for (i = 0; i < child_info->map_nr_max; i++) { |
| + nr = atomic_long_read(&child_info->nr_deferred[i]); |
| + atomic_long_add(nr, &parent_info->nr_deferred[i]); |
| + } |
| + } |
| + up_read(&shrinker_rwsem); |
| +} |
| +#else |
| +static int prealloc_memcg_shrinker(struct shrinker *shrinker) |
| +{ |
| + return -ENOSYS; |
| +} |
| + |
| +static void unregister_memcg_shrinker(struct shrinker *shrinker) |
| +{ |
| +} |
| + |
| +static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, |
| + struct mem_cgroup *memcg) |
| +{ |
| + return 0; |
| +} |
| + |
| +static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, |
| + struct mem_cgroup *memcg) |
| +{ |
| + return 0; |
| +} |
| +#endif /* CONFIG_MEMCG */ |
| + |
| +static long xchg_nr_deferred(struct shrinker *shrinker, |
| + struct shrink_control *sc) |
| +{ |
| + int nid = sc->nid; |
| + |
| + if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) |
| + nid = 0; |
| + |
| + if (sc->memcg && |
| + (shrinker->flags & SHRINKER_MEMCG_AWARE)) |
| + return xchg_nr_deferred_memcg(nid, shrinker, |
| + sc->memcg); |
| + |
| + return atomic_long_xchg(&shrinker->nr_deferred[nid], 0); |
| +} |
| + |
| + |
| +static long add_nr_deferred(long nr, struct shrinker *shrinker, |
| + struct shrink_control *sc) |
| +{ |
| + int nid = sc->nid; |
| + |
| + if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) |
| + nid = 0; |
| + |
| + if (sc->memcg && |
| + (shrinker->flags & SHRINKER_MEMCG_AWARE)) |
| + return add_nr_deferred_memcg(nr, nid, shrinker, |
| + sc->memcg); |
| + |
| + return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]); |
| +} |
| + |
| +#define SHRINK_BATCH 128 |
| + |
| +static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, |
| + struct shrinker *shrinker, int priority) |
| +{ |
| + unsigned long freed = 0; |
| + unsigned long long delta; |
| + long total_scan; |
| + long freeable; |
| + long nr; |
| + long new_nr; |
| + long batch_size = shrinker->batch ? shrinker->batch |
| + : SHRINK_BATCH; |
| + long scanned = 0, next_deferred; |
| + |
| + freeable = shrinker->count_objects(shrinker, shrinkctl); |
| + if (freeable == 0 || freeable == SHRINK_EMPTY) |
| + return freeable; |
| + |
| + /* |
| + * copy the current shrinker scan count into a local variable |
| + * and zero it so that other concurrent shrinker invocations |
| + * don't also do this scanning work. |
| + */ |
| + nr = xchg_nr_deferred(shrinker, shrinkctl); |
| + |
| + if (shrinker->seeks) { |
| + delta = freeable >> priority; |
| + delta *= 4; |
| + do_div(delta, shrinker->seeks); |
| + } else { |
| + /* |
| + * These objects don't require any IO to create. Trim |
| + * them aggressively under memory pressure to keep |
| + * them from causing refetches in the IO caches. |
| + */ |
| + delta = freeable / 2; |
| + } |
| + |
| + total_scan = nr >> priority; |
| + total_scan += delta; |
| + total_scan = min(total_scan, (2 * freeable)); |
| + |
| + trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, |
| + freeable, delta, total_scan, priority); |
| + |
| + /* |
| + * Normally, we should not scan less than batch_size objects in one |
| + * pass to avoid too frequent shrinker calls, but if the slab has less |
| + * than batch_size objects in total and we are really tight on memory, |
| + * we will try to reclaim all available objects, otherwise we can end |
| + * up failing allocations although there are plenty of reclaimable |
| + * objects spread over several slabs with usage less than the |
| + * batch_size. |
| + * |
| + * We detect the "tight on memory" situations by looking at the total |
| + * number of objects we want to scan (total_scan). If it is greater |
| + * than the total number of objects on slab (freeable), we must be |
| + * scanning at high prio and therefore should try to reclaim as much as |
| + * possible. |
| + */ |
| + while (total_scan >= batch_size || |
| + total_scan >= freeable) { |
| + unsigned long ret; |
| + unsigned long nr_to_scan = min(batch_size, total_scan); |
| + |
| + shrinkctl->nr_to_scan = nr_to_scan; |
| + shrinkctl->nr_scanned = nr_to_scan; |
| + ret = shrinker->scan_objects(shrinker, shrinkctl); |
| + if (ret == SHRINK_STOP) |
| + break; |
| + freed += ret; |
| + |
| + count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); |
| + total_scan -= shrinkctl->nr_scanned; |
| + scanned += shrinkctl->nr_scanned; |
| + |
| + cond_resched(); |
| + } |
| + |
| + /* |
| + * The deferred work is increased by any new work (delta) that wasn't |
| + * done, decreased by old deferred work that was done now. |
| + * |
| + * And it is capped to two times of the freeable items. |
| + */ |
| + next_deferred = max_t(long, (nr + delta - scanned), 0); |
| + next_deferred = min(next_deferred, (2 * freeable)); |
| + |
| + /* |
| + * move the unused scan count back into the shrinker in a |
| + * manner that handles concurrent updates. |
| + */ |
| + new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl); |
| + |
| + trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan); |
| + return freed; |
| +} |
| + |
| +#ifdef CONFIG_MEMCG |
| +static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, |
| + struct mem_cgroup *memcg, int priority) |
| +{ |
| + struct shrinker_info *info; |
| + unsigned long ret, freed = 0; |
| + int i; |
| + |
| + if (!mem_cgroup_online(memcg)) |
| + return 0; |
| + |
| + if (!down_read_trylock(&shrinker_rwsem)) |
| + return 0; |
| + |
| + info = shrinker_info_protected(memcg, nid); |
| + if (unlikely(!info)) |
| + goto unlock; |
| + |
| + for_each_set_bit(i, info->map, info->map_nr_max) { |
| + struct shrink_control sc = { |
| + .gfp_mask = gfp_mask, |
| + .nid = nid, |
| + .memcg = memcg, |
| + }; |
| + struct shrinker *shrinker; |
| + |
| + shrinker = idr_find(&shrinker_idr, i); |
| + if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { |
| + if (!shrinker) |
| + clear_bit(i, info->map); |
| + continue; |
| + } |
| + |
| + /* Call non-slab shrinkers even though kmem is disabled */ |
| + if (!memcg_kmem_online() && |
| + !(shrinker->flags & SHRINKER_NONSLAB)) |
| + continue; |
| + |
| + ret = do_shrink_slab(&sc, shrinker, priority); |
| + if (ret == SHRINK_EMPTY) { |
| + clear_bit(i, info->map); |
| + /* |
| + * After the shrinker reported that it had no objects to |
| + * free, but before we cleared the corresponding bit in |
| + * the memcg shrinker map, a new object might have been |
| + * added. To make sure, we have the bit set in this |
| + * case, we invoke the shrinker one more time and reset |
| + * the bit if it reports that it is not empty anymore. |
| + * The memory barrier here pairs with the barrier in |
| + * set_shrinker_bit(): |
| + * |
| + * list_lru_add() shrink_slab_memcg() |
| + * list_add_tail() clear_bit() |
| + * <MB> <MB> |
| + * set_bit() do_shrink_slab() |
| + */ |
| + smp_mb__after_atomic(); |
| + ret = do_shrink_slab(&sc, shrinker, priority); |
| + if (ret == SHRINK_EMPTY) |
| + ret = 0; |
| + else |
| + set_shrinker_bit(memcg, nid, i); |
| + } |
| + freed += ret; |
| + |
| + if (rwsem_is_contended(&shrinker_rwsem)) { |
| + freed = freed ? : 1; |
| + break; |
| + } |
| + } |
| +unlock: |
| + up_read(&shrinker_rwsem); |
| + return freed; |
| +} |
| +#else /* !CONFIG_MEMCG */ |
| +static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, |
| + struct mem_cgroup *memcg, int priority) |
| +{ |
| + return 0; |
| +} |
| +#endif /* CONFIG_MEMCG */ |
| + |
| +/** |
| + * shrink_slab - shrink slab caches |
| + * @gfp_mask: allocation context |
| + * @nid: node whose slab caches to target |
| + * @memcg: memory cgroup whose slab caches to target |
| + * @priority: the reclaim priority |
| + * |
| + * Call the shrink functions to age shrinkable caches. |
| + * |
| + * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set, |
| + * unaware shrinkers will receive a node id of 0 instead. |
| + * |
| + * @memcg specifies the memory cgroup to target. Unaware shrinkers |
| + * are called only if it is the root cgroup. |
| + * |
| + * @priority is sc->priority, we take the number of objects and >> by priority |
| + * in order to get the scan target. |
| + * |
| + * Returns the number of reclaimed slab objects. |
| + */ |
| +unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, |
| + int priority) |
| +{ |
| + unsigned long ret, freed = 0; |
| + struct shrinker *shrinker; |
| + |
| + /* |
| + * The root memcg might be allocated even though memcg is disabled |
| + * via "cgroup_disable=memory" boot parameter. This could make |
| + * mem_cgroup_is_root() return false, then just run memcg slab |
| + * shrink, but skip global shrink. This may result in premature |
| + * oom. |
| + */ |
| + if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) |
| + return shrink_slab_memcg(gfp_mask, nid, memcg, priority); |
| + |
| + if (!down_read_trylock(&shrinker_rwsem)) |
| + goto out; |
| + |
| + list_for_each_entry(shrinker, &shrinker_list, list) { |
| + struct shrink_control sc = { |
| + .gfp_mask = gfp_mask, |
| + .nid = nid, |
| + .memcg = memcg, |
| + }; |
| + |
| + ret = do_shrink_slab(&sc, shrinker, priority); |
| + if (ret == SHRINK_EMPTY) |
| + ret = 0; |
| + freed += ret; |
| + /* |
| + * Bail out if someone want to register a new shrinker to |
| + * prevent the registration from being stalled for long periods |
| + * by parallel ongoing shrinking. |
| + */ |
| + if (rwsem_is_contended(&shrinker_rwsem)) { |
| + freed = freed ? : 1; |
| + break; |
| + } |
| + } |
| + |
| + up_read(&shrinker_rwsem); |
| +out: |
| + cond_resched(); |
| + return freed; |
| +} |
| + |
| +/* |
| + * Add a shrinker callback to be called from the vm. |
| + */ |
| +static int __prealloc_shrinker(struct shrinker *shrinker) |
| +{ |
| + unsigned int size; |
| + int err; |
| + |
| + if (shrinker->flags & SHRINKER_MEMCG_AWARE) { |
| + err = prealloc_memcg_shrinker(shrinker); |
| + if (err != -ENOSYS) |
| + return err; |
| + |
| + shrinker->flags &= ~SHRINKER_MEMCG_AWARE; |
| + } |
| + |
| + size = sizeof(*shrinker->nr_deferred); |
| + if (shrinker->flags & SHRINKER_NUMA_AWARE) |
| + size *= nr_node_ids; |
| + |
| + shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); |
| + if (!shrinker->nr_deferred) |
| + return -ENOMEM; |
| + |
| + return 0; |
| +} |
| + |
| +#ifdef CONFIG_SHRINKER_DEBUG |
| +int prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...) |
| +{ |
| + va_list ap; |
| + int err; |
| + |
| + va_start(ap, fmt); |
| + shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); |
| + va_end(ap); |
| + if (!shrinker->name) |
| + return -ENOMEM; |
| + |
| + err = __prealloc_shrinker(shrinker); |
| + if (err) { |
| + kfree_const(shrinker->name); |
| + shrinker->name = NULL; |
| + } |
| + |
| + return err; |
| +} |
| +#else |
| +int prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...) |
| +{ |
| + return __prealloc_shrinker(shrinker); |
| +} |
| +#endif |
| + |
| +void free_prealloced_shrinker(struct shrinker *shrinker) |
| +{ |
| +#ifdef CONFIG_SHRINKER_DEBUG |
| + kfree_const(shrinker->name); |
| + shrinker->name = NULL; |
| +#endif |
| + if (shrinker->flags & SHRINKER_MEMCG_AWARE) { |
| + down_write(&shrinker_rwsem); |
| + unregister_memcg_shrinker(shrinker); |
| + up_write(&shrinker_rwsem); |
| + return; |
| + } |
| + |
| + kfree(shrinker->nr_deferred); |
| + shrinker->nr_deferred = NULL; |
| +} |
| + |
| +void register_shrinker_prepared(struct shrinker *shrinker) |
| +{ |
| + down_write(&shrinker_rwsem); |
| + list_add_tail(&shrinker->list, &shrinker_list); |
| + shrinker->flags |= SHRINKER_REGISTERED; |
| + shrinker_debugfs_add(shrinker); |
| + up_write(&shrinker_rwsem); |
| +} |
| + |
| +static int __register_shrinker(struct shrinker *shrinker) |
| +{ |
| + int err = __prealloc_shrinker(shrinker); |
| + |
| + if (err) |
| + return err; |
| + register_shrinker_prepared(shrinker); |
| + return 0; |
| +} |
| + |
| +#ifdef CONFIG_SHRINKER_DEBUG |
| +int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) |
| +{ |
| + va_list ap; |
| + int err; |
| + |
| + va_start(ap, fmt); |
| + shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); |
| + va_end(ap); |
| + if (!shrinker->name) |
| + return -ENOMEM; |
| + |
| + err = __register_shrinker(shrinker); |
| + if (err) { |
| + kfree_const(shrinker->name); |
| + shrinker->name = NULL; |
| + } |
| + return err; |
| +} |
| +#else |
| +int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) |
| +{ |
| + return __register_shrinker(shrinker); |
| +} |
| +#endif |
| +EXPORT_SYMBOL(register_shrinker); |
| + |
| +/* |
| + * Remove one |
| + */ |
| +void unregister_shrinker(struct shrinker *shrinker) |
| +{ |
| + struct dentry *debugfs_entry; |
| + int debugfs_id; |
| + |
| + if (!(shrinker->flags & SHRINKER_REGISTERED)) |
| + return; |
| + |
| + down_write(&shrinker_rwsem); |
| + list_del(&shrinker->list); |
| + shrinker->flags &= ~SHRINKER_REGISTERED; |
| + if (shrinker->flags & SHRINKER_MEMCG_AWARE) |
| + unregister_memcg_shrinker(shrinker); |
| + debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); |
| + up_write(&shrinker_rwsem); |
| + |
| + shrinker_debugfs_remove(debugfs_entry, debugfs_id); |
| + |
| + kfree(shrinker->nr_deferred); |
| + shrinker->nr_deferred = NULL; |
| +} |
| +EXPORT_SYMBOL(unregister_shrinker); |
| + |
| +/** |
| + * synchronize_shrinkers - Wait for all running shrinkers to complete. |
| + * |
| + * This is equivalent to calling unregister_shrink() and register_shrinker(), |
| + * but atomically and with less overhead. This is useful to guarantee that all |
| + * shrinker invocations have seen an update, before freeing memory, similar to |
| + * rcu. |
| + */ |
| +void synchronize_shrinkers(void) |
| +{ |
| + down_write(&shrinker_rwsem); |
| + up_write(&shrinker_rwsem); |
| +} |
| +EXPORT_SYMBOL(synchronize_shrinkers); |
| --- a/mm/vmscan.c~mm-vmscan-move-shrinker-related-code-into-a-separate-file |
| +++ a/mm/vmscan.c |
| @@ -35,7 +35,6 @@ |
| #include <linux/cpuset.h> |
| #include <linux/compaction.h> |
| #include <linux/notifier.h> |
| -#include <linux/rwsem.h> |
| #include <linux/delay.h> |
| #include <linux/kthread.h> |
| #include <linux/freezer.h> |
| @@ -188,246 +187,7 @@ struct scan_control { |
| */ |
| int vm_swappiness = 60; |
| |
| -LIST_HEAD(shrinker_list); |
| -DECLARE_RWSEM(shrinker_rwsem); |
| - |
| #ifdef CONFIG_MEMCG |
| -static int shrinker_nr_max; |
| - |
| -/* The shrinker_info is expanded in a batch of BITS_PER_LONG */ |
| -static inline int shrinker_map_size(int nr_items) |
| -{ |
| - return (DIV_ROUND_UP(nr_items, BITS_PER_LONG) * sizeof(unsigned long)); |
| -} |
| - |
| -static inline int shrinker_defer_size(int nr_items) |
| -{ |
| - return (round_up(nr_items, BITS_PER_LONG) * sizeof(atomic_long_t)); |
| -} |
| - |
| -static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, |
| - int nid) |
| -{ |
| - return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info, |
| - lockdep_is_held(&shrinker_rwsem)); |
| -} |
| - |
| -static int expand_one_shrinker_info(struct mem_cgroup *memcg, |
| - int map_size, int defer_size, |
| - int old_map_size, int old_defer_size, |
| - int new_nr_max) |
| -{ |
| - struct shrinker_info *new, *old; |
| - struct mem_cgroup_per_node *pn; |
| - int nid; |
| - int size = map_size + defer_size; |
| - |
| - for_each_node(nid) { |
| - pn = memcg->nodeinfo[nid]; |
| - old = shrinker_info_protected(memcg, nid); |
| - /* Not yet online memcg */ |
| - if (!old) |
| - return 0; |
| - |
| - /* Already expanded this shrinker_info */ |
| - if (new_nr_max <= old->map_nr_max) |
| - continue; |
| - |
| - new = kvmalloc_node(sizeof(*new) + size, GFP_KERNEL, nid); |
| - if (!new) |
| - return -ENOMEM; |
| - |
| - new->nr_deferred = (atomic_long_t *)(new + 1); |
| - new->map = (void *)new->nr_deferred + defer_size; |
| - new->map_nr_max = new_nr_max; |
| - |
| - /* map: set all old bits, clear all new bits */ |
| - memset(new->map, (int)0xff, old_map_size); |
| - memset((void *)new->map + old_map_size, 0, map_size - old_map_size); |
| - /* nr_deferred: copy old values, clear all new values */ |
| - memcpy(new->nr_deferred, old->nr_deferred, old_defer_size); |
| - memset((void *)new->nr_deferred + old_defer_size, 0, |
| - defer_size - old_defer_size); |
| - |
| - rcu_assign_pointer(pn->shrinker_info, new); |
| - kvfree_rcu(old, rcu); |
| - } |
| - |
| - return 0; |
| -} |
| - |
| -void free_shrinker_info(struct mem_cgroup *memcg) |
| -{ |
| - struct mem_cgroup_per_node *pn; |
| - struct shrinker_info *info; |
| - int nid; |
| - |
| - for_each_node(nid) { |
| - pn = memcg->nodeinfo[nid]; |
| - info = rcu_dereference_protected(pn->shrinker_info, true); |
| - kvfree(info); |
| - rcu_assign_pointer(pn->shrinker_info, NULL); |
| - } |
| -} |
| - |
| -int alloc_shrinker_info(struct mem_cgroup *memcg) |
| -{ |
| - struct shrinker_info *info; |
| - int nid, size, ret = 0; |
| - int map_size, defer_size = 0; |
| - |
| - down_write(&shrinker_rwsem); |
| - map_size = shrinker_map_size(shrinker_nr_max); |
| - defer_size = shrinker_defer_size(shrinker_nr_max); |
| - size = map_size + defer_size; |
| - for_each_node(nid) { |
| - info = kvzalloc_node(sizeof(*info) + size, GFP_KERNEL, nid); |
| - if (!info) { |
| - free_shrinker_info(memcg); |
| - ret = -ENOMEM; |
| - break; |
| - } |
| - info->nr_deferred = (atomic_long_t *)(info + 1); |
| - info->map = (void *)info->nr_deferred + defer_size; |
| - info->map_nr_max = shrinker_nr_max; |
| - rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); |
| - } |
| - up_write(&shrinker_rwsem); |
| - |
| - return ret; |
| -} |
| - |
| -static int expand_shrinker_info(int new_id) |
| -{ |
| - int ret = 0; |
| - int new_nr_max = round_up(new_id + 1, BITS_PER_LONG); |
| - int map_size, defer_size = 0; |
| - int old_map_size, old_defer_size = 0; |
| - struct mem_cgroup *memcg; |
| - |
| - if (!root_mem_cgroup) |
| - goto out; |
| - |
| - lockdep_assert_held(&shrinker_rwsem); |
| - |
| - map_size = shrinker_map_size(new_nr_max); |
| - defer_size = shrinker_defer_size(new_nr_max); |
| - old_map_size = shrinker_map_size(shrinker_nr_max); |
| - old_defer_size = shrinker_defer_size(shrinker_nr_max); |
| - |
| - memcg = mem_cgroup_iter(NULL, NULL, NULL); |
| - do { |
| - ret = expand_one_shrinker_info(memcg, map_size, defer_size, |
| - old_map_size, old_defer_size, |
| - new_nr_max); |
| - if (ret) { |
| - mem_cgroup_iter_break(NULL, memcg); |
| - goto out; |
| - } |
| - } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); |
| -out: |
| - if (!ret) |
| - shrinker_nr_max = new_nr_max; |
| - |
| - return ret; |
| -} |
| - |
| -void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) |
| -{ |
| - if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) { |
| - struct shrinker_info *info; |
| - |
| - rcu_read_lock(); |
| - info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); |
| - if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) { |
| - /* Pairs with smp mb in shrink_slab() */ |
| - smp_mb__before_atomic(); |
| - set_bit(shrinker_id, info->map); |
| - } |
| - rcu_read_unlock(); |
| - } |
| -} |
| - |
| -static DEFINE_IDR(shrinker_idr); |
| - |
| -static int prealloc_memcg_shrinker(struct shrinker *shrinker) |
| -{ |
| - int id, ret = -ENOMEM; |
| - |
| - if (mem_cgroup_disabled()) |
| - return -ENOSYS; |
| - |
| - down_write(&shrinker_rwsem); |
| - /* This may call shrinker, so it must use down_read_trylock() */ |
| - id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL); |
| - if (id < 0) |
| - goto unlock; |
| - |
| - if (id >= shrinker_nr_max) { |
| - if (expand_shrinker_info(id)) { |
| - idr_remove(&shrinker_idr, id); |
| - goto unlock; |
| - } |
| - } |
| - shrinker->id = id; |
| - ret = 0; |
| -unlock: |
| - up_write(&shrinker_rwsem); |
| - return ret; |
| -} |
| - |
| -static void unregister_memcg_shrinker(struct shrinker *shrinker) |
| -{ |
| - int id = shrinker->id; |
| - |
| - BUG_ON(id < 0); |
| - |
| - lockdep_assert_held(&shrinker_rwsem); |
| - |
| - idr_remove(&shrinker_idr, id); |
| -} |
| - |
| -static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, |
| - struct mem_cgroup *memcg) |
| -{ |
| - struct shrinker_info *info; |
| - |
| - info = shrinker_info_protected(memcg, nid); |
| - return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0); |
| -} |
| - |
| -static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, |
| - struct mem_cgroup *memcg) |
| -{ |
| - struct shrinker_info *info; |
| - |
| - info = shrinker_info_protected(memcg, nid); |
| - return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]); |
| -} |
| - |
| -void reparent_shrinker_deferred(struct mem_cgroup *memcg) |
| -{ |
| - int i, nid; |
| - long nr; |
| - struct mem_cgroup *parent; |
| - struct shrinker_info *child_info, *parent_info; |
| - |
| - parent = parent_mem_cgroup(memcg); |
| - if (!parent) |
| - parent = root_mem_cgroup; |
| - |
| - /* Prevent from concurrent shrinker_info expand */ |
| - down_read(&shrinker_rwsem); |
| - for_each_node(nid) { |
| - child_info = shrinker_info_protected(memcg, nid); |
| - parent_info = shrinker_info_protected(parent, nid); |
| - for (i = 0; i < child_info->map_nr_max; i++) { |
| - nr = atomic_long_read(&child_info->nr_deferred[i]); |
| - atomic_long_add(nr, &parent_info->nr_deferred[i]); |
| - } |
| - } |
| - up_read(&shrinker_rwsem); |
| -} |
| |
| /* Returns true for reclaim through cgroup limits or cgroup interfaces. */ |
| static bool cgroup_reclaim(struct scan_control *sc) |
| @@ -468,27 +228,6 @@ static bool writeback_throttling_sane(st |
| return false; |
| } |
| #else |
| -static int prealloc_memcg_shrinker(struct shrinker *shrinker) |
| -{ |
| - return -ENOSYS; |
| -} |
| - |
| -static void unregister_memcg_shrinker(struct shrinker *shrinker) |
| -{ |
| -} |
| - |
| -static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, |
| - struct mem_cgroup *memcg) |
| -{ |
| - return 0; |
| -} |
| - |
| -static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, |
| - struct mem_cgroup *memcg) |
| -{ |
| - return 0; |
| -} |
| - |
| static bool cgroup_reclaim(struct scan_control *sc) |
| { |
| return false; |
| @@ -557,39 +296,6 @@ static void flush_reclaim_state(struct s |
| } |
| } |
| |
| -static long xchg_nr_deferred(struct shrinker *shrinker, |
| - struct shrink_control *sc) |
| -{ |
| - int nid = sc->nid; |
| - |
| - if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) |
| - nid = 0; |
| - |
| - if (sc->memcg && |
| - (shrinker->flags & SHRINKER_MEMCG_AWARE)) |
| - return xchg_nr_deferred_memcg(nid, shrinker, |
| - sc->memcg); |
| - |
| - return atomic_long_xchg(&shrinker->nr_deferred[nid], 0); |
| -} |
| - |
| - |
| -static long add_nr_deferred(long nr, struct shrinker *shrinker, |
| - struct shrink_control *sc) |
| -{ |
| - int nid = sc->nid; |
| - |
| - if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) |
| - nid = 0; |
| - |
| - if (sc->memcg && |
| - (shrinker->flags & SHRINKER_MEMCG_AWARE)) |
| - return add_nr_deferred_memcg(nr, nid, shrinker, |
| - sc->memcg); |
| - |
| - return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]); |
| -} |
| - |
| static bool can_demote(int nid, struct scan_control *sc) |
| { |
| if (!numa_demotion_enabled) |
| @@ -671,413 +377,6 @@ static unsigned long lruvec_lru_size(str |
| return size; |
| } |
| |
| -/* |
| - * Add a shrinker callback to be called from the vm. |
| - */ |
| -static int __prealloc_shrinker(struct shrinker *shrinker) |
| -{ |
| - unsigned int size; |
| - int err; |
| - |
| - if (shrinker->flags & SHRINKER_MEMCG_AWARE) { |
| - err = prealloc_memcg_shrinker(shrinker); |
| - if (err != -ENOSYS) |
| - return err; |
| - |
| - shrinker->flags &= ~SHRINKER_MEMCG_AWARE; |
| - } |
| - |
| - size = sizeof(*shrinker->nr_deferred); |
| - if (shrinker->flags & SHRINKER_NUMA_AWARE) |
| - size *= nr_node_ids; |
| - |
| - shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); |
| - if (!shrinker->nr_deferred) |
| - return -ENOMEM; |
| - |
| - return 0; |
| -} |
| - |
| -#ifdef CONFIG_SHRINKER_DEBUG |
| -int prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...) |
| -{ |
| - va_list ap; |
| - int err; |
| - |
| - va_start(ap, fmt); |
| - shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); |
| - va_end(ap); |
| - if (!shrinker->name) |
| - return -ENOMEM; |
| - |
| - err = __prealloc_shrinker(shrinker); |
| - if (err) { |
| - kfree_const(shrinker->name); |
| - shrinker->name = NULL; |
| - } |
| - |
| - return err; |
| -} |
| -#else |
| -int prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...) |
| -{ |
| - return __prealloc_shrinker(shrinker); |
| -} |
| -#endif |
| - |
| -void free_prealloced_shrinker(struct shrinker *shrinker) |
| -{ |
| -#ifdef CONFIG_SHRINKER_DEBUG |
| - kfree_const(shrinker->name); |
| - shrinker->name = NULL; |
| -#endif |
| - if (shrinker->flags & SHRINKER_MEMCG_AWARE) { |
| - down_write(&shrinker_rwsem); |
| - unregister_memcg_shrinker(shrinker); |
| - up_write(&shrinker_rwsem); |
| - return; |
| - } |
| - |
| - kfree(shrinker->nr_deferred); |
| - shrinker->nr_deferred = NULL; |
| -} |
| - |
| -void register_shrinker_prepared(struct shrinker *shrinker) |
| -{ |
| - down_write(&shrinker_rwsem); |
| - list_add_tail(&shrinker->list, &shrinker_list); |
| - shrinker->flags |= SHRINKER_REGISTERED; |
| - shrinker_debugfs_add(shrinker); |
| - up_write(&shrinker_rwsem); |
| -} |
| - |
| -static int __register_shrinker(struct shrinker *shrinker) |
| -{ |
| - int err = __prealloc_shrinker(shrinker); |
| - |
| - if (err) |
| - return err; |
| - register_shrinker_prepared(shrinker); |
| - return 0; |
| -} |
| - |
| -#ifdef CONFIG_SHRINKER_DEBUG |
| -int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) |
| -{ |
| - va_list ap; |
| - int err; |
| - |
| - va_start(ap, fmt); |
| - shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); |
| - va_end(ap); |
| - if (!shrinker->name) |
| - return -ENOMEM; |
| - |
| - err = __register_shrinker(shrinker); |
| - if (err) { |
| - kfree_const(shrinker->name); |
| - shrinker->name = NULL; |
| - } |
| - return err; |
| -} |
| -#else |
| -int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) |
| -{ |
| - return __register_shrinker(shrinker); |
| -} |
| -#endif |
| -EXPORT_SYMBOL(register_shrinker); |
| - |
| -/* |
| - * Remove one |
| - */ |
| -void unregister_shrinker(struct shrinker *shrinker) |
| -{ |
| - struct dentry *debugfs_entry; |
| - int debugfs_id; |
| - |
| - if (!(shrinker->flags & SHRINKER_REGISTERED)) |
| - return; |
| - |
| - down_write(&shrinker_rwsem); |
| - list_del(&shrinker->list); |
| - shrinker->flags &= ~SHRINKER_REGISTERED; |
| - if (shrinker->flags & SHRINKER_MEMCG_AWARE) |
| - unregister_memcg_shrinker(shrinker); |
| - debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); |
| - up_write(&shrinker_rwsem); |
| - |
| - shrinker_debugfs_remove(debugfs_entry, debugfs_id); |
| - |
| - kfree(shrinker->nr_deferred); |
| - shrinker->nr_deferred = NULL; |
| -} |
| -EXPORT_SYMBOL(unregister_shrinker); |
| - |
| -/** |
| - * synchronize_shrinkers - Wait for all running shrinkers to complete. |
| - * |
| - * This is equivalent to calling unregister_shrink() and register_shrinker(), |
| - * but atomically and with less overhead. This is useful to guarantee that all |
| - * shrinker invocations have seen an update, before freeing memory, similar to |
| - * rcu. |
| - */ |
| -void synchronize_shrinkers(void) |
| -{ |
| - down_write(&shrinker_rwsem); |
| - up_write(&shrinker_rwsem); |
| -} |
| -EXPORT_SYMBOL(synchronize_shrinkers); |
| - |
| -#define SHRINK_BATCH 128 |
| - |
| -static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, |
| - struct shrinker *shrinker, int priority) |
| -{ |
| - unsigned long freed = 0; |
| - unsigned long long delta; |
| - long total_scan; |
| - long freeable; |
| - long nr; |
| - long new_nr; |
| - long batch_size = shrinker->batch ? shrinker->batch |
| - : SHRINK_BATCH; |
| - long scanned = 0, next_deferred; |
| - |
| - freeable = shrinker->count_objects(shrinker, shrinkctl); |
| - if (freeable == 0 || freeable == SHRINK_EMPTY) |
| - return freeable; |
| - |
| - /* |
| - * copy the current shrinker scan count into a local variable |
| - * and zero it so that other concurrent shrinker invocations |
| - * don't also do this scanning work. |
| - */ |
| - nr = xchg_nr_deferred(shrinker, shrinkctl); |
| - |
| - if (shrinker->seeks) { |
| - delta = freeable >> priority; |
| - delta *= 4; |
| - do_div(delta, shrinker->seeks); |
| - } else { |
| - /* |
| - * These objects don't require any IO to create. Trim |
| - * them aggressively under memory pressure to keep |
| - * them from causing refetches in the IO caches. |
| - */ |
| - delta = freeable / 2; |
| - } |
| - |
| - total_scan = nr >> priority; |
| - total_scan += delta; |
| - total_scan = min(total_scan, (2 * freeable)); |
| - |
| - trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, |
| - freeable, delta, total_scan, priority); |
| - |
| - /* |
| - * Normally, we should not scan less than batch_size objects in one |
| - * pass to avoid too frequent shrinker calls, but if the slab has less |
| - * than batch_size objects in total and we are really tight on memory, |
| - * we will try to reclaim all available objects, otherwise we can end |
| - * up failing allocations although there are plenty of reclaimable |
| - * objects spread over several slabs with usage less than the |
| - * batch_size. |
| - * |
| - * We detect the "tight on memory" situations by looking at the total |
| - * number of objects we want to scan (total_scan). If it is greater |
| - * than the total number of objects on slab (freeable), we must be |
| - * scanning at high prio and therefore should try to reclaim as much as |
| - * possible. |
| - */ |
| - while (total_scan >= batch_size || |
| - total_scan >= freeable) { |
| - unsigned long ret; |
| - unsigned long nr_to_scan = min(batch_size, total_scan); |
| - |
| - shrinkctl->nr_to_scan = nr_to_scan; |
| - shrinkctl->nr_scanned = nr_to_scan; |
| - ret = shrinker->scan_objects(shrinker, shrinkctl); |
| - if (ret == SHRINK_STOP) |
| - break; |
| - freed += ret; |
| - |
| - count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); |
| - total_scan -= shrinkctl->nr_scanned; |
| - scanned += shrinkctl->nr_scanned; |
| - |
| - cond_resched(); |
| - } |
| - |
| - /* |
| - * The deferred work is increased by any new work (delta) that wasn't |
| - * done, decreased by old deferred work that was done now. |
| - * |
| - * And it is capped to two times of the freeable items. |
| - */ |
| - next_deferred = max_t(long, (nr + delta - scanned), 0); |
| - next_deferred = min(next_deferred, (2 * freeable)); |
| - |
| - /* |
| - * move the unused scan count back into the shrinker in a |
| - * manner that handles concurrent updates. |
| - */ |
| - new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl); |
| - |
| - trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan); |
| - return freed; |
| -} |
| - |
| -#ifdef CONFIG_MEMCG |
| -static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, |
| - struct mem_cgroup *memcg, int priority) |
| -{ |
| - struct shrinker_info *info; |
| - unsigned long ret, freed = 0; |
| - int i; |
| - |
| - if (!mem_cgroup_online(memcg)) |
| - return 0; |
| - |
| - if (!down_read_trylock(&shrinker_rwsem)) |
| - return 0; |
| - |
| - info = shrinker_info_protected(memcg, nid); |
| - if (unlikely(!info)) |
| - goto unlock; |
| - |
| - for_each_set_bit(i, info->map, info->map_nr_max) { |
| - struct shrink_control sc = { |
| - .gfp_mask = gfp_mask, |
| - .nid = nid, |
| - .memcg = memcg, |
| - }; |
| - struct shrinker *shrinker; |
| - |
| - shrinker = idr_find(&shrinker_idr, i); |
| - if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { |
| - if (!shrinker) |
| - clear_bit(i, info->map); |
| - continue; |
| - } |
| - |
| - /* Call non-slab shrinkers even though kmem is disabled */ |
| - if (!memcg_kmem_online() && |
| - !(shrinker->flags & SHRINKER_NONSLAB)) |
| - continue; |
| - |
| - ret = do_shrink_slab(&sc, shrinker, priority); |
| - if (ret == SHRINK_EMPTY) { |
| - clear_bit(i, info->map); |
| - /* |
| - * After the shrinker reported that it had no objects to |
| - * free, but before we cleared the corresponding bit in |
| - * the memcg shrinker map, a new object might have been |
| - * added. To make sure, we have the bit set in this |
| - * case, we invoke the shrinker one more time and reset |
| - * the bit if it reports that it is not empty anymore. |
| - * The memory barrier here pairs with the barrier in |
| - * set_shrinker_bit(): |
| - * |
| - * list_lru_add() shrink_slab_memcg() |
| - * list_add_tail() clear_bit() |
| - * <MB> <MB> |
| - * set_bit() do_shrink_slab() |
| - */ |
| - smp_mb__after_atomic(); |
| - ret = do_shrink_slab(&sc, shrinker, priority); |
| - if (ret == SHRINK_EMPTY) |
| - ret = 0; |
| - else |
| - set_shrinker_bit(memcg, nid, i); |
| - } |
| - freed += ret; |
| - |
| - if (rwsem_is_contended(&shrinker_rwsem)) { |
| - freed = freed ? : 1; |
| - break; |
| - } |
| - } |
| -unlock: |
| - up_read(&shrinker_rwsem); |
| - return freed; |
| -} |
| -#else /* CONFIG_MEMCG */ |
| -static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, |
| - struct mem_cgroup *memcg, int priority) |
| -{ |
| - return 0; |
| -} |
| -#endif /* CONFIG_MEMCG */ |
| - |
| -/** |
| - * shrink_slab - shrink slab caches |
| - * @gfp_mask: allocation context |
| - * @nid: node whose slab caches to target |
| - * @memcg: memory cgroup whose slab caches to target |
| - * @priority: the reclaim priority |
| - * |
| - * Call the shrink functions to age shrinkable caches. |
| - * |
| - * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set, |
| - * unaware shrinkers will receive a node id of 0 instead. |
| - * |
| - * @memcg specifies the memory cgroup to target. Unaware shrinkers |
| - * are called only if it is the root cgroup. |
| - * |
| - * @priority is sc->priority, we take the number of objects and >> by priority |
| - * in order to get the scan target. |
| - * |
| - * Returns the number of reclaimed slab objects. |
| - */ |
| -static unsigned long shrink_slab(gfp_t gfp_mask, int nid, |
| - struct mem_cgroup *memcg, |
| - int priority) |
| -{ |
| - unsigned long ret, freed = 0; |
| - struct shrinker *shrinker; |
| - |
| - /* |
| - * The root memcg might be allocated even though memcg is disabled |
| - * via "cgroup_disable=memory" boot parameter. This could make |
| - * mem_cgroup_is_root() return false, then just run memcg slab |
| - * shrink, but skip global shrink. This may result in premature |
| - * oom. |
| - */ |
| - if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) |
| - return shrink_slab_memcg(gfp_mask, nid, memcg, priority); |
| - |
| - if (!down_read_trylock(&shrinker_rwsem)) |
| - goto out; |
| - |
| - list_for_each_entry(shrinker, &shrinker_list, list) { |
| - struct shrink_control sc = { |
| - .gfp_mask = gfp_mask, |
| - .nid = nid, |
| - .memcg = memcg, |
| - }; |
| - |
| - ret = do_shrink_slab(&sc, shrinker, priority); |
| - if (ret == SHRINK_EMPTY) |
| - ret = 0; |
| - freed += ret; |
| - /* |
| - * Bail out if someone want to register a new shrinker to |
| - * prevent the registration from being stalled for long periods |
| - * by parallel ongoing shrinking. |
| - */ |
| - if (rwsem_is_contended(&shrinker_rwsem)) { |
| - freed = freed ? : 1; |
| - break; |
| - } |
| - } |
| - |
| - up_read(&shrinker_rwsem); |
| -out: |
| - cond_resched(); |
| - return freed; |
| -} |
| - |
| static unsigned long drop_slab_node(int nid) |
| { |
| unsigned long freed = 0; |
| _ |