| // SPDX-License-Identifier: GPL-2.0 | 
 | #include <linux/memcontrol.h> | 
 | #include <linux/rwsem.h> | 
 | #include <linux/shrinker.h> | 
 | #include <linux/rculist.h> | 
 | #include <trace/events/vmscan.h> | 
 |  | 
 | #include "internal.h" | 
 |  | 
/* Registered shrinkers; added/removed under shrinker_mutex, walked under RCU by shrink_slab(). */
LIST_HEAD(shrinker_list);
/* Taken in this file around shrinker_list updates, shrinker ID (de)allocation and shrinker_info allocation/expansion. */
DEFINE_MUTEX(shrinker_mutex);
 |  | 
 | #ifdef CONFIG_MEMCG | 
/*
 * Number of shrinker IDs each memcg shrinker_info is currently sized for.
 * Written by expand_shrinker_info(), which rounds it up to a multiple of
 * SHRINKER_UNIT_BITS.
 */
static int shrinker_nr_max;
 |  | 
 | static inline int shrinker_unit_size(int nr_items) | 
 | { | 
 | 	return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *)); | 
 | } | 
 |  | 
 | static inline void shrinker_unit_free(struct shrinker_info *info, int start) | 
 | { | 
 | 	struct shrinker_info_unit **unit; | 
 | 	int nr, i; | 
 |  | 
 | 	if (!info) | 
 | 		return; | 
 |  | 
 | 	unit = info->unit; | 
 | 	nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS); | 
 |  | 
 | 	for (i = start; i < nr; i++) { | 
 | 		if (!unit[i]) | 
 | 			break; | 
 |  | 
 | 		kfree(unit[i]); | 
 | 		unit[i] = NULL; | 
 | 	} | 
 | } | 
 |  | 
 | static inline int shrinker_unit_alloc(struct shrinker_info *new, | 
 | 				       struct shrinker_info *old, int nid) | 
 | { | 
 | 	struct shrinker_info_unit *unit; | 
 | 	int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS); | 
 | 	int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0; | 
 | 	int i; | 
 |  | 
 | 	for (i = start; i < nr; i++) { | 
 | 		unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid); | 
 | 		if (!unit) { | 
 | 			shrinker_unit_free(new, start); | 
 | 			return -ENOMEM; | 
 | 		} | 
 |  | 
 | 		new->unit[i] = unit; | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | void free_shrinker_info(struct mem_cgroup *memcg) | 
 | { | 
 | 	struct mem_cgroup_per_node *pn; | 
 | 	struct shrinker_info *info; | 
 | 	int nid; | 
 |  | 
 | 	for_each_node(nid) { | 
 | 		pn = memcg->nodeinfo[nid]; | 
 | 		info = rcu_dereference_protected(pn->shrinker_info, true); | 
 | 		shrinker_unit_free(info, 0); | 
 | 		kvfree(info); | 
 | 		rcu_assign_pointer(pn->shrinker_info, NULL); | 
 | 	} | 
 | } | 
 |  | 
 | int alloc_shrinker_info(struct mem_cgroup *memcg) | 
 | { | 
 | 	int nid, ret = 0; | 
 | 	int array_size = 0; | 
 |  | 
 | 	mutex_lock(&shrinker_mutex); | 
 | 	array_size = shrinker_unit_size(shrinker_nr_max); | 
 | 	for_each_node(nid) { | 
 | 		struct shrinker_info *info = kvzalloc_node(sizeof(*info) + array_size, | 
 | 							   GFP_KERNEL, nid); | 
 | 		if (!info) | 
 | 			goto err; | 
 | 		info->map_nr_max = shrinker_nr_max; | 
 | 		if (shrinker_unit_alloc(info, NULL, nid)) { | 
 | 			kvfree(info); | 
 | 			goto err; | 
 | 		} | 
 | 		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); | 
 | 	} | 
 | 	mutex_unlock(&shrinker_mutex); | 
 |  | 
 | 	return ret; | 
 |  | 
 | err: | 
 | 	mutex_unlock(&shrinker_mutex); | 
 | 	free_shrinker_info(memcg); | 
 | 	return -ENOMEM; | 
 | } | 
 |  | 
 | static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, | 
 | 						     int nid) | 
 | { | 
 | 	return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info, | 
 | 					 lockdep_is_held(&shrinker_mutex)); | 
 | } | 
 |  | 
 | static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size, | 
 | 				    int old_size, int new_nr_max) | 
 | { | 
 | 	struct shrinker_info *new, *old; | 
 | 	struct mem_cgroup_per_node *pn; | 
 | 	int nid; | 
 |  | 
 | 	for_each_node(nid) { | 
 | 		pn = memcg->nodeinfo[nid]; | 
 | 		old = shrinker_info_protected(memcg, nid); | 
 | 		/* Not yet online memcg */ | 
 | 		if (!old) | 
 | 			return 0; | 
 |  | 
 | 		/* Already expanded this shrinker_info */ | 
 | 		if (new_nr_max <= old->map_nr_max) | 
 | 			continue; | 
 |  | 
 | 		new = kvzalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid); | 
 | 		if (!new) | 
 | 			return -ENOMEM; | 
 |  | 
 | 		new->map_nr_max = new_nr_max; | 
 |  | 
 | 		memcpy(new->unit, old->unit, old_size); | 
 | 		if (shrinker_unit_alloc(new, old, nid)) { | 
 | 			kvfree(new); | 
 | 			return -ENOMEM; | 
 | 		} | 
 |  | 
 | 		rcu_assign_pointer(pn->shrinker_info, new); | 
 | 		kvfree_rcu(old, rcu); | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static int expand_shrinker_info(int new_id) | 
 | { | 
 | 	int ret = 0; | 
 | 	int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS); | 
 | 	int new_size, old_size = 0; | 
 | 	struct mem_cgroup *memcg; | 
 |  | 
 | 	if (!root_mem_cgroup) | 
 | 		goto out; | 
 |  | 
 | 	lockdep_assert_held(&shrinker_mutex); | 
 |  | 
 | 	new_size = shrinker_unit_size(new_nr_max); | 
 | 	old_size = shrinker_unit_size(shrinker_nr_max); | 
 |  | 
 | 	memcg = mem_cgroup_iter(NULL, NULL, NULL); | 
 | 	do { | 
 | 		ret = expand_one_shrinker_info(memcg, new_size, old_size, | 
 | 					       new_nr_max); | 
 | 		if (ret) { | 
 | 			mem_cgroup_iter_break(NULL, memcg); | 
 | 			goto out; | 
 | 		} | 
 | 	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); | 
 | out: | 
 | 	if (!ret) | 
 | 		shrinker_nr_max = new_nr_max; | 
 |  | 
 | 	return ret; | 
 | } | 
 |  | 
 | static inline int shrinker_id_to_index(int shrinker_id) | 
 | { | 
 | 	return shrinker_id / SHRINKER_UNIT_BITS; | 
 | } | 
 |  | 
 | static inline int shrinker_id_to_offset(int shrinker_id) | 
 | { | 
 | 	return shrinker_id % SHRINKER_UNIT_BITS; | 
 | } | 
 |  | 
 | static inline int calc_shrinker_id(int index, int offset) | 
 | { | 
 | 	return index * SHRINKER_UNIT_BITS + offset; | 
 | } | 
 |  | 
 | void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) | 
 | { | 
 | 	if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) { | 
 | 		struct shrinker_info *info; | 
 | 		struct shrinker_info_unit *unit; | 
 |  | 
 | 		rcu_read_lock(); | 
 | 		info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); | 
 | 		unit = info->unit[shrinker_id_to_index(shrinker_id)]; | 
 | 		if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) { | 
 | 			/* Pairs with smp mb in shrink_slab() */ | 
 | 			smp_mb__before_atomic(); | 
 | 			set_bit(shrinker_id_to_offset(shrinker_id), unit->map); | 
 | 		} | 
 | 		rcu_read_unlock(); | 
 | 	} | 
 | } | 
 |  | 
/* Maps the IDs assigned by shrinker_memcg_alloc() back to their struct shrinker. */
static DEFINE_IDR(shrinker_idr);
 |  | 
 | static int shrinker_memcg_alloc(struct shrinker *shrinker) | 
 | { | 
 | 	int id, ret = -ENOMEM; | 
 |  | 
 | 	if (mem_cgroup_disabled()) | 
 | 		return -ENOSYS; | 
 |  | 
 | 	mutex_lock(&shrinker_mutex); | 
 | 	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL); | 
 | 	if (id < 0) | 
 | 		goto unlock; | 
 |  | 
 | 	if (id >= shrinker_nr_max) { | 
 | 		if (expand_shrinker_info(id)) { | 
 | 			idr_remove(&shrinker_idr, id); | 
 | 			goto unlock; | 
 | 		} | 
 | 	} | 
 | 	shrinker->id = id; | 
 | 	ret = 0; | 
 | unlock: | 
 | 	mutex_unlock(&shrinker_mutex); | 
 | 	return ret; | 
 | } | 
 |  | 
 | static void shrinker_memcg_remove(struct shrinker *shrinker) | 
 | { | 
 | 	int id = shrinker->id; | 
 |  | 
 | 	BUG_ON(id < 0); | 
 |  | 
 | 	lockdep_assert_held(&shrinker_mutex); | 
 |  | 
 | 	idr_remove(&shrinker_idr, id); | 
 | } | 
 |  | 
 | static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, | 
 | 				   struct mem_cgroup *memcg) | 
 | { | 
 | 	struct shrinker_info *info; | 
 | 	struct shrinker_info_unit *unit; | 
 | 	long nr_deferred; | 
 |  | 
 | 	rcu_read_lock(); | 
 | 	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); | 
 | 	unit = info->unit[shrinker_id_to_index(shrinker->id)]; | 
 | 	nr_deferred = atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0); | 
 | 	rcu_read_unlock(); | 
 |  | 
 | 	return nr_deferred; | 
 | } | 
 |  | 
 | static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, | 
 | 				  struct mem_cgroup *memcg) | 
 | { | 
 | 	struct shrinker_info *info; | 
 | 	struct shrinker_info_unit *unit; | 
 | 	long nr_deferred; | 
 |  | 
 | 	rcu_read_lock(); | 
 | 	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); | 
 | 	unit = info->unit[shrinker_id_to_index(shrinker->id)]; | 
 | 	nr_deferred = | 
 | 		atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]); | 
 | 	rcu_read_unlock(); | 
 |  | 
 | 	return nr_deferred; | 
 | } | 
 |  | 
 | void reparent_shrinker_deferred(struct mem_cgroup *memcg) | 
 | { | 
 | 	int nid, index, offset; | 
 | 	long nr; | 
 | 	struct mem_cgroup *parent; | 
 | 	struct shrinker_info *child_info, *parent_info; | 
 | 	struct shrinker_info_unit *child_unit, *parent_unit; | 
 |  | 
 | 	parent = parent_mem_cgroup(memcg); | 
 | 	if (!parent) | 
 | 		parent = root_mem_cgroup; | 
 |  | 
 | 	/* Prevent from concurrent shrinker_info expand */ | 
 | 	mutex_lock(&shrinker_mutex); | 
 | 	for_each_node(nid) { | 
 | 		child_info = shrinker_info_protected(memcg, nid); | 
 | 		parent_info = shrinker_info_protected(parent, nid); | 
 | 		for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) { | 
 | 			child_unit = child_info->unit[index]; | 
 | 			parent_unit = parent_info->unit[index]; | 
 | 			for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) { | 
 | 				nr = atomic_long_read(&child_unit->nr_deferred[offset]); | 
 | 				atomic_long_add(nr, &parent_unit->nr_deferred[offset]); | 
 | 			} | 
 | 		} | 
 | 	} | 
 | 	mutex_unlock(&shrinker_mutex); | 
 | } | 
 | #else | 
/*
 * !CONFIG_MEMCG stub: no shrinker ID can be assigned. shrinker_alloc()
 * treats -ENOSYS as "fall back to a non-memcg-aware shrinker".
 */
static int shrinker_memcg_alloc(struct shrinker *shrinker)
{
	return -ENOSYS;
}
 |  | 
/* !CONFIG_MEMCG stub: there is no shrinker ID to release. */
static void shrinker_memcg_remove(struct shrinker *shrinker)
{
}
 |  | 
/* !CONFIG_MEMCG stub: no per-memcg deferred counts exist, so report 0. */
static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
				   struct mem_cgroup *memcg)
{
	return 0;
}
 |  | 
/* !CONFIG_MEMCG stub: @nr is dropped and 0 is reported as the new count. */
static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
				  struct mem_cgroup *memcg)
{
	return 0;
}
 | #endif /* CONFIG_MEMCG */ | 
 |  | 
 | static long xchg_nr_deferred(struct shrinker *shrinker, | 
 | 			     struct shrink_control *sc) | 
 | { | 
 | 	int nid = sc->nid; | 
 |  | 
 | 	if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) | 
 | 		nid = 0; | 
 |  | 
 | 	if (sc->memcg && | 
 | 	    (shrinker->flags & SHRINKER_MEMCG_AWARE)) | 
 | 		return xchg_nr_deferred_memcg(nid, shrinker, | 
 | 					      sc->memcg); | 
 |  | 
 | 	return atomic_long_xchg(&shrinker->nr_deferred[nid], 0); | 
 | } | 
 |  | 
 |  | 
 | static long add_nr_deferred(long nr, struct shrinker *shrinker, | 
 | 			    struct shrink_control *sc) | 
 | { | 
 | 	int nid = sc->nid; | 
 |  | 
 | 	if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) | 
 | 		nid = 0; | 
 |  | 
 | 	if (sc->memcg && | 
 | 	    (shrinker->flags & SHRINKER_MEMCG_AWARE)) | 
 | 		return add_nr_deferred_memcg(nr, nid, shrinker, | 
 | 					     sc->memcg); | 
 |  | 
 | 	return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]); | 
 | } | 
 |  | 
/* Batch size do_shrink_slab() uses when shrinker->batch is zero. */
#define SHRINK_BATCH 128
 |  | 
 | static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, | 
 | 				    struct shrinker *shrinker, int priority) | 
 | { | 
 | 	unsigned long freed = 0; | 
 | 	unsigned long long delta; | 
 | 	long total_scan; | 
 | 	long freeable; | 
 | 	long nr; | 
 | 	long new_nr; | 
 | 	long batch_size = shrinker->batch ? shrinker->batch | 
 | 					  : SHRINK_BATCH; | 
 | 	long scanned = 0, next_deferred; | 
 |  | 
 | 	freeable = shrinker->count_objects(shrinker, shrinkctl); | 
 | 	if (freeable == 0 || freeable == SHRINK_EMPTY) | 
 | 		return freeable; | 
 |  | 
 | 	/* | 
 | 	 * copy the current shrinker scan count into a local variable | 
 | 	 * and zero it so that other concurrent shrinker invocations | 
 | 	 * don't also do this scanning work. | 
 | 	 */ | 
 | 	nr = xchg_nr_deferred(shrinker, shrinkctl); | 
 |  | 
 | 	if (shrinker->seeks) { | 
 | 		delta = freeable >> priority; | 
 | 		delta *= 4; | 
 | 		do_div(delta, shrinker->seeks); | 
 | 	} else { | 
 | 		/* | 
 | 		 * These objects don't require any IO to create. Trim | 
 | 		 * them aggressively under memory pressure to keep | 
 | 		 * them from causing refetches in the IO caches. | 
 | 		 */ | 
 | 		delta = freeable / 2; | 
 | 	} | 
 |  | 
 | 	total_scan = nr >> priority; | 
 | 	total_scan += delta; | 
 | 	total_scan = min(total_scan, (2 * freeable)); | 
 |  | 
 | 	trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, | 
 | 				   freeable, delta, total_scan, priority); | 
 |  | 
 | 	/* | 
 | 	 * Normally, we should not scan less than batch_size objects in one | 
 | 	 * pass to avoid too frequent shrinker calls, but if the slab has less | 
 | 	 * than batch_size objects in total and we are really tight on memory, | 
 | 	 * we will try to reclaim all available objects, otherwise we can end | 
 | 	 * up failing allocations although there are plenty of reclaimable | 
 | 	 * objects spread over several slabs with usage less than the | 
 | 	 * batch_size. | 
 | 	 * | 
 | 	 * We detect the "tight on memory" situations by looking at the total | 
 | 	 * number of objects we want to scan (total_scan). If it is greater | 
 | 	 * than the total number of objects on slab (freeable), we must be | 
 | 	 * scanning at high prio and therefore should try to reclaim as much as | 
 | 	 * possible. | 
 | 	 */ | 
 | 	while (total_scan >= batch_size || | 
 | 	       total_scan >= freeable) { | 
 | 		unsigned long ret; | 
 | 		unsigned long nr_to_scan = min(batch_size, total_scan); | 
 |  | 
 | 		shrinkctl->nr_to_scan = nr_to_scan; | 
 | 		shrinkctl->nr_scanned = nr_to_scan; | 
 | 		ret = shrinker->scan_objects(shrinker, shrinkctl); | 
 | 		if (ret == SHRINK_STOP) | 
 | 			break; | 
 | 		freed += ret; | 
 |  | 
 | 		count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); | 
 | 		total_scan -= shrinkctl->nr_scanned; | 
 | 		scanned += shrinkctl->nr_scanned; | 
 |  | 
 | 		cond_resched(); | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * The deferred work is increased by any new work (delta) that wasn't | 
 | 	 * done, decreased by old deferred work that was done now. | 
 | 	 * | 
 | 	 * And it is capped to two times of the freeable items. | 
 | 	 */ | 
 | 	next_deferred = max_t(long, (nr + delta - scanned), 0); | 
 | 	next_deferred = min(next_deferred, (2 * freeable)); | 
 |  | 
 | 	/* | 
 | 	 * move the unused scan count back into the shrinker in a | 
 | 	 * manner that handles concurrent updates. | 
 | 	 */ | 
 | 	new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl); | 
 |  | 
 | 	trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan); | 
 | 	return freed; | 
 | } | 
 |  | 
 | #ifdef CONFIG_MEMCG | 
 | static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, | 
 | 			struct mem_cgroup *memcg, int priority) | 
 | { | 
 | 	struct shrinker_info *info; | 
 | 	unsigned long ret, freed = 0; | 
 | 	int offset, index = 0; | 
 |  | 
 | 	if (!mem_cgroup_online(memcg)) | 
 | 		return 0; | 
 |  | 
 | 	/* | 
 | 	 * lockless algorithm of memcg shrink. | 
 | 	 * | 
 | 	 * The shrinker_info may be freed asynchronously via RCU in the | 
 | 	 * expand_one_shrinker_info(), so the rcu_read_lock() needs to be used | 
 | 	 * to ensure the existence of the shrinker_info. | 
 | 	 * | 
 | 	 * The shrinker_info_unit is never freed unless its corresponding memcg | 
 | 	 * is destroyed. Here we already hold the refcount of memcg, so the | 
 | 	 * memcg will not be destroyed, and of course shrinker_info_unit will | 
 | 	 * not be freed. | 
 | 	 * | 
 | 	 * So in the memcg shrink: | 
 | 	 *  step 1: use rcu_read_lock() to guarantee existence of the | 
 | 	 *          shrinker_info. | 
 | 	 *  step 2: after getting shrinker_info_unit we can safely release the | 
 | 	 *          RCU lock. | 
 | 	 *  step 3: traverse the bitmap and calculate shrinker_id | 
 | 	 *  step 4: use rcu_read_lock() to guarantee existence of the shrinker. | 
 | 	 *  step 5: use shrinker_id to find the shrinker, then use | 
 | 	 *          shrinker_try_get() to guarantee existence of the shrinker, | 
 | 	 *          then we can release the RCU lock to do do_shrink_slab() that | 
 | 	 *          may sleep. | 
 | 	 *  step 6: do shrinker_put() paired with step 5 to put the refcount, | 
 | 	 *          if the refcount reaches 0, then wake up the waiter in | 
 | 	 *          shrinker_free() by calling complete(). | 
 | 	 *          Note: here is different from the global shrink, we don't | 
 | 	 *                need to acquire the RCU lock to guarantee existence of | 
 | 	 *                the shrinker, because we don't need to use this | 
 | 	 *                shrinker to traverse the next shrinker in the bitmap. | 
 | 	 *  step 7: we have already exited the read-side of rcu critical section | 
 | 	 *          before calling do_shrink_slab(), the shrinker_info may be | 
 | 	 *          released in expand_one_shrinker_info(), so go back to step 1 | 
 | 	 *          to reacquire the shrinker_info. | 
 | 	 */ | 
 | again: | 
 | 	rcu_read_lock(); | 
 | 	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); | 
 | 	if (unlikely(!info)) | 
 | 		goto unlock; | 
 |  | 
 | 	if (index < shrinker_id_to_index(info->map_nr_max)) { | 
 | 		struct shrinker_info_unit *unit; | 
 |  | 
 | 		unit = info->unit[index]; | 
 |  | 
 | 		rcu_read_unlock(); | 
 |  | 
 | 		for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) { | 
 | 			struct shrink_control sc = { | 
 | 				.gfp_mask = gfp_mask, | 
 | 				.nid = nid, | 
 | 				.memcg = memcg, | 
 | 			}; | 
 | 			struct shrinker *shrinker; | 
 | 			int shrinker_id = calc_shrinker_id(index, offset); | 
 |  | 
 | 			rcu_read_lock(); | 
 | 			shrinker = idr_find(&shrinker_idr, shrinker_id); | 
 | 			if (unlikely(!shrinker || !shrinker_try_get(shrinker))) { | 
 | 				clear_bit(offset, unit->map); | 
 | 				rcu_read_unlock(); | 
 | 				continue; | 
 | 			} | 
 | 			rcu_read_unlock(); | 
 |  | 
 | 			/* Call non-slab shrinkers even though kmem is disabled */ | 
 | 			if (!memcg_kmem_online() && | 
 | 			    !(shrinker->flags & SHRINKER_NONSLAB)) | 
 | 				continue; | 
 |  | 
 | 			ret = do_shrink_slab(&sc, shrinker, priority); | 
 | 			if (ret == SHRINK_EMPTY) { | 
 | 				clear_bit(offset, unit->map); | 
 | 				/* | 
 | 				 * After the shrinker reported that it had no objects to | 
 | 				 * free, but before we cleared the corresponding bit in | 
 | 				 * the memcg shrinker map, a new object might have been | 
 | 				 * added. To make sure, we have the bit set in this | 
 | 				 * case, we invoke the shrinker one more time and reset | 
 | 				 * the bit if it reports that it is not empty anymore. | 
 | 				 * The memory barrier here pairs with the barrier in | 
 | 				 * set_shrinker_bit(): | 
 | 				 * | 
 | 				 * list_lru_add()     shrink_slab_memcg() | 
 | 				 *   list_add_tail()    clear_bit() | 
 | 				 *   <MB>               <MB> | 
 | 				 *   set_bit()          do_shrink_slab() | 
 | 				 */ | 
 | 				smp_mb__after_atomic(); | 
 | 				ret = do_shrink_slab(&sc, shrinker, priority); | 
 | 				if (ret == SHRINK_EMPTY) | 
 | 					ret = 0; | 
 | 				else | 
 | 					set_shrinker_bit(memcg, nid, shrinker_id); | 
 | 			} | 
 | 			freed += ret; | 
 | 			shrinker_put(shrinker); | 
 | 		} | 
 |  | 
 | 		index++; | 
 | 		goto again; | 
 | 	} | 
 | unlock: | 
 | 	rcu_read_unlock(); | 
 | 	return freed; | 
 | } | 
 | #else /* !CONFIG_MEMCG */ | 
/* !CONFIG_MEMCG stub: there are no per-memcg shrinker maps, so nothing is freed. */
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
			struct mem_cgroup *memcg, int priority)
{
	return 0;
}
 | #endif /* CONFIG_MEMCG */ | 
 |  | 
 | /** | 
 |  * shrink_slab - shrink slab caches | 
 |  * @gfp_mask: allocation context | 
 |  * @nid: node whose slab caches to target | 
 |  * @memcg: memory cgroup whose slab caches to target | 
 |  * @priority: the reclaim priority | 
 |  * | 
 |  * Call the shrink functions to age shrinkable caches. | 
 |  * | 
 |  * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set, | 
 |  * unaware shrinkers will receive a node id of 0 instead. | 
 |  * | 
 |  * @memcg specifies the memory cgroup to target. Unaware shrinkers | 
 |  * are called only if it is the root cgroup. | 
 |  * | 
 |  * @priority is sc->priority, we take the number of objects and >> by priority | 
 |  * in order to get the scan target. | 
 |  * | 
 |  * Returns the number of reclaimed slab objects. | 
 |  */ | 
 | unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, | 
 | 			  int priority) | 
 | { | 
 | 	unsigned long ret, freed = 0; | 
 | 	struct shrinker *shrinker; | 
 |  | 
 | 	/* | 
 | 	 * The root memcg might be allocated even though memcg is disabled | 
 | 	 * via "cgroup_disable=memory" boot parameter.  This could make | 
 | 	 * mem_cgroup_is_root() return false, then just run memcg slab | 
 | 	 * shrink, but skip global shrink.  This may result in premature | 
 | 	 * oom. | 
 | 	 */ | 
 | 	if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) | 
 | 		return shrink_slab_memcg(gfp_mask, nid, memcg, priority); | 
 |  | 
 | 	/* | 
 | 	 * lockless algorithm of global shrink. | 
 | 	 * | 
 | 	 * In the unregistration setp, the shrinker will be freed asynchronously | 
 | 	 * via RCU after its refcount reaches 0. So both rcu_read_lock() and | 
 | 	 * shrinker_try_get() can be used to ensure the existence of the shrinker. | 
 | 	 * | 
 | 	 * So in the global shrink: | 
 | 	 *  step 1: use rcu_read_lock() to guarantee existence of the shrinker | 
 | 	 *          and the validity of the shrinker_list walk. | 
 | 	 *  step 2: use shrinker_try_get() to try get the refcount, if successful, | 
 | 	 *          then the existence of the shrinker can also be guaranteed, | 
 | 	 *          so we can release the RCU lock to do do_shrink_slab() that | 
 | 	 *          may sleep. | 
 | 	 *  step 3: *MUST* to reacquire the RCU lock before calling shrinker_put(), | 
 | 	 *          which ensures that neither this shrinker nor the next shrinker | 
 | 	 *          will be freed in the next traversal operation. | 
 | 	 *  step 4: do shrinker_put() paired with step 2 to put the refcount, | 
 | 	 *          if the refcount reaches 0, then wake up the waiter in | 
 | 	 *          shrinker_free() by calling complete(). | 
 | 	 */ | 
 | 	rcu_read_lock(); | 
 | 	list_for_each_entry_rcu(shrinker, &shrinker_list, list) { | 
 | 		struct shrink_control sc = { | 
 | 			.gfp_mask = gfp_mask, | 
 | 			.nid = nid, | 
 | 			.memcg = memcg, | 
 | 		}; | 
 |  | 
 | 		if (!shrinker_try_get(shrinker)) | 
 | 			continue; | 
 |  | 
 | 		rcu_read_unlock(); | 
 |  | 
 | 		ret = do_shrink_slab(&sc, shrinker, priority); | 
 | 		if (ret == SHRINK_EMPTY) | 
 | 			ret = 0; | 
 | 		freed += ret; | 
 |  | 
 | 		rcu_read_lock(); | 
 | 		shrinker_put(shrinker); | 
 | 	} | 
 |  | 
 | 	rcu_read_unlock(); | 
 | 	cond_resched(); | 
 | 	return freed; | 
 | } | 
 |  | 
 | struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...) | 
 | { | 
 | 	struct shrinker *shrinker; | 
 | 	unsigned int size; | 
 | 	va_list ap; | 
 | 	int err; | 
 |  | 
 | 	shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL); | 
 | 	if (!shrinker) | 
 | 		return NULL; | 
 |  | 
 | 	va_start(ap, fmt); | 
 | 	err = shrinker_debugfs_name_alloc(shrinker, fmt, ap); | 
 | 	va_end(ap); | 
 | 	if (err) | 
 | 		goto err_name; | 
 |  | 
 | 	shrinker->flags = flags | SHRINKER_ALLOCATED; | 
 | 	shrinker->seeks = DEFAULT_SEEKS; | 
 |  | 
 | 	if (flags & SHRINKER_MEMCG_AWARE) { | 
 | 		err = shrinker_memcg_alloc(shrinker); | 
 | 		if (err == -ENOSYS) { | 
 | 			/* Memcg is not supported, fallback to non-memcg-aware shrinker. */ | 
 | 			shrinker->flags &= ~SHRINKER_MEMCG_AWARE; | 
 | 			goto non_memcg; | 
 | 		} | 
 |  | 
 | 		if (err) | 
 | 			goto err_flags; | 
 |  | 
 | 		return shrinker; | 
 | 	} | 
 |  | 
 | non_memcg: | 
 | 	/* | 
 | 	 * The nr_deferred is available on per memcg level for memcg aware | 
 | 	 * shrinkers, so only allocate nr_deferred in the following cases: | 
 | 	 *  - non-memcg-aware shrinkers | 
 | 	 *  - !CONFIG_MEMCG | 
 | 	 *  - memcg is disabled by kernel command line | 
 | 	 */ | 
 | 	size = sizeof(*shrinker->nr_deferred); | 
 | 	if (flags & SHRINKER_NUMA_AWARE) | 
 | 		size *= nr_node_ids; | 
 |  | 
 | 	shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); | 
 | 	if (!shrinker->nr_deferred) | 
 | 		goto err_flags; | 
 |  | 
 | 	return shrinker; | 
 |  | 
 | err_flags: | 
 | 	shrinker_debugfs_name_free(shrinker); | 
 | err_name: | 
 | 	kfree(shrinker); | 
 | 	return NULL; | 
 | } | 
 | EXPORT_SYMBOL_GPL(shrinker_alloc); | 
 |  | 
 | void shrinker_register(struct shrinker *shrinker) | 
 | { | 
 | 	if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) { | 
 | 		pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker"); | 
 | 		return; | 
 | 	} | 
 |  | 
 | 	mutex_lock(&shrinker_mutex); | 
 | 	list_add_tail_rcu(&shrinker->list, &shrinker_list); | 
 | 	shrinker->flags |= SHRINKER_REGISTERED; | 
 | 	shrinker_debugfs_add(shrinker); | 
 | 	mutex_unlock(&shrinker_mutex); | 
 |  | 
 | 	init_completion(&shrinker->done); | 
 | 	/* | 
 | 	 * Now the shrinker is fully set up, take the first reference to it to | 
 | 	 * indicate that lookup operations are now allowed to use it via | 
 | 	 * shrinker_try_get(). | 
 | 	 */ | 
 | 	refcount_set(&shrinker->refcount, 1); | 
 | } | 
 | EXPORT_SYMBOL_GPL(shrinker_register); | 
 |  | 
 | static void shrinker_free_rcu_cb(struct rcu_head *head) | 
 | { | 
 | 	struct shrinker *shrinker = container_of(head, struct shrinker, rcu); | 
 |  | 
 | 	kfree(shrinker->nr_deferred); | 
 | 	kfree(shrinker); | 
 | } | 
 |  | 
 | void shrinker_free(struct shrinker *shrinker) | 
 | { | 
 | 	struct dentry *debugfs_entry = NULL; | 
 | 	int debugfs_id; | 
 |  | 
 | 	if (!shrinker) | 
 | 		return; | 
 |  | 
 | 	if (shrinker->flags & SHRINKER_REGISTERED) { | 
 | 		/* drop the initial refcount */ | 
 | 		shrinker_put(shrinker); | 
 | 		/* | 
 | 		 * Wait for all lookups of the shrinker to complete, after that, | 
 | 		 * no shrinker is running or will run again, then we can safely | 
 | 		 * free it asynchronously via RCU and safely free the structure | 
 | 		 * where the shrinker is located, such as super_block etc. | 
 | 		 */ | 
 | 		wait_for_completion(&shrinker->done); | 
 | 	} | 
 |  | 
 | 	mutex_lock(&shrinker_mutex); | 
 | 	if (shrinker->flags & SHRINKER_REGISTERED) { | 
 | 		/* | 
 | 		 * Now we can safely remove it from the shrinker_list and then | 
 | 		 * free it. | 
 | 		 */ | 
 | 		list_del_rcu(&shrinker->list); | 
 | 		debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); | 
 | 		shrinker->flags &= ~SHRINKER_REGISTERED; | 
 | 	} | 
 |  | 
 | 	shrinker_debugfs_name_free(shrinker); | 
 |  | 
 | 	if (shrinker->flags & SHRINKER_MEMCG_AWARE) | 
 | 		shrinker_memcg_remove(shrinker); | 
 | 	mutex_unlock(&shrinker_mutex); | 
 |  | 
 | 	if (debugfs_entry) | 
 | 		shrinker_debugfs_remove(debugfs_entry, debugfs_id); | 
 |  | 
 | 	call_rcu(&shrinker->rcu, shrinker_free_rcu_cb); | 
 | } | 
 | EXPORT_SYMBOL_GPL(shrinker_free); |