| // SPDX-License-Identifier: GPL-2.0 | 
 | /* | 
 |  * Common Code for Data Access Monitoring | 
 |  * | 
 |  * Author: SeongJae Park <sj@kernel.org> | 
 |  */ | 
 |  | 
 | #include <linux/migrate.h> | 
 | #include <linux/mmu_notifier.h> | 
 | #include <linux/page_idle.h> | 
 | #include <linux/pagemap.h> | 
 | #include <linux/rmap.h> | 
 | #include <linux/swap.h> | 
 | #include <linux/swapops.h> | 
 |  | 
 | #include "../internal.h" | 
 | #include "ops-common.h" | 
 |  | 
 | /* | 
 |  * Get an online page for a pfn if it's in the LRU list.  Otherwise, returns | 
 |  * NULL. | 
 |  * | 
 |  * The body of this function is stolen from the 'page_idle_get_folio()'.  We | 
 |  * steal rather than reuse it because the code is quite simple. | 
 |  */ | 
 | struct folio *damon_get_folio(unsigned long pfn) | 
 | { | 
 | 	struct page *page = pfn_to_online_page(pfn); | 
 | 	struct folio *folio; | 
 |  | 
 | 	if (!page) | 
 | 		return NULL; | 
 |  | 
 | 	folio = page_folio(page); | 
 | 	if (!folio_test_lru(folio) || !folio_try_get(folio)) | 
 | 		return NULL; | 
 | 	if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) { | 
 | 		folio_put(folio); | 
 | 		folio = NULL; | 
 | 	} | 
 | 	return folio; | 
 | } | 
 |  | 
 | void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr) | 
 | { | 
 | 	pte_t pteval = ptep_get(pte); | 
 | 	struct folio *folio; | 
 | 	bool young = false; | 
 | 	unsigned long pfn; | 
 |  | 
 | 	if (likely(pte_present(pteval))) | 
 | 		pfn = pte_pfn(pteval); | 
 | 	else | 
 | 		pfn = swp_offset_pfn(pte_to_swp_entry(pteval)); | 
 |  | 
 | 	folio = damon_get_folio(pfn); | 
 | 	if (!folio) | 
 | 		return; | 
 |  | 
 | 	/* | 
 | 	 * PFN swap PTEs, such as device-exclusive ones, that actually map pages | 
 | 	 * are "old" from a CPU perspective. The MMU notifier takes care of any | 
 | 	 * device aspects. | 
 | 	 */ | 
 | 	if (likely(pte_present(pteval))) | 
 | 		young |= ptep_test_and_clear_young(vma, addr, pte); | 
 | 	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE); | 
 | 	if (young) | 
 | 		folio_set_young(folio); | 
 |  | 
 | 	folio_set_idle(folio); | 
 | 	folio_put(folio); | 
 | } | 
 |  | 
 | void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr) | 
 | { | 
 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 
 | 	struct folio *folio = damon_get_folio(pmd_pfn(pmdp_get(pmd))); | 
 |  | 
 | 	if (!folio) | 
 | 		return; | 
 |  | 
 | 	if (pmdp_clear_young_notify(vma, addr, pmd)) | 
 | 		folio_set_young(folio); | 
 |  | 
 | 	folio_set_idle(folio); | 
 | 	folio_put(folio); | 
 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 
 | } | 
 |  | 
 | #define DAMON_MAX_SUBSCORE	(100) | 
 | #define DAMON_MAX_AGE_IN_LOG	(32) | 
 |  | 
 | int damon_hot_score(struct damon_ctx *c, struct damon_region *r, | 
 | 			struct damos *s) | 
 | { | 
 | 	int freq_subscore; | 
 | 	unsigned int age_in_sec; | 
 | 	int age_in_log, age_subscore; | 
 | 	unsigned int freq_weight = s->quota.weight_nr_accesses; | 
 | 	unsigned int age_weight = s->quota.weight_age; | 
 | 	int hotness; | 
 |  | 
 | 	freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / | 
 | 		damon_max_nr_accesses(&c->attrs); | 
 |  | 
 | 	age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000; | 
 | 	for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec; | 
 | 			age_in_log++, age_in_sec >>= 1) | 
 | 		; | 
 |  | 
 | 	/* If frequency is 0, higher age means it's colder */ | 
 | 	if (freq_subscore == 0) | 
 | 		age_in_log *= -1; | 
 |  | 
 | 	/* | 
 | 	 * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG]. | 
 | 	 * Scale it to be in [0, 100] and set it as age subscore. | 
 | 	 */ | 
 | 	age_in_log += DAMON_MAX_AGE_IN_LOG; | 
 | 	age_subscore = age_in_log * DAMON_MAX_SUBSCORE / | 
 | 		DAMON_MAX_AGE_IN_LOG / 2; | 
 |  | 
 | 	hotness = (freq_weight * freq_subscore + age_weight * age_subscore); | 
 | 	if (freq_weight + age_weight) | 
 | 		hotness /= freq_weight + age_weight; | 
 | 	/* | 
 | 	 * Transform it to fit in [0, DAMOS_MAX_SCORE] | 
 | 	 */ | 
 | 	hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE; | 
 |  | 
 | 	return hotness; | 
 | } | 
 |  | 
 | int damon_cold_score(struct damon_ctx *c, struct damon_region *r, | 
 | 			struct damos *s) | 
 | { | 
 | 	int hotness = damon_hot_score(c, r, s); | 
 |  | 
 | 	/* Return coldness of the region */ | 
 | 	return DAMOS_MAX_SCORE - hotness; | 
 | } | 
 |  | 
 | static bool damon_folio_mkold_one(struct folio *folio, | 
 | 		struct vm_area_struct *vma, unsigned long addr, void *arg) | 
 | { | 
 | 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); | 
 |  | 
 | 	while (page_vma_mapped_walk(&pvmw)) { | 
 | 		addr = pvmw.address; | 
 | 		if (pvmw.pte) | 
 | 			damon_ptep_mkold(pvmw.pte, vma, addr); | 
 | 		else | 
 | 			damon_pmdp_mkold(pvmw.pmd, vma, addr); | 
 | 	} | 
 | 	return true; | 
 | } | 
 |  | 
 | void damon_folio_mkold(struct folio *folio) | 
 | { | 
 | 	struct rmap_walk_control rwc = { | 
 | 		.rmap_one = damon_folio_mkold_one, | 
 | 		.anon_lock = folio_lock_anon_vma_read, | 
 | 	}; | 
 | 	bool need_lock; | 
 |  | 
 | 	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { | 
 | 		folio_set_idle(folio); | 
 | 		return; | 
 | 	} | 
 |  | 
 | 	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio); | 
 | 	if (need_lock && !folio_trylock(folio)) | 
 | 		return; | 
 |  | 
 | 	rmap_walk(folio, &rwc); | 
 |  | 
 | 	if (need_lock) | 
 | 		folio_unlock(folio); | 
 |  | 
 | } | 
 |  | 
 | static bool damon_folio_young_one(struct folio *folio, | 
 | 		struct vm_area_struct *vma, unsigned long addr, void *arg) | 
 | { | 
 | 	bool *accessed = arg; | 
 | 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); | 
 | 	pte_t pte; | 
 |  | 
 | 	*accessed = false; | 
 | 	while (page_vma_mapped_walk(&pvmw)) { | 
 | 		addr = pvmw.address; | 
 | 		if (pvmw.pte) { | 
 | 			pte = ptep_get(pvmw.pte); | 
 |  | 
 | 			/* | 
 | 			 * PFN swap PTEs, such as device-exclusive ones, that | 
 | 			 * actually map pages are "old" from a CPU perspective. | 
 | 			 * The MMU notifier takes care of any device aspects. | 
 | 			 */ | 
 | 			*accessed = (pte_present(pte) && pte_young(pte)) || | 
 | 				!folio_test_idle(folio) || | 
 | 				mmu_notifier_test_young(vma->vm_mm, addr); | 
 | 		} else { | 
 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 
 | 			*accessed = pmd_young(pmdp_get(pvmw.pmd)) || | 
 | 				!folio_test_idle(folio) || | 
 | 				mmu_notifier_test_young(vma->vm_mm, addr); | 
 | #else | 
 | 			WARN_ON_ONCE(1); | 
 | #endif	/* CONFIG_TRANSPARENT_HUGEPAGE */ | 
 | 		} | 
 | 		if (*accessed) { | 
 | 			page_vma_mapped_walk_done(&pvmw); | 
 | 			break; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	/* If accessed, stop walking */ | 
 | 	return *accessed == false; | 
 | } | 
 |  | 
 | bool damon_folio_young(struct folio *folio) | 
 | { | 
 | 	bool accessed = false; | 
 | 	struct rmap_walk_control rwc = { | 
 | 		.arg = &accessed, | 
 | 		.rmap_one = damon_folio_young_one, | 
 | 		.anon_lock = folio_lock_anon_vma_read, | 
 | 	}; | 
 | 	bool need_lock; | 
 |  | 
 | 	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { | 
 | 		if (folio_test_idle(folio)) | 
 | 			return false; | 
 | 		else | 
 | 			return true; | 
 | 	} | 
 |  | 
 | 	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio); | 
 | 	if (need_lock && !folio_trylock(folio)) | 
 | 		return false; | 
 |  | 
 | 	rmap_walk(folio, &rwc); | 
 |  | 
 | 	if (need_lock) | 
 | 		folio_unlock(folio); | 
 |  | 
 | 	return accessed; | 
 | } | 
 |  | 
 | bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio) | 
 | { | 
 | 	bool matched = false; | 
 | 	struct mem_cgroup *memcg; | 
 | 	size_t folio_sz; | 
 |  | 
 | 	switch (filter->type) { | 
 | 	case DAMOS_FILTER_TYPE_ANON: | 
 | 		matched = folio_test_anon(folio); | 
 | 		break; | 
 | 	case DAMOS_FILTER_TYPE_ACTIVE: | 
 | 		matched = folio_test_active(folio); | 
 | 		break; | 
 | 	case DAMOS_FILTER_TYPE_MEMCG: | 
 | 		rcu_read_lock(); | 
 | 		memcg = folio_memcg_check(folio); | 
 | 		if (!memcg) | 
 | 			matched = false; | 
 | 		else | 
 | 			matched = filter->memcg_id == mem_cgroup_id(memcg); | 
 | 		rcu_read_unlock(); | 
 | 		break; | 
 | 	case DAMOS_FILTER_TYPE_YOUNG: | 
 | 		matched = damon_folio_young(folio); | 
 | 		if (matched) | 
 | 			damon_folio_mkold(folio); | 
 | 		break; | 
 | 	case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: | 
 | 		folio_sz = folio_size(folio); | 
 | 		matched = filter->sz_range.min <= folio_sz && | 
 | 			  folio_sz <= filter->sz_range.max; | 
 | 		break; | 
 | 	case DAMOS_FILTER_TYPE_UNMAPPED: | 
 | 		matched = !folio_mapped(folio) || !folio_raw_mapping(folio); | 
 | 		break; | 
 | 	default: | 
 | 		break; | 
 | 	} | 
 |  | 
 | 	return matched == filter->matching; | 
 | } | 
 |  | 
 | static unsigned int __damon_migrate_folio_list( | 
 | 		struct list_head *migrate_folios, struct pglist_data *pgdat, | 
 | 		int target_nid) | 
 | { | 
 | 	unsigned int nr_succeeded = 0; | 
 | 	struct migration_target_control mtc = { | 
 | 		/* | 
 | 		 * Allocate from 'node', or fail quickly and quietly. | 
 | 		 * When this happens, 'page' will likely just be discarded | 
 | 		 * instead of migrated. | 
 | 		 */ | 
 | 		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | | 
 | 			__GFP_NOWARN | __GFP_NOMEMALLOC | GFP_NOWAIT, | 
 | 		.nid = target_nid, | 
 | 	}; | 
 |  | 
 | 	if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE) | 
 | 		return 0; | 
 |  | 
 | 	if (list_empty(migrate_folios)) | 
 | 		return 0; | 
 |  | 
 | 	/* Migration ignores all cpuset and mempolicy settings */ | 
 | 	migrate_pages(migrate_folios, alloc_migration_target, NULL, | 
 | 		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON, | 
 | 		      &nr_succeeded); | 
 |  | 
 | 	return nr_succeeded; | 
 | } | 
 |  | 
 | static unsigned int damon_migrate_folio_list(struct list_head *folio_list, | 
 | 						struct pglist_data *pgdat, | 
 | 						int target_nid) | 
 | { | 
 | 	unsigned int nr_migrated = 0; | 
 | 	struct folio *folio; | 
 | 	LIST_HEAD(ret_folios); | 
 | 	LIST_HEAD(migrate_folios); | 
 |  | 
 | 	while (!list_empty(folio_list)) { | 
 | 		struct folio *folio; | 
 |  | 
 | 		cond_resched(); | 
 |  | 
 | 		folio = lru_to_folio(folio_list); | 
 | 		list_del(&folio->lru); | 
 |  | 
 | 		if (!folio_trylock(folio)) | 
 | 			goto keep; | 
 |  | 
 | 		/* Relocate its contents to another node. */ | 
 | 		list_add(&folio->lru, &migrate_folios); | 
 | 		folio_unlock(folio); | 
 | 		continue; | 
 | keep: | 
 | 		list_add(&folio->lru, &ret_folios); | 
 | 	} | 
 | 	/* 'folio_list' is always empty here */ | 
 |  | 
 | 	/* Migrate folios selected for migration */ | 
 | 	nr_migrated += __damon_migrate_folio_list( | 
 | 			&migrate_folios, pgdat, target_nid); | 
 | 	/* | 
 | 	 * Folios that could not be migrated are still in @migrate_folios.  Add | 
 | 	 * those back on @folio_list | 
 | 	 */ | 
 | 	if (!list_empty(&migrate_folios)) | 
 | 		list_splice_init(&migrate_folios, folio_list); | 
 |  | 
 | 	try_to_unmap_flush(); | 
 |  | 
 | 	list_splice(&ret_folios, folio_list); | 
 |  | 
 | 	while (!list_empty(folio_list)) { | 
 | 		folio = lru_to_folio(folio_list); | 
 | 		list_del(&folio->lru); | 
 | 		folio_putback_lru(folio); | 
 | 	} | 
 |  | 
 | 	return nr_migrated; | 
 | } | 
 |  | 
 | unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid) | 
 | { | 
 | 	int nid; | 
 | 	unsigned long nr_migrated = 0; | 
 | 	LIST_HEAD(node_folio_list); | 
 | 	unsigned int noreclaim_flag; | 
 |  | 
 | 	if (list_empty(folio_list)) | 
 | 		return nr_migrated; | 
 |  | 
 | 	if (target_nid < 0 || target_nid >= MAX_NUMNODES || | 
 | 			!node_state(target_nid, N_MEMORY)) | 
 | 		return nr_migrated; | 
 |  | 
 | 	noreclaim_flag = memalloc_noreclaim_save(); | 
 |  | 
 | 	nid = folio_nid(lru_to_folio(folio_list)); | 
 | 	do { | 
 | 		struct folio *folio = lru_to_folio(folio_list); | 
 |  | 
 | 		if (nid == folio_nid(folio)) { | 
 | 			list_move(&folio->lru, &node_folio_list); | 
 | 			continue; | 
 | 		} | 
 |  | 
 | 		nr_migrated += damon_migrate_folio_list(&node_folio_list, | 
 | 							   NODE_DATA(nid), | 
 | 							   target_nid); | 
 | 		nid = folio_nid(lru_to_folio(folio_list)); | 
 | 	} while (!list_empty(folio_list)); | 
 |  | 
 | 	nr_migrated += damon_migrate_folio_list(&node_folio_list, | 
 | 						   NODE_DATA(nid), | 
 | 						   target_nid); | 
 |  | 
 | 	memalloc_noreclaim_restore(noreclaim_flag); | 
 |  | 
 | 	return nr_migrated; | 
 | } |