/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MM_SWAP_TABLE_H
#define _MM_SWAP_TABLE_H

#include <linux/rcupdate.h>
#include <linux/atomic.h>
#include <linux/xarray.h>
#include "swap.h"
/* Each cluster has a swap table: a simple flat array of swap table entries */
struct swap_table {
	atomic_long_t entries[SWAPFILE_CLUSTER];
};

#define SWP_TABLE_USE_PAGE (sizeof(struct swap_table) == PAGE_SIZE)

/*
 * A swap table entry represents the status of a swap slot on a swap
 * (physical or virtual) device. The swap table in each cluster is a
 * 1:1 map of the swap slots in this cluster.
 *
 * Swap table entry types and bit layouts:
 *
 * NULL:    |---------------- 0 ---------------| - Free slot
 * Shadow:  | SWAP_COUNT |---- SHADOW_VAL ---|1| - Swapped out slot
 * PFN:     | SWAP_COUNT |------ PFN -------|10| - Cached slot
 * Pointer: |----------- Pointer ----------|100| - (Unused)
 * Bad:     |------------- 1 -------------|1000| - Bad slot
 *
 * SWAP_COUNT is `SWP_TB_COUNT_BITS` bits wide; each entry is an atomic long.
 *
 * Usages:
 *
 * - NULL: Swap slot is unused and can be allocated.
 *
 * - Shadow: Swap slot is in use and not cached (usually swapped out). It
 *   reuses the XA_VALUE format to stay compatible with workingset shadows,
 *   as sketched in the example below. The SHADOW_VAL part may be all 0 if
 *   the workingset shadow info is absent; in that case the shadow format is
 *   still kept as a placeholder.
 *
 *   The memcg ID is embedded in SHADOW_VAL.
 *
 * - PFN: Swap slot is in use and cached. Memcg info is recorded in the page
 *   struct.
 *
 * - Pointer: Not used yet. `0b100` is reserved for potential pointer usage,
 *   because only the lower three bits are available as type markers for
 *   8-byte aligned pointers.
 *
 * - Bad: Swap slot is reserved; it protects the swap header or holes on the
 *   swap device.
 */
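
/*
 * For example (a minimal sketch, values are made up): a workingset shadow
 * created with xa_mk_value(0x1234) is the odd value 0x2469, so bit 0 is
 * already set and it can be stored in the swap table verbatim as a
 * Shadow-type entry; see shadow_to_swp_tb() below. A slot swapped out
 * without workingset info is stored as the bare SWP_TB_SHADOW_MARK.
 */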

#if defined(MAX_POSSIBLE_PHYSMEM_BITS)
#define SWAP_CACHE_PFN_BITS	(MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
#elif defined(MAX_PHYSMEM_BITS)
#define SWAP_CACHE_PFN_BITS	(MAX_PHYSMEM_BITS - PAGE_SHIFT)
#else
#define SWAP_CACHE_PFN_BITS	(BITS_PER_LONG - PAGE_SHIFT)
#endif

/* NULL entry, all 0 */
#define SWP_TB_NULL		0UL

/* Swapped out: shadow */
#define SWP_TB_SHADOW_MARK	0b1UL

/* Cached: PFN */
#define SWP_TB_PFN_BITS		(SWAP_CACHE_PFN_BITS + SWP_TB_PFN_MARK_BITS)
#define SWP_TB_PFN_MARK		0b10UL
#define SWP_TB_PFN_MARK_BITS	2
#define SWP_TB_PFN_MARK_MASK	(BIT(SWP_TB_PFN_MARK_BITS) - 1)

/* SWAP_COUNT part for PFN or shadow entries; the width can be shrunk or extended */
#define SWP_TB_COUNT_BITS	min(4, BITS_PER_LONG - SWP_TB_PFN_BITS)
#define SWP_TB_COUNT_MASK	(~((~0UL) >> SWP_TB_COUNT_BITS))
#define SWP_TB_COUNT_SHIFT	(BITS_PER_LONG - SWP_TB_COUNT_BITS)
#define SWP_TB_COUNT_MAX	((1 << SWP_TB_COUNT_BITS) - 1)

/* Bad slot: ends with 0b1000 and the rest of the bits are all 1 */
#define SWP_TB_BAD		((~0UL) << 3)

/* Macro for shadow offset calculation */
#define SWAP_COUNT_SHIFT	SWP_TB_COUNT_BITS
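
/*
 * Worked example (illustrative only, assuming a 64-bit kernel with 4K pages
 * and MAX_PHYSMEM_BITS == 46): SWAP_CACHE_PFN_BITS = 46 - 12 = 34,
 * SWP_TB_PFN_BITS = 34 + 2 = 36 and SWP_TB_COUNT_BITS = min(4, 64 - 36) = 4,
 * so a cached slot holding PFN 0x1234 with a swap count of 2 is encoded as
 * (2UL << 60) | (0x1234UL << 2) | 0b10 == 0x20000000000048d2.
 */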

/*
 * Helpers for casting one type of info into a swap table entry.
 */
static inline unsigned long null_to_swp_tb(void)
{
	BUILD_BUG_ON(sizeof(unsigned long) != sizeof(atomic_long_t));
	return 0;
}

static inline unsigned long __count_to_swp_tb(unsigned char count)
{
	/*
	 * At least three values are needed to distinguish free (0),
	 * used (count > 0 && count < SWP_TB_COUNT_MAX), and
	 * overflow (count == SWP_TB_COUNT_MAX).
	 */
	BUILD_BUG_ON(SWP_TB_COUNT_MAX < 2 || SWP_TB_COUNT_BITS < 2);
	VM_WARN_ON(count > SWP_TB_COUNT_MAX);
	return ((unsigned long)count) << SWP_TB_COUNT_SHIFT;
}

static inline unsigned long pfn_to_swp_tb(unsigned long pfn, unsigned int count)
{
	unsigned long swp_tb;

	BUILD_BUG_ON(sizeof(unsigned long) != sizeof(void *));
	BUILD_BUG_ON(SWAP_CACHE_PFN_BITS >
		     (BITS_PER_LONG - SWP_TB_PFN_MARK_BITS - SWP_TB_COUNT_BITS));

	swp_tb = (pfn << SWP_TB_PFN_MARK_BITS) | SWP_TB_PFN_MARK;
	VM_WARN_ON_ONCE(swp_tb & SWP_TB_COUNT_MASK);

	return swp_tb | __count_to_swp_tb(count);
}

static inline unsigned long folio_to_swp_tb(struct folio *folio, unsigned int count)
{
	return pfn_to_swp_tb(folio_pfn(folio), count);
}

static inline unsigned long shadow_to_swp_tb(void *shadow, unsigned int count)
{
	BUILD_BUG_ON((BITS_PER_XA_VALUE + 1) !=
		     BITS_PER_BYTE * sizeof(unsigned long));
	BUILD_BUG_ON((unsigned long)xa_mk_value(0) != SWP_TB_SHADOW_MARK);

	VM_WARN_ON_ONCE(shadow && !xa_is_value(shadow));
	VM_WARN_ON_ONCE(shadow && ((unsigned long)shadow & SWP_TB_COUNT_MASK));

	return (unsigned long)shadow | __count_to_swp_tb(count) | SWP_TB_SHADOW_MARK;
}
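
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * when a folio is added to the swap cache, its slot can be switched from
 * the shadow format to the PFN format while keeping the swap count:
 *
 *	old = __swap_table_get(ci, off);
 *	__swap_table_set(ci, off, folio_to_swp_tb(folio, swp_tb_get_count(old)));
 *
 * The reverse direction stores a workingset shadow (or NULL) back with
 * shadow_to_swp_tb().
 */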

/*
 * Helpers for swap table entry type checking.
 */
static inline bool swp_tb_is_null(unsigned long swp_tb)
{
	return !swp_tb;
}

static inline bool swp_tb_is_folio(unsigned long swp_tb)
{
	return ((swp_tb & SWP_TB_PFN_MARK_MASK) == SWP_TB_PFN_MARK);
}

static inline bool swp_tb_is_shadow(unsigned long swp_tb)
{
	return xa_is_value((void *)swp_tb);
}

static inline bool swp_tb_is_bad(unsigned long swp_tb)
{
	return swp_tb == SWP_TB_BAD;
}

static inline bool swp_tb_is_countable(unsigned long swp_tb)
{
	return (swp_tb_is_shadow(swp_tb) || swp_tb_is_folio(swp_tb) ||
		swp_tb_is_null(swp_tb));
}
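
/*
 * Reading an entry back is typically a type dispatch; a sketch of a
 * hypothetical reader (names are illustrative):
 *
 *	swp_tb = swap_table_get(ci, off);
 *	if (swp_tb_is_folio(swp_tb))
 *		folio = swp_tb_to_folio(swp_tb);
 *	else if (swp_tb_is_shadow(swp_tb))
 *		shadow = swp_tb_to_shadow(swp_tb);
 *	else if (swp_tb_is_bad(swp_tb))
 *		; // reserved slot, skip it
 */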

/*
 * Helpers for retrieving info from a swap table entry.
 */
static inline struct folio *swp_tb_to_folio(unsigned long swp_tb)
{
	VM_WARN_ON(!swp_tb_is_folio(swp_tb));
	return pfn_folio((swp_tb & ~SWP_TB_COUNT_MASK) >> SWP_TB_PFN_MARK_BITS);
}

static inline void *swp_tb_to_shadow(unsigned long swp_tb)
{
	VM_WARN_ON(!swp_tb_is_shadow(swp_tb));
	/* No shift needed, the xa_value is stored as-is in the lower bits. */
	return (void *)(swp_tb & ~SWP_TB_COUNT_MASK);
}

static inline unsigned char __swp_tb_get_count(unsigned long swp_tb)
{
	VM_WARN_ON(!swp_tb_is_countable(swp_tb));
	return ((swp_tb & SWP_TB_COUNT_MASK) >> SWP_TB_COUNT_SHIFT);
}

static inline int swp_tb_get_count(unsigned long swp_tb)
{
	if (swp_tb_is_countable(swp_tb))
		return __swp_tb_get_count(swp_tb);
	return -EINVAL;
}

static inline unsigned long __swp_tb_mk_count(unsigned long swp_tb, int count)
{
	return ((swp_tb & ~SWP_TB_COUNT_MASK) | __count_to_swp_tb(count));
}
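
/*
 * Count handling sketch (illustrative only; the caller must hold the
 * cluster lock, and count overflow is assumed to be handled elsewhere):
 *
 *	swp_tb = __swap_table_get(ci, off);
 *	count = __swp_tb_get_count(swp_tb);
 *	if (count < SWP_TB_COUNT_MAX)
 *		__swap_table_set(ci, off, __swp_tb_mk_count(swp_tb, count + 1));
 */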

/*
 * Helpers for accessing or modifying the swap table of a cluster.
 * The swap cluster must be locked, except for swap_table_get(), which
 * reads locklessly under RCU.
 */
static inline void __swap_table_set(struct swap_cluster_info *ci,
				    unsigned int off, unsigned long swp_tb)
{
	atomic_long_t *table = rcu_dereference_protected(ci->table, true);

	lockdep_assert_held(&ci->lock);
	VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
	atomic_long_set(&table[off], swp_tb);
}

static inline unsigned long __swap_table_xchg(struct swap_cluster_info *ci,
					      unsigned int off, unsigned long swp_tb)
{
	atomic_long_t *table = rcu_dereference_protected(ci->table, true);

	lockdep_assert_held(&ci->lock);
	VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
	/* Ordering is guaranteed by the cluster lock, so relaxed is enough */
	return atomic_long_xchg_relaxed(&table[off], swp_tb);
}

static inline unsigned long __swap_table_get(struct swap_cluster_info *ci,
					     unsigned int off)
{
	atomic_long_t *table;

	VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
	table = rcu_dereference_check(ci->table, lockdep_is_held(&ci->lock));

	return atomic_long_read(&table[off]);
}

static inline unsigned long swap_table_get(struct swap_cluster_info *ci,
					   unsigned int off)
{
	atomic_long_t *table;
	unsigned long swp_tb;

	VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);

	rcu_read_lock();
	table = rcu_dereference(ci->table);
	swp_tb = table ? atomic_long_read(&table[off]) : null_to_swp_tb();
	rcu_read_unlock();

	return swp_tb;
}
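
/*
 * Usage sketch (hypothetical caller; ci, off, folio and count are
 * illustrative): writers take the cluster lock and use the __ helpers,
 * while a lockless reader uses swap_table_get() and may observe a value
 * that is immediately stale:
 *
 *	spin_lock(&ci->lock);
 *	old = __swap_table_xchg(ci, off, folio_to_swp_tb(folio, count));
 *	spin_unlock(&ci->lock);
 *
 *	swp_tb = swap_table_get(ci, off);
 */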
#endif /* _MM_SWAP_TABLE_H */